字典樹trie
顧名思義,在一個字符串的集合里查詢某個字符串是否存在樹形結構。
樹存儲方式上用的是結構體數組,類似滿二叉樹的形式。
模板
定義結構體和trie
- 結構體必須的內容:當前結點的字符,孩子數組
- 可選:
end
用于查詢,repeat
用于統計。
typedef struct node {char c;int children[30] = {0};int end = 0, repeat = 0;
};
vector<node>trie;
node root;
trie.push_back(root);
建樹三部曲
- 不好理解的可能就是這個
fa
,用于表示當前的父節點是誰。 - 查詢結點是否存在于父節點的孩子中:
if (!trie[fa].children[idx])
-
沒有存在則添加結點:創建新下標
int new_idx = trie.size()
,父節點的孩子添加該下標。
int new_idx = trie.size();trie[fa].children[idx] = new_idx;node x;x.c = s[i];if (i == s.size() - 1) {x.end = 1;}trie.push_back(x);
- 父節點變為子節點:
fa = trie[fa].children[idx]
void build(string s) {int fa = 0;for (int i = 0; i < s.size(); i++) {int idx = s[i] - 'a';if (!trie[fa].children[idx]) {int new_idx = trie.size();trie[fa].children[idx] = new_idx;node x;x.c = s[i];if (i == s.size() - 1) {x.end = 1;}trie.push_back(x);fa = new_idx;}else {if (i == s.size() - 1) {trie[fa].end = 1;}fa = trie[fa].children[idx];}}
}
字符串的查詢
- 從父親結點一直遍歷到葉子結點,最后
i==s.size()-1
時特判end
是否為end==true?
即可 - 與建樹過程幾乎完全一致
void query(string s) {int fa = 0;for (int i = 0; i < s.size(); i++) {int idx = s[i] - 'a';if (!trie[fa].children[idx]) {cout << "WRONG" << endl;return;}else {fa = trie[fa].children[idx];if (i == s.size() - 1) {if (trie[fa].repeat) {cout << "REPEAT" << endl;}else {if (trie[fa].end) {trie[fa].repeat = 1;cout << "OK" << endl;}else cout << "WRONG" << endl;}}}}
}
打印字典樹
void print_trie() {for (int i = 0; i < trie.size(); i++) {cout << trie[i].c << "|";for (int j = 0; j < 30; j++) {if (trie[i].children[j])cout << trie[i].children[j] << " ";}cout << "|" << trie[i].end << "|" << trie[i].repeat<<endl;}
}
應用
- P2580 于是他錯誤的點名開始了:標準的查詢字符串操作。
0-1字典樹
-
將數字用二進制形式保存在trie中,一般是高位到低位。配合貪心思想,可以節約查詢操作。
-
P4551 最長異或路徑:這里需要用的異或的自反性質:自己跟自己異或不影響計算。當i<j, [ 1 , i ] ⊕ [ 1 , j ] = [ i , j ] [1,i] \oplus [1,j]=[i,j] [1,i]⊕[1,j]=[i,j], [ i , j ] [i,j] [i,j]表示i到j的異或路徑。現在這個問題就變成了先獲得[1,i],然后再遍歷每個j,計算 [ 1 , i ] ⊕ [ 1 , j ] [1,i] \oplus [1,j] [1,i]⊕[1,j]的最大值。時間復雜度為平方。將這些[1,i]結果保存為字典樹,然后利用貪心從高位到低位選擇最大的異或結果即可!
-
注意
bitset<int>val
這個庫,接受字符串和整型作為構造函數。一定要注意的是遍歷時:從低位到高位遍歷,val[0]
表示從右往左第一位(也就是低位)。for (int i = bitval.size() - 1; i >= 0; i--)
#include<bits/stdc++.h>
#define MAX_VALUE 1000009
#define mod 1000007
using ll = long long;
using namespace std;
typedef struct node {int x, w;node(int a, int b) :x(a), w(b) {};
};
typedef struct trie_node {int val, children[2] = { 0 };
};
vector<list<node>>graph(100009, list<node>());
int n, u, v, w, xors[100009], ans = INT_MIN;
vector<trie_node>trie;
void dfs(int st, int val) {xors[st] = val;for (auto item : graph[st]) {dfs(item.x, val ^ item.w);}
}
void build(int val) {int fa = 0;bitset<32>bitval(val);//bitset[0]是指第一位for (int i = bitval.size() - 1; i >= 0; i--) {//cout << "val:"<<val<<" i:"<<i<<" bitval[i]:" << bitval[i] << endl;if (!trie[fa].children[bitval[i]]) {int new_idx = trie.size();trie[fa].children[bitval[i]] = new_idx;trie_node x;x.val = bitval[i];trie.push_back(x);fa = new_idx;}else {fa = trie[fa].children[bitval[i]];}}
}
int query(int val) {int fa = 0;bitset<32>bitval(val);//二進制bitset<32>res;//bitset[0]是指第一位for (int i = bitval.size() - 1; i >= 0; i--) {if (trie[fa].children[!bitval[i]]) {//取反fa = trie[fa].children[!bitval[i]];res[i] = 1;}else {fa = trie[fa].children[bitval[i]];res[i] = 0;}}return (int)res.to_ulong();
}
void printout() {for (int i = 0; i < trie.size(); i++) {cout << trie[i].val << "|";for (int j = 0; j < 2; j++) {if (trie[i].children[j])cout << trie[i].children[j] << " ";}cout << "|" << endl;}
}
void solve() {cin >> n;for (int i = 1; i < n; i++) {cin >> u >> v >> w;graph[u].push_back(node(v, w));}dfs(1, 0);//for (int i = 1; i <= n; i++) {// cout << xors[i] << " ";//}//cout << endl;trie_node root;trie.push_back(root);for (int i = 1; i <= n; i++) {build(xors[i]);}//printout();for (int i = 1; i <= n; i++) {ans = max(ans, query(xors[i]));}cout << ans;}
int main() {std::ios::sync_with_stdio(false);std::cin.tie(0);std::cout.tie(0);solve();return 0;
}
- P6824 「EZEC-4」可樂:這里x是有最大值的,暴力方法是對于每一個x都進行異或看看是否滿足條件,時間復雜度為平方。利用a[i]構造0-1trie,可以在位級別,加速判斷。對于第i位,如果k[i]為1,只需要找到與x[i]相同數字的結點,就一定可以知道x與該結點下的a[i]滿足條件。如果k[i]=0,需要找到與x[i]相同數字的結點,否則一定不能與該父節點下的a[i]滿足條件,提前結束。這里也是用由高位到低位的貪心思想。
#include<bits/stdc++.h>
#define MAX_VALUE 1000009
#define mod 1000007
using ll = long long;
using namespace std;
int n, k, a[1000009],res=INT_MIN;
typedef struct node {int val, children[2] = {0}, repeat = 1;
};
vector<node>trie;
void build(int val) {bitset<32>bitval(val);int fa = 0;for (int i = bitval.size() - 1; i >= 0; i--) {if (!trie[fa].children[bitval[i]]) {int new_idx = trie.size();trie[fa].children[bitval[i]] = new_idx;node x;x.val = bitval[i];trie.push_back(x);fa = new_idx;}else {int idx = trie[fa].children[bitval[i]];fa = idx;trie[idx].repeat++;}}
}
void printout() {for (int i = 0; i < trie.size(); i++) {cout << trie[i].val << "|";for (int j = 0; j < 2; j++) {if(trie[i].children[j])cout << trie[i].children[j] << " ";}cout << "|" << trie[i].repeat << endl;}
}
int query(int val,int k) {int ans = 0;bitset<32>bitval(val);bitset<32>bitk(k);int fa = 0;for (int i = bitval.size() - 1; i >= 0; i--) {if (bitk[i]) {//bitk[i]==1if (trie[fa].children[bitval[i]]) {//xor can be 0int idx = trie[fa].children[bitval[i]];//cout << bitval[i] << " "<<trie[idx].repeat << endl;ans += trie[idx].repeat;}if (trie[fa].children[!bitval[i]]) {fa = trie[fa].children[!bitval[i]];}else {break;}}else {//bitk[i]==0if (trie[fa].children[bitval[i]]) {fa = trie[fa].children[bitval[i]];if (i == 0) {ans += 1;//最后一個結點//cout << "end:" << 1 << endl;}}else break;}}return ans;
}
void solve() {node root;trie.push_back(root);cin >> n >> k;for (int i = 1; i <= n; i++) {cin >> a[i];build(a[i]);}//printout();for (int i = 0; i <= (1<<20); i++) {res = max(res, query(i, k));}cout << res;//cout<<query(0, k);
}int main() {std::ios::sync_with_stdio(false);std::cin.tie(0);std::cout.tie(0);solve();return 0;
}