diff options
Diffstat (limited to 'string')
| -rw-r--r-- | string/string.tex | 3 | ||||
| -rw-r--r-- | string/suffixTree.cpp | 79 | ||||
| -rw-r--r-- | string/trie.cpp | 6 |
3 files changed, 85 insertions, 3 deletions
diff --git a/string/string.tex b/string/string.tex index 4d7a3d7..5658330 100644 --- a/string/string.tex +++ b/string/string.tex @@ -9,6 +9,9 @@ \subsection{Trie} \lstinputlisting{string/trie.cpp} +\subsection{Suffix-Baum} +\lstinputlisting{string/suffixTree.cpp} + \subsection{Suffix-Array} \lstinputlisting{string/suffixArray.cpp} diff --git a/string/suffixTree.cpp b/string/suffixTree.cpp new file mode 100644 index 0000000..fba8e16 --- /dev/null +++ b/string/suffixTree.cpp @@ -0,0 +1,79 @@ +// Baut Suffixbaum online auf. Laufzeit: O(n) +// Einmal initSuffixTree() aufrufen und dann extend für jeden Buchstaben. +// $-Zeichen (oder ähnliches) an den Text anhängen! +static const int ALPHABET_SIZE = 128, INF = 0x3FFFFFFF; +string s; +int root, lastIdx, needsSuffix, pos, remainder, curVert, curEdge, curLen; +struct Vert { + int start, end, suffix, next[ALPHABET_SIZE]; + int len() { return min(end, pos + 1) - start; } +}; +vector<Vert> tree; + +int newVert(int start, int end = INF) { + Vert v; + v.start = start; + v.end = end; + v.suffix = 0; + memset(v.next, 0, sizeof(v.next)); + tree.push_back(v); + return ++lastIdx; +} + +void addSuffixLink(int vert) { + if (needsSuffix) tree[needsSuffix].suffix = vert; + needsSuffix = vert; +} + +bool fullImplicitEdge(int vert) { + if (curLen >= tree[vert].len()) { + curEdge += tree[vert].len(); + curLen -= tree[vert].len(); + curVert = vert; + return true; + } + return false; +} + +void initSuffixTree() { + needsSuffix = remainder = curEdge = curLen = 0; + lastIdx = pos = -1; + root = curVert = newVert(-1, -1); +} + +void extend() { + pos++; + needsSuffix = 0; + remainder++; + while (remainder) { + if (curLen == 0) curEdge = pos; + if (tree[curVert].next[(int)s[curEdge]] == 0) { + int leaf = newVert(pos); + tree[curVert].next[(int)s[curEdge]] = leaf; + tree[curVert].next[(int)s[curEdge]] = leaf; + addSuffixLink(curVert); + } else { + int nxt = tree[curVert].next[(int)s[curEdge]]; + if (fullImplicitEdge(nxt)) continue; + if (s[tree[nxt].start + curLen] == s[pos]) { + curLen++; + addSuffixLink(curVert); + break; + } + int split = newVert(tree[nxt].start, tree[nxt].start + curLen); + tree[curVert].next[(int)s[curEdge]] = split; + int leaf = newVert(pos); + tree[split].next[(int)s[pos]] = leaf; + tree[nxt].start += curLen; + tree[split].next[(int)s[tree[nxt].start]] = nxt; + addSuffixLink(split); + } + remainder--; + if (curVert == root && curLen) { + curLen--; + curEdge = pos - remainder + 1; + } else { + curVert = tree[curVert].suffix ? tree[curVert].suffix : root; + } + } +} diff --git a/string/trie.cpp b/string/trie.cpp index 33889dc..163a110 100644 --- a/string/trie.cpp +++ b/string/trie.cpp @@ -4,7 +4,7 @@ struct node { node() { for(int i = 0; i < 26; i++) e[i] = NULL; } }; -void insert(node *root, string &txt, int s) { // Laufzeit: O(|txt|) +void insert(node *root, string &txt, int s = 0) { // Laufzeit: O(|txt|) if(s == (int)txt.size()) root->c++; else { int idx = (int)(txt[s] - 'a'); @@ -12,8 +12,8 @@ void insert(node *root, string &txt, int s) { // Laufzeit: O(|txt|) insert(root->e[idx], txt, s+1); }} -int contains(node *root, string &txt, int s) { // Laufzeit: O(|txt|) - if(s == txt.size()) return root->c; +int contains(node *root, string &txt, int s = 0) { // Laufzeit: O(|txt|) + if(s == (int)txt.size()) return root->c; int idx = (int)(txt[s] - 'a'); if(root->e[idx] != NULL) return contains(root->e[idx], txt, s + 1); else return 0; |
