summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--string/string.tex3
-rw-r--r--string/suffixTree.cpp79
-rw-r--r--string/trie.cpp6
-rw-r--r--tcr.pdfbin290527 -> 295781 bytes
4 files changed, 85 insertions, 3 deletions
diff --git a/string/string.tex b/string/string.tex
index 4d7a3d7..5658330 100644
--- a/string/string.tex
+++ b/string/string.tex
@@ -9,6 +9,9 @@
\subsection{Trie}
\lstinputlisting{string/trie.cpp}
+\subsection{Suffix-Baum}
+\lstinputlisting{string/suffixTree.cpp}
+
\subsection{Suffix-Array}
\lstinputlisting{string/suffixArray.cpp}
diff --git a/string/suffixTree.cpp b/string/suffixTree.cpp
new file mode 100644
index 0000000..fba8e16
--- /dev/null
+++ b/string/suffixTree.cpp
@@ -0,0 +1,79 @@
+// Baut Suffixbaum online auf. Laufzeit: O(n)
+// Einmal initSuffixTree() aufrufen und dann extend für jeden Buchstaben.
+// $-Zeichen (oder ähnliches) an den Text anhängen!
+static const int ALPHABET_SIZE = 128, INF = 0x3FFFFFFF;
+string s;
+int root, lastIdx, needsSuffix, pos, remainder, curVert, curEdge, curLen;
+struct Vert {
+ int start, end, suffix, next[ALPHABET_SIZE];
+ int len() { return min(end, pos + 1) - start; }
+};
+vector<Vert> tree;
+
+int newVert(int start, int end = INF) {
+ Vert v;
+ v.start = start;
+ v.end = end;
+ v.suffix = 0;
+ memset(v.next, 0, sizeof(v.next));
+ tree.push_back(v);
+ return ++lastIdx;
+}
+
+void addSuffixLink(int vert) {
+ if (needsSuffix) tree[needsSuffix].suffix = vert;
+ needsSuffix = vert;
+}
+
+bool fullImplicitEdge(int vert) {
+ if (curLen >= tree[vert].len()) {
+ curEdge += tree[vert].len();
+ curLen -= tree[vert].len();
+ curVert = vert;
+ return true;
+ }
+ return false;
+}
+
+void initSuffixTree() {
+ needsSuffix = remainder = curEdge = curLen = 0;
+ lastIdx = pos = -1;
+ root = curVert = newVert(-1, -1);
+}
+
+void extend() {
+ pos++;
+ needsSuffix = 0;
+ remainder++;
+ while (remainder) {
+ if (curLen == 0) curEdge = pos;
+ if (tree[curVert].next[(int)s[curEdge]] == 0) {
+ int leaf = newVert(pos);
+ tree[curVert].next[(int)s[curEdge]] = leaf;
+ tree[curVert].next[(int)s[curEdge]] = leaf;
+ addSuffixLink(curVert);
+ } else {
+ int nxt = tree[curVert].next[(int)s[curEdge]];
+ if (fullImplicitEdge(nxt)) continue;
+ if (s[tree[nxt].start + curLen] == s[pos]) {
+ curLen++;
+ addSuffixLink(curVert);
+ break;
+ }
+ int split = newVert(tree[nxt].start, tree[nxt].start + curLen);
+ tree[curVert].next[(int)s[curEdge]] = split;
+ int leaf = newVert(pos);
+ tree[split].next[(int)s[pos]] = leaf;
+ tree[nxt].start += curLen;
+ tree[split].next[(int)s[tree[nxt].start]] = nxt;
+ addSuffixLink(split);
+ }
+ remainder--;
+ if (curVert == root && curLen) {
+ curLen--;
+ curEdge = pos - remainder + 1;
+ } else {
+ curVert = tree[curVert].suffix ? tree[curVert].suffix : root;
+ }
+ }
+}
diff --git a/string/trie.cpp b/string/trie.cpp
index 33889dc..163a110 100644
--- a/string/trie.cpp
+++ b/string/trie.cpp
@@ -4,7 +4,7 @@ struct node {
node() { for(int i = 0; i < 26; i++) e[i] = NULL; }
};
-void insert(node *root, string &txt, int s) { // Laufzeit: O(|txt|)
+void insert(node *root, string &txt, int s = 0) { // Laufzeit: O(|txt|)
if(s == (int)txt.size()) root->c++;
else {
int idx = (int)(txt[s] - 'a');
@@ -12,8 +12,8 @@ void insert(node *root, string &txt, int s) { // Laufzeit: O(|txt|)
insert(root->e[idx], txt, s+1);
}}
-int contains(node *root, string &txt, int s) { // Laufzeit: O(|txt|)
- if(s == txt.size()) return root->c;
+int contains(node *root, string &txt, int s = 0) { // Laufzeit: O(|txt|)
+ if(s == (int)txt.size()) return root->c;
int idx = (int)(txt[s] - 'a');
if(root->e[idx] != NULL) return contains(root->e[idx], txt, s + 1);
else return 0;
diff --git a/tcr.pdf b/tcr.pdf
index bf0ab23..544f37a 100644
--- a/tcr.pdf
+++ b/tcr.pdf
Binary files differ