summaryrefslogtreecommitdiff
path: root/string
diff options
context:
space:
mode:
Diffstat (limited to 'string')
-rw-r--r--string/LCSubstring.cpp26
-rw-r--r--string/string.tex4
-rw-r--r--string/suffixArray.cpp61
-rw-r--r--string/suffixAutomaton.cpp62
4 files changed, 92 insertions, 61 deletions
diff --git a/string/LCSubstring.cpp b/string/LCSubstring.cpp
deleted file mode 100644
index 69d636f..0000000
--- a/string/LCSubstring.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-//longest common substring.
-struct lcse {
- int i = 0, s = 0;
-};
-string lcp(string s[2]) {
- if(s[0].length() == 0 || s[1].length() == 0) return "";
- vector<lcse> a(s[0].length()+s[1].length());
- for(int k = 0; k < a.size(); k++) a[k].i=(k < s[0].length() ? k : k - s[0].length()), a[k].s = (k < s[0].length() ? 0 : 1);
- sort(a.begin(), a.end(), [&] (const lcse &u, const lcse &l) {
- int ui = u.i, li = l.i;
- while(ui < s[u.s].length() && li < s[l.s].length()) {
- if(s[u.s][ui] < s[l.s][li]) return true;
- else if(s[u.s][ui] > s[l.s][li]) return false;
- ui++; li++;
- }
- return !(ui < s[u.s].length());
- });
- int r = 0, m=0, c=0;
- for(int i = 0; i < a.size() - 1; i++) {
- if(a[i].s == a[i+1].s) continue;
- c = 0;
- while(a[i].i+c < s[a[i].s].length() && a[i+1].i+c < s[a[i+1].s].length() && s[a[i].s][a[i].i+c] == s[a[i+1].s][a[i+1].i+c]) c++;
- if(c > m) r=i, m=c;
- }
- return m == 0 ? "" : s[a[r].s].substr(a[r].i, m);
-}
diff --git a/string/string.tex b/string/string.tex
index 16ad30a..854b05a 100644
--- a/string/string.tex
+++ b/string/string.tex
@@ -12,8 +12,8 @@
\subsection{Suffix-Array}
\lstinputlisting{string/suffixArray.cpp}
-\subsection{Longest Common Substring}
-\lstinputlisting{string/LCSubstring.cpp}
+\subsection{Suffix-Automaton}
+\lstinputlisting{string/suffixAutomaton.cpp}
\subsection{Longest Common Subsequence}
\lstinputlisting{string/longestCommonSubsequence.cpp}
diff --git a/string/suffixArray.cpp b/string/suffixArray.cpp
index 73c7aff..17a10cf 100644
--- a/string/suffixArray.cpp
+++ b/string/suffixArray.cpp
@@ -1,36 +1,31 @@
-//longest common substring in one string (overlapping not excluded)
-//contains suffix array:--------------------------------------------------------------------
-int cmp(string &s,vector<vector<int>> &v, int i, int vi, int u, int l) {
- int vi2 = (vi + 1) % 2, u2 = u + i / 2, l2 = l + i / 2;
- if(i == 1) return s[u] - s[l];
- else if (v[vi2][u] != v[vi2][l]) return (v[vi2][u] - v[vi2][l]);
- else { //beide groesser tifft nicht mehr ein, da ansonsten vorher schon unterschied in Laenge
- if(u2 >= s.length()) return -1;
- else if(l2 >= s.length()) return 1;
- else return v[vi2][u2] - v[vi2][l2];
- }
-}
+struct SuffixArray { // MAX_LG = ceil(log2(MAX_N))
+ static const int MAX_N = 100010, MAX_LG = 17;
+ pair<pair<int, int>, int> L[MAX_N];
+ int P[MAX_LG + 1][MAX_N], n, step, count;
+ int suffixArray[MAX_N], lcpArray[MAX_N];
-string lcsub(string s) {
- if(s.length() == 0) return "";
- vector<int> a(s.length());
- vector<vector<int>> v(2, vector<int>(s.length(), 0));
- int vi = 0;
- for(int k = 0; k < a.size(); k++) a[k] = k;
- for(int i = 1; i <= s.length(); i *= 2, vi = (vi + 1) % 2) {
- sort(a.begin(), a.end(), [&] (const int &u, const int &l) {
- return cmp(s, v, i, vi, u, l) < 0;
- });
- v[vi][a[0]] = 0;
- for(int z = 1; z < a.size(); z++) v[vi][a[z]] = v[vi][a[z-1]] + (cmp(s, v, i, vi, a[z], a[z-1]) == 0 ? 0 : 1);
- }
-//-------------------------------------------------------------------------------------------
- int r = 0, m=0, c=0;
- for(int i = 0; i < a.size() - 1; i++) {
- c = 0;
- while(a[i]+c < s.length() && a[i+1]+c < s.length() && s[a[i]+c] == s[a[i+1]+c]) c++;
- if(c > m) r=i, m=c;
+ SuffixArray(const string &s) : n(s.size()) { // Laufzeit: O(n*log^2(n))
+ for (int i = 0; i < n; i++) P[0][i] = s[i];
+ suffixArray[0] = 0; // Falls n == 1.
+ for (step = 1, count = 1; count < n; step++, count <<= 1) {
+ for (int i = 0; i < n; i++) L[i] =
+ {{P[step-1][i], i+count < n ? P[step-1][i+count] : -1}, i};
+ sort(L, L + n);
+ for (int i = 0; i < n; i++) P[step][L[i].second] = i > 0 &&
+ L[i].first == L[i-1].first ? P[step][L[i-1].second] : i;
+ }
+ for (int i = 0; i < n; i++) suffixArray[i] = L[i].second;
+ for (int i = 1; i < n; i++)
+ lcpArray[i] = lcp(suffixArray[i - 1], suffixArray[i]);
}
- return m == 0 ? "" : s.substr(a[r], m);
-}
+ // x und y sind Indizes im String, nicht im Suffixarray.
+ int lcp(int x, int y) { // Laufzeit: O(log(n))
+ int k, ret = 0;
+ if (x == y) return n - x;
+ for (k = step - 1; k >= 0 && x < n && y < n; k--)
+ if (P[k][x] == P[k][y])
+ x += 1 << k, y += 1 << k, ret += 1 << k;
+ return ret;
+ }
+};
diff --git a/string/suffixAutomaton.cpp b/string/suffixAutomaton.cpp
new file mode 100644
index 0000000..4e8abcf
--- /dev/null
+++ b/string/suffixAutomaton.cpp
@@ -0,0 +1,62 @@
+#define ALPHABET_SIZE 26
+struct SuffixAutomaton {
+ struct State {
+ int length; int link; int next[ALPHABET_SIZE];
+ State() { memset(next, 0, sizeof(next)); }
+ };
+ static const int MAX_N = 100000; // Maximale Länge des Strings.
+ State states[2 * MAX_N];
+ int size, last;
+
+ SuffixAutomaton(string &s) { // Laufzeit: O(|s|)
+ size = 1; last = 0;
+ states[0].length = 0;
+ states[0].link = -1;
+ for (auto c : s) extend(c);
+ }
+
+ void extend(char c) { // Werte von c müssen bei 0 beginnen.
+ c -= 'a';
+ int current = size++;
+ states[current].length = states[last].length + 1;
+ int pos = last;
+ while (pos != -1 && !states[pos].next[(int)c]) {
+ states[pos].next[(int)c] = current;
+ pos = states[pos].link;
+ }
+ if (pos == -1) states[current].link = 0;
+ else {
+ int q = states[pos].next[(int)c];
+ if (states[pos].length + 1 == states[q].length) {
+ states[current].link = q;
+ } else {
+ int clone = size++;
+ states[clone].length = states[pos].length + 1;
+ states[clone].link = states[q].link;
+ memcpy(states[clone].next, states[q].next,
+ sizeof(states[q].next));
+ while (pos != -1 && states[pos].next[(int)c] == q) {
+ states[pos].next[(int)c] = clone;
+ pos = states[pos].link;
+ }
+ states[q].link = states[current].link = clone;
+ }}
+ last = current;
+ }
+
+ // Paar mit Startposition und Länge des LCS. Index in Parameter s.
+ ii longestCommonSubstring(string &s) { // Laufzeit: O(|s|)
+ int v = 0, l = 0, best = 0, bestpos = 0;
+ for (int i = 0; i < (int)s.size(); i++) {
+ int c = s[i] - 'a';
+ while (v && !states[v].next[c]) {
+ v = states[v].link;
+ l = states[v].length;
+ }
+ if (states[v].next[c]) { v = states[v].next[c]; l++; }
+ if (l > best) { best = l; bestpos = i; }
+ }
+ return ii(bestpos - best + 1, best);
+ }
+};
+