summary | refs | log | tree | commit | diff
path: root/string/suffixArray.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'string/suffixArray.cpp')
-rw-r--r-- string/suffixArray.cpp 61
1 files changed, 28 insertions, 33 deletions
diff --git a/string/suffixArray.cpp b/string/suffixArray.cpp
index 73c7aff..17a10cf 100644
--- a/string/suffixArray.cpp
+++ b/string/suffixArray.cpp
@@ -1,36 +1,31 @@
-//longest substring that starts at two different positions of one string (the occurrences may overlap)
-//contains suffix array:--------------------------------------------------------------------
-int cmp(string &s,vector<vector<int>> &v, int i, int vi, int u, int l) { // three-way comparison of start positions u and l for round i: <0, 0 or >0
- int vi2 = (vi + 1) % 2, u2 = u + i / 2, l2 = l + i / 2; // vi2: the row of v not written in this round; u2/l2: positions i/2 further right
- if(i == 1) return s[u] - s[l]; // first round: compare the single characters
- else if (v[vi2][u] != v[vi2][l]) return (v[vi2][u] - v[vi2][l]); // ranks from the previous round already differ
- else { // both u2 and l2 running past the end cannot happen here, because a difference in length would already have shown up earlier
- if(u2 >= s.length()) return -1; // u has no second half: it orders first
- else if(l2 >= s.length()) return 1; // l has no second half: it orders first
- else return v[vi2][u2] - v[vi2][l2]; // tie on the first half: decide by the previous-round rank of the second half
- }
-}
+struct SuffixArray { // MAX_LG = ceil(log2(MAX_N))
+ static const int MAX_N = 100010, MAX_LG = 17;
+ pair<pair<int, int>, int> L[MAX_N]; // sort buffer: ((rank of first half, rank of second half or -1), start index)
+ int P[MAX_LG + 1][MAX_N], n, step, count; // P[k][i]: rank of start index i after round k; P[0] holds the raw characters
+ int suffixArray[MAX_N], lcpArray[MAX_N]; // lcpArray[i] = lcp(suffixArray[i-1], suffixArray[i]); index 0 is never written
-string lcsub(string s) {
- if(s.length() == 0) return "";
- vector<int> a(s.length());
- vector<vector<int>> v(2, vector<int>(s.length(), 0));
- int vi = 0;
- for(int k = 0; k < a.size(); k++) a[k] = k;
- for(int i = 1; i <= s.length(); i *= 2, vi = (vi + 1) % 2) { // i doubles every round; vi alternates between the two rows of v
- sort(a.begin(), a.end(), [&] (const int &u, const int &l) {
- return cmp(s, v, i, vi, u, l) < 0;
- });
- v[vi][a[0]] = 0;
- for(int z = 1; z < a.size(); z++) v[vi][a[z]] = v[vi][a[z-1]] + (cmp(s, v, i, vi, a[z], a[z-1]) == 0 ? 0 : 1); // neighbours that compare equal share a rank
- }
-//-------------------------------------------------------------------------------------------
- int r = 0, m=0, c=0;
- for(int i = 0; i < a.size() - 1; i++) {
- c = 0;
- while(a[i]+c < s.length() && a[i+1]+c < s.length() && s[a[i]+c] == s[a[i+1]+c]) c++; // c = length of the common prefix of the suffixes at a[i] and a[i+1]
- if(c > m) r=i, m=c; // remember the longest match seen so far
+ SuffixArray(const string &s) : n(s.size()) { // Runtime: O(n*log^2(n))
+ for (int i = 0; i < n; i++) P[0][i] = s[i]; // NOTE(review): plain char is stored; where char is signed, negative values could collide with the -1 sentinel below - confirm inputs
+ suffixArray[0] = 0; // In case n == 1.
+ for (step = 1, count = 1; count < n; step++, count <<= 1) { // count = offset between the two halves that are combined; doubles every round
+ for (int i = 0; i < n; i++) L[i] =
+ {{P[step-1][i], i+count < n ? P[step-1][i+count] : -1}, i}; // -1 marks a second half that starts past the end of the string
+ sort(L, L + n);
+ for (int i = 0; i < n; i++) P[step][L[i].second] = i > 0 &&
+ L[i].first == L[i-1].first ? P[step][L[i-1].second] : i; // equal pairs reuse the predecessor's rank, otherwise the rank is the sorted position i
+ }
+ for (int i = 0; i < n; i++) suffixArray[i] = L[i].second; // NOTE(review): for n == 1 the loop above never runs, so L[0] is copied without having been assigned in this constructor - confirm
+ for (int i = 1; i < n; i++)
+ lcpArray[i] = lcp(suffixArray[i - 1], suffixArray[i]);
}
- return m == 0 ? "" : s.substr(a[r], m);
-}
+ // x and y are indices into the string, not into the suffix array.
+ int lcp(int x, int y) { // Runtime: O(log(n))
+ int k, ret = 0;
+ if (x == y) return n - x; // same start index: everything up to the end of the string matches
+ for (k = step - 1; k >= 0 && x < n && y < n; k--) // walk the rank tables from the last computed round down to round 0
+ if (P[k][x] == P[k][y]) // equal rank in round k: skip 1 << k positions and count them
+ x += 1 << k, y += 1 << k, ret += 1 << k;
+ return ret;
+ }
+};