From f1b3e645381d9b8ea8197fb1473f115de2ee8f96 Mon Sep 17 00:00:00 2001 From: Paul Jungeblut Date: Sat, 22 Nov 2014 13:03:04 +0100 Subject: adding string chapter --- string/suffixArray.cpp | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 string/suffixArray.cpp (limited to 'string/suffixArray.cpp') diff --git a/string/suffixArray.cpp b/string/suffixArray.cpp new file mode 100644 index 0000000..2b2a87b --- /dev/null +++ b/string/suffixArray.cpp @@ -0,0 +1,36 @@ +//longest common substring in one string (overlapping not excluded) +//contains suffix array:-------------------------------------------------------------------- +int cmp(string &s,vector> &v, vector &a, int i, int vi, int u, int l) { + int vi2 = (vi + 1) % 2, u2 = u + i / 2, l2 = l + i / 2; + if(i == 1) return s[u] - s[l]; + else if (v[vi2][u] != v[vi2][l]) return (v[vi2][u] - v[vi2][l]); + else { //beide groesser tifft nicht mehr ein, da ansonsten vorher schon unterschied in Laenge + if(u2 >= s.length()) return -1; + else if(l2 >= s.length()) return 1; + else return v[vi2][u2] - v[vi2][l2]; + } +} + +string lcsub(string s) { + if(s.length() == 0) return ""; + vector a(s.length()); + vector> v(2, vector(s.length(), 0)); + int vi = 0; + for(int k = 0; k < a.size(); k++) a[k] = k; + for(int i = 1; i < s.length(); i *= 2, vi = (vi + 1) % 2) { + sort(a.begin(), a.end(), [&] (const int &u, const int &l) { + return cmp(s, v, a, i, vi, u, l) < 0; + }); + v[vi][a[0]] = 0; + for(int z = 1; z < a.size(); z++) v[vi][a[z]] = v[vi][a[z-1]] + (cmp(s, v, a, i, vi, a[z], a[z-1]) == 0 ? 0 : 1); + } +//------------------------------------------------------------------------------------------- + int r = 0, m=0, c=0; + for(int i = 0; i < a.size() - 1; i++) { + c = 0; + while(a[i]+c < s.length() && a[i+1]+c < s.length() && s[a[i]+c] == s[a[i+1]+c]) c++; + if(c > m) r=i, m=c; + } + return m == 0 ? "" : s.substr(a[r], m); +} + -- cgit v1.2.3 From 6faad4725bebb99535ebb1d479d35c39e45120e1 Mon Sep 17 00:00:00 2001 From: JBatzill Date: Sat, 22 Nov 2014 13:38:26 +0100 Subject: optimized sorting, new runtime: O(n*log²(n)) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- string/suffixArray.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'string/suffixArray.cpp') diff --git a/string/suffixArray.cpp b/string/suffixArray.cpp index 2b2a87b..7c03d70 100644 --- a/string/suffixArray.cpp +++ b/string/suffixArray.cpp @@ -1,6 +1,6 @@ //longest common substring in one string (overlapping not excluded) //contains suffix array:-------------------------------------------------------------------- -int cmp(string &s,vector> &v, vector &a, int i, int vi, int u, int l) { +int cmp(string &s,vector> &v, int i, int vi, int u, int l) { int vi2 = (vi + 1) % 2, u2 = u + i / 2, l2 = l + i / 2; if(i == 1) return s[u] - s[l]; else if (v[vi2][u] != v[vi2][l]) return (v[vi2][u] - v[vi2][l]); @@ -19,10 +19,10 @@ string lcsub(string s) { for(int k = 0; k < a.size(); k++) a[k] = k; for(int i = 1; i < s.length(); i *= 2, vi = (vi + 1) % 2) { sort(a.begin(), a.end(), [&] (const int &u, const int &l) { - return cmp(s, v, a, i, vi, u, l) < 0; + return cmp(s, v, i, vi, u, l) < 0; }); v[vi][a[0]] = 0; - for(int z = 1; z < a.size(); z++) v[vi][a[z]] = v[vi][a[z-1]] + (cmp(s, v, a, i, vi, a[z], a[z-1]) == 0 ? 0 : 1); + for(int z = 1; z < a.size(); z++) v[vi][a[z]] = v[vi][a[z-1]] + (cmp(s, v, i, vi, a[z], a[z-1]) == 0 ? 0 : 1); } //------------------------------------------------------------------------------------------- int r = 0, m=0, c=0; -- cgit v1.2.3 From 6671028d5a0421a95fe313a04cabd8123fe312b6 Mon Sep 17 00:00:00 2001 From: JBatzill Date: Sat, 22 Nov 2014 15:25:21 +0100 Subject: Update suffixArray.cpp --- string/suffixArray.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'string/suffixArray.cpp') diff --git a/string/suffixArray.cpp b/string/suffixArray.cpp index 7c03d70..73c7aff 100644 --- a/string/suffixArray.cpp +++ b/string/suffixArray.cpp @@ -17,7 +17,7 @@ string lcsub(string s) { vector> v(2, vector(s.length(), 0)); int vi = 0; for(int k = 0; k < a.size(); k++) a[k] = k; - for(int i = 1; i < s.length(); i *= 2, vi = (vi + 1) % 2) { + for(int i = 1; i <= s.length(); i *= 2, vi = (vi + 1) % 2) { sort(a.begin(), a.end(), [&] (const int &u, const int &l) { return cmp(s, v, i, vi, u, l) < 0; }); -- cgit v1.2.3