diff options
| author | Paul Jungeblut <s_jungeb@i08pc57.atis-stud.uni-karlsruhe.de> | 2014-11-22 13:03:04 +0100 |
|---|---|---|
| committer | Paul Jungeblut <s_jungeb@i08pc57.atis-stud.uni-karlsruhe.de> | 2014-11-22 13:03:04 +0100 |
| commit | f1b3e645381d9b8ea8197fb1473f115de2ee8f96 (patch) | |
| tree | 3bd28497e0dcbb025a9a740fbf441932ea927fd2 /string | |
| parent | 9fe234e7181b1cad9652655e674e7f9f821814b7 (diff) | |
adding string chapter
Diffstat (limited to 'string')
| -rw-r--r-- | string/LCSubstring.cpp | 26 | ||||
| -rw-r--r-- | string/string.tex | 13 | ||||
| -rw-r--r-- | string/suffixArray.cpp | 36 | ||||
| -rw-r--r-- | string/trie.cpp | 25 |
4 files changed, 100 insertions, 0 deletions
diff --git a/string/LCSubstring.cpp b/string/LCSubstring.cpp new file mode 100644 index 0000000..69d636f --- /dev/null +++ b/string/LCSubstring.cpp @@ -0,0 +1,26 @@ +//longest common substring. +struct lcse { + int i = 0, s = 0; +}; +string lcp(string s[2]) { + if(s[0].length() == 0 || s[1].length() == 0) return ""; + vector<lcse> a(s[0].length()+s[1].length()); + for(int k = 0; k < a.size(); k++) a[k].i=(k < s[0].length() ? k : k - s[0].length()), a[k].s = (k < s[0].length() ? 0 : 1); + sort(a.begin(), a.end(), [&] (const lcse &u, const lcse &l) { + int ui = u.i, li = l.i; + while(ui < s[u.s].length() && li < s[l.s].length()) { + if(s[u.s][ui] < s[l.s][li]) return true; + else if(s[u.s][ui] > s[l.s][li]) return false; + ui++; li++; + } + return !(ui < s[u.s].length()); + }); + int r = 0, m=0, c=0; + for(int i = 0; i < a.size() - 1; i++) { + if(a[i].s == a[i+1].s) continue; + c = 0; + while(a[i].i+c < s[a[i].s].length() && a[i+1].i+c < s[a[i+1].s].length() && s[a[i].s][a[i].i+c] == s[a[i+1].s][a[i+1].i+c]) c++; + if(c > m) r=i, m=c; + } + return m == 0 ? "" : s[a[r].s].substr(a[r].i, m); +} diff --git a/string/string.tex b/string/string.tex new file mode 100644 index 0000000..1335c4a --- /dev/null +++ b/string/string.tex @@ -0,0 +1,13 @@ +\section{Strings} + +\subsection{\textsc{Knuth-Morris-Pratt}-Algorithmus} +\lstinputlisting{string/kmp.cpp} + +\subsection{Trie} +\lstinputlisting{string/trie.cpp} + +\subsection{Suffix-Array} +\lstinputlisting{string/suffixArray.cpp} + +\subsection{Longest Common Substring} +\lstinputlisting{string/LCSubstring.cpp} diff --git a/string/suffixArray.cpp b/string/suffixArray.cpp new file mode 100644 index 0000000..2b2a87b --- /dev/null +++ b/string/suffixArray.cpp @@ -0,0 +1,36 @@ +//longest common substring in one string (overlapping not excluded) +//contains suffix array:-------------------------------------------------------------------- +int cmp(string &s,vector<vector<int>> &v, vector<int> &a, int i, int vi, int u, int l) { + int vi2 = (vi + 1) % 2, u2 = u + i / 2, l2 = l + i / 2; + if(i == 1) return s[u] - s[l]; + else if (v[vi2][u] != v[vi2][l]) return (v[vi2][u] - v[vi2][l]); + else { //beide groesser tifft nicht mehr ein, da ansonsten vorher schon unterschied in Laenge + if(u2 >= s.length()) return -1; + else if(l2 >= s.length()) return 1; + else return v[vi2][u2] - v[vi2][l2]; + } +} + +string lcsub(string s) { + if(s.length() == 0) return ""; + vector<int> a(s.length()); + vector<vector<int>> v(2, vector<int>(s.length(), 0)); + int vi = 0; + for(int k = 0; k < a.size(); k++) a[k] = k; + for(int i = 1; i < s.length(); i *= 2, vi = (vi + 1) % 2) { + sort(a.begin(), a.end(), [&] (const int &u, const int &l) { + return cmp(s, v, a, i, vi, u, l) < 0; + }); + v[vi][a[0]] = 0; + for(int z = 1; z < a.size(); z++) v[vi][a[z]] = v[vi][a[z-1]] + (cmp(s, v, a, i, vi, a[z], a[z-1]) == 0 ? 0 : 1); + } +//------------------------------------------------------------------------------------------- + int r = 0, m=0, c=0; + for(int i = 0; i < a.size() - 1; i++) { + c = 0; + while(a[i]+c < s.length() && a[i+1]+c < s.length() && s[a[i]+c] == s[a[i+1]+c]) c++; + if(c > m) r=i, m=c; + } + return m == 0 ? "" : s.substr(a[r], m); +} + diff --git a/string/trie.cpp b/string/trie.cpp new file mode 100644 index 0000000..f4b979c --- /dev/null +++ b/string/trie.cpp @@ -0,0 +1,25 @@ +//nur fuer kleinbuchstaben! +struct node { + node *(e)[26]; + int c = 0;//anzahl der woerter die an dem node enden. + node() { for(int i = 0; i < 26; i++) e[i] = NULL; } +}; + +void insert(node *root, string *txt, int s) { + if(s >= txt->length()) root->c++; + else { + int idx = (int)((*txt).at(s) - 'a'); + if(root->e[idx] == NULL) { + root->e[idx] = new node(); + } + insert(root->e[idx], txt, s+1); + } +} + +int contains(node *root, string *txt, int s) { + if(s >= txt->length()) return root->c; + int idx = (int)((*txt).at(s) - 'a'); + if(root->e[idx] != NULL) { + return contains(root->e[idx], txt, s+1); + } else return 0; +} |
