summaryrefslogtreecommitdiff
path: root/string
diff options
context:
space:
mode:
Diffstat (limited to 'string')
-rw-r--r--string/LCSubSequence.cpp17
-rw-r--r--string/LCSubstring.cpp26
-rw-r--r--string/kmp.cpp35
-rw-r--r--string/string.tex16
-rw-r--r--string/suffixArray.cpp36
-rw-r--r--string/trie.cpp25
6 files changed, 155 insertions, 0 deletions
diff --git a/string/LCSubSequence.cpp b/string/LCSubSequence.cpp
new file mode 100644
index 0000000..0ea2913
--- /dev/null
+++ b/string/LCSubSequence.cpp
@@ -0,0 +1,17 @@
+string lcss(string &a, string &b) {
+ int m[a.length() + 1][b.length() + 1], x=0, y=0;
+ memset(m, 0, sizeof(m));
+ for(int y = a.length() - 1; y >= 0; y--) {
+ for(int x = b.length() - 1; x >= 0; x--) {
+ if(a[y] == b[x]) m[y][x] = 1 + m[y+1][x+1];
+ else m[y][x] = max(m[y+1][x], m[y][x+1]);
+ }
+ } //for length only: return m[0][0];
+ string res;
+ while(x < b.length() && y < a.length()) {
+ if(a[y] == b[x]) res += a[y++], x++;
+ else if(m[y][x+1] > m[y+1][x+1]) x++;
+ else y++;
+ }
+ return res;
+}
diff --git a/string/LCSubstring.cpp b/string/LCSubstring.cpp
new file mode 100644
index 0000000..69d636f
--- /dev/null
+++ b/string/LCSubstring.cpp
@@ -0,0 +1,26 @@
+//longest common substring.
+struct lcse {
+ int i = 0, s = 0;
+};
+string lcp(string s[2]) {
+ if(s[0].length() == 0 || s[1].length() == 0) return "";
+ vector<lcse> a(s[0].length()+s[1].length());
+ for(int k = 0; k < a.size(); k++) a[k].i=(k < s[0].length() ? k : k - s[0].length()), a[k].s = (k < s[0].length() ? 0 : 1);
+ sort(a.begin(), a.end(), [&] (const lcse &u, const lcse &l) {
+ int ui = u.i, li = l.i;
+ while(ui < s[u.s].length() && li < s[l.s].length()) {
+ if(s[u.s][ui] < s[l.s][li]) return true;
+ else if(s[u.s][ui] > s[l.s][li]) return false;
+ ui++; li++;
+ }
+ return !(ui < s[u.s].length());
+ });
+ int r = 0, m=0, c=0;
+ for(int i = 0; i < a.size() - 1; i++) {
+ if(a[i].s == a[i+1].s) continue;
+ c = 0;
+ while(a[i].i+c < s[a[i].s].length() && a[i+1].i+c < s[a[i+1].s].length() && s[a[i].s][a[i].i+c] == s[a[i+1].s][a[i+1].i+c]) c++;
+ if(c > m) r=i, m=c;
+ }
+ return m == 0 ? "" : s[a[r].s].substr(a[r].i, m);
+}
diff --git a/string/kmp.cpp b/string/kmp.cpp
new file mode 100644
index 0000000..f7c3630
--- /dev/null
+++ b/string/kmp.cpp
@@ -0,0 +1,35 @@
+#include <iostream>
+#include <vector>
+
+using namespace std;
+
+//Preprocessing Substring sub for KMP-Search
+vector<int> kmp_preprocessing(string& sub) {
+ vector<int> b(sub.size() + 1);
+ b[0] = -1;
+ int i = 0, j = -1;
+ while(i < sub.size()) {
+ while(j >= 0 && sub[i] != sub[j])
+ j = b[j];
+ i++; j++;
+ b[i] = j;
+ }
+ return b;
+}
+
+//Searching after Substring sub in s
+vector<int> kmp_search(string& s, string& sub) {
+ vector<int> pre = kmp_preprocessing(sub);
+ vector<int> result;
+ int i = 0, j = -1;
+ while(i < s.size()) {
+ while(j >= 0 && s[i] != sub[j])
+ j = pre[j];
+ i++; j++;
+ if(j == sub.size()) {
+ result.push_back(i-j);
+ j = pre[j];
+ }
+ }
+ return result;
+}
diff --git a/string/string.tex b/string/string.tex
new file mode 100644
index 0000000..61385fc
--- /dev/null
+++ b/string/string.tex
@@ -0,0 +1,16 @@
+\section{Strings}
+
+\subsection{\textsc{Knuth-Morris-Pratt}-Algorithmus}
+\lstinputlisting{string/kmp.cpp}
+
+\subsection{Trie}
+\lstinputlisting{string/trie.cpp}
+
+\subsection{Suffix-Array}
+\lstinputlisting{string/suffixArray.cpp}
+
+\subsection{Longest Common Substring}
+\lstinputlisting{string/LCSubstring.cpp}
+
+\subsection{Longest Common Subsequence}
+\lstinputlisting{string/LCSubSequence.cpp}
diff --git a/string/suffixArray.cpp b/string/suffixArray.cpp
new file mode 100644
index 0000000..73c7aff
--- /dev/null
+++ b/string/suffixArray.cpp
@@ -0,0 +1,36 @@
+//longest common substring in one string (overlapping not excluded)
+//contains suffix array:--------------------------------------------------------------------
+int cmp(string &s,vector<vector<int>> &v, int i, int vi, int u, int l) {
+ int vi2 = (vi + 1) % 2, u2 = u + i / 2, l2 = l + i / 2;
+ if(i == 1) return s[u] - s[l];
+ else if (v[vi2][u] != v[vi2][l]) return (v[vi2][u] - v[vi2][l]);
+ else { //beide groesser tifft nicht mehr ein, da ansonsten vorher schon unterschied in Laenge
+ if(u2 >= s.length()) return -1;
+ else if(l2 >= s.length()) return 1;
+ else return v[vi2][u2] - v[vi2][l2];
+ }
+}
+
+string lcsub(string s) {
+ if(s.length() == 0) return "";
+ vector<int> a(s.length());
+ vector<vector<int>> v(2, vector<int>(s.length(), 0));
+ int vi = 0;
+ for(int k = 0; k < a.size(); k++) a[k] = k;
+ for(int i = 1; i <= s.length(); i *= 2, vi = (vi + 1) % 2) {
+ sort(a.begin(), a.end(), [&] (const int &u, const int &l) {
+ return cmp(s, v, i, vi, u, l) < 0;
+ });
+ v[vi][a[0]] = 0;
+ for(int z = 1; z < a.size(); z++) v[vi][a[z]] = v[vi][a[z-1]] + (cmp(s, v, i, vi, a[z], a[z-1]) == 0 ? 0 : 1);
+ }
+//-------------------------------------------------------------------------------------------
+ int r = 0, m=0, c=0;
+ for(int i = 0; i < a.size() - 1; i++) {
+ c = 0;
+ while(a[i]+c < s.length() && a[i+1]+c < s.length() && s[a[i]+c] == s[a[i+1]+c]) c++;
+ if(c > m) r=i, m=c;
+ }
+ return m == 0 ? "" : s.substr(a[r], m);
+}
+
diff --git a/string/trie.cpp b/string/trie.cpp
new file mode 100644
index 0000000..f4b979c
--- /dev/null
+++ b/string/trie.cpp
@@ -0,0 +1,25 @@
+//nur fuer kleinbuchstaben!
+struct node {
+ node *(e)[26];
+ int c = 0;//anzahl der woerter die an dem node enden.
+ node() { for(int i = 0; i < 26; i++) e[i] = NULL; }
+};
+
+void insert(node *root, string *txt, int s) {
+ if(s >= txt->length()) root->c++;
+ else {
+ int idx = (int)((*txt).at(s) - 'a');
+ if(root->e[idx] == NULL) {
+ root->e[idx] = new node();
+ }
+ insert(root->e[idx], txt, s+1);
+ }
+}
+
+int contains(node *root, string *txt, int s) {
+ if(s >= txt->length()) return root->c;
+ int idx = (int)((*txt).at(s) - 'a');
+ if(root->e[idx] != NULL) {
+ return contains(root->e[idx], txt, s+1);
+ } else return 0;
+}