summaryrefslogtreecommitdiff
path: root/string
diff options
context:
space:
mode:
authormzuenni <michi.zuendorf@gmail.com>2022-06-27 17:19:28 +0200
committermzuenni <michi.zuendorf@gmail.com>2022-06-27 17:19:28 +0200
commit5ab8a5088b729a9953b8dff1b2a985dc8fb2098b (patch)
treeed40d6936c0e9eee40ba62751cbf99ecddbaddc2 /string
parentadabbad9c51cf7cd3874bfde8eac1fbcf84fec10 (diff)
updated tcr
Diffstat (limited to 'string')
-rw-r--r--string/ahoCorasick.cpp103
-rw-r--r--string/deBruijn.cpp7
-rw-r--r--string/duval.cpp21
-rw-r--r--string/kmp.cpp44
-rw-r--r--string/longestCommonSubsequence.cpp14
-rw-r--r--string/lyndon.cpp11
-rw-r--r--string/manacher.cpp51
-rw-r--r--string/rollingHash.cpp32
-rw-r--r--string/string.tex142
-rw-r--r--string/suffixArray.cpp52
-rw-r--r--string/suffixAutomaton.cpp49
-rw-r--r--string/suffixTree.cpp151
-rw-r--r--string/trie.cpp44
-rw-r--r--string/z.cpp15
14 files changed, 434 insertions, 302 deletions
diff --git a/string/ahoCorasick.cpp b/string/ahoCorasick.cpp
index d221fcb..530490e 100644
--- a/string/ahoCorasick.cpp
+++ b/string/ahoCorasick.cpp
@@ -1,57 +1,54 @@
-// Laufzeit: O(n + m + z), n = #Text, m = Summe #Pattern, z = #Matches
-// Findet mehrere Patterns gleichzeitig in einem String.
-// 1) Wurzel erstellen: aho.push_back(vertex());
-// 2) Mit addString(0, pattern, idx); Patterns hinzufügen.
-// 3) finishAutomaton(0) aufrufen.
-// 4) Mit state = go(state, c) in nächsten Zustand wechseln.
-// DANACH: Wenn patterns-Vektor nicht leer ist: Hier enden alle
-// enthaltenen Patterns.
-// ACHTUNG: Die Zahlenwerte der auftretenden Buchstaben müssen
-// zusammenhängend sein und bei 0 beginnen!
-struct vertex {
- int next[ALPHABET_SIZE], failure;
- int character;
- vector<int> patterns; // Indizes der Patterns, die hier enden.
- vertex() { for (int i = 0; i < ALPHABET_SIZE; i++) next[i] = -1; }
-};
-vector<vertex> aho;
+constexpr ll ALPHABET_SIZE = 26;
+constexpr char OFFSET = 26;
+struct AhoCorasick {
+ struct vert {
+ int suffix, exit, character, parent;
+ vector<int> nxt, patterns;
+ vert(int c, int p) : suffix(-1), exit(-1),
+ character(c), nxt(ALPHABET_SIZE, -1), parent(p) {}
+ };
+ vector<vert> aho;
-void addString(int v, vector<int> &pattern, int patternIdx) {
- for (int i = 0; i < (int)pattern.size(); i++) {
- if (aho[v].next[pattern[i]] == -1) {
- aho[v].next[pattern[i]] = aho.size();
- aho.push_back(vertex());
- aho.back().character = pattern[i];
- }
- v = aho[v].next[pattern[i]];
- }
- aho[v].patterns.push_back(patternIdx);
-}
+ AhoCorasick() {aho.push_back(vert(-1, 0));}
-void finishAutomaton(int v) {
- for (int i = 0; i < ALPHABET_SIZE; i++)
- if (aho[v].next[i] == -1) aho[v].next[i] = v;
+ // Call once for each pattern.
+ void addString(string &s, int patternIdx) {
+ int v = 0;
+ for (char c : s) {
+ int idx = c - OFFSET;
+ if (aho[v].nxt[idx] == -1) {
+ aho[v].nxt[idx] = sz(aho);
+ aho.emplace_back(idx, v);
+ }
+ v = aho[v].nxt[idx];
+ }
+ aho[v].patterns.push_back(patternIdx);
+ }
- queue<int> q;
- for (int i = 0; i < ALPHABET_SIZE; i++) {
- if (aho[v].next[i] != v) {
- aho[aho[v].next[i]].failure = v;
- q.push(aho[v].next[i]);
- }}
- while (!q.empty()) {
- int r = q.front(); q.pop();
- for (int i = 0; i < ALPHABET_SIZE; i++) {
- if (aho[r].next[i] != -1) {
- q.push(aho[r].next[i]);
- int f = aho[r].failure;
- while (aho[f].next[i] == -1) f = aho[f].failure;
- aho[aho[r].next[i]].failure = aho[f].next[i];
- for (int j = 0; j < (int)aho[aho[f].next[i]].patterns.size(); j++) {
- aho[aho[r].next[i]].patterns.push_back(
- aho[aho[f].next[i]].patterns[j]);
-}}}}}
+ int getSuffix(int v) {
+ if (aho[v].suffix == -1) {
+ if (v == 0 || aho[v].parent == 0) aho[v].suffix = 0;
+ else aho[v].suffix = go(getSuffix(aho[v].parent),
+ aho[v].character);
+ }
+ return aho[v].suffix;
+ }
-int go(int v, int c) {
- if (aho[v].next[c] != -1) return aho[v].next[c];
- else return go(aho[v].failure, c);
-}
+ int getExit(int v) {
+ if (aho[v].exit == -1) {
+ int suffix = getSuffix(v);
+ if (v == 0) aho[v].exit = 0;
+ else {
+ if (aho[suffix].patterns.empty()) {
+ aho[v].exit = getExit(suffix);
+ } else {
+ aho[v].exit = suffix;
+ }}}
+ return aho[v].exit;
+ }
+
+ int go(int v, int idx) { // Root is v=0.
+ if (aho[v].nxt[idx] != -1) return aho[v].nxt[idx];
+ else return v == 0 ? 0 : go(getSuffix(v), idx);
+ }
+}; \ No newline at end of file
diff --git a/string/deBruijn.cpp b/string/deBruijn.cpp
new file mode 100644
index 0000000..e829137
--- /dev/null
+++ b/string/deBruijn.cpp
@@ -0,0 +1,7 @@
+string deBruijn(int n, char mi = '0', char ma = '1') {
+ string res, c(1, mi);
+ do {
+ if (n % sz(c) == 0) res += c;
+ } while(next(c, n, mi, ma));
+ return res;
+}
diff --git a/string/duval.cpp b/string/duval.cpp
new file mode 100644
index 0000000..6d80e95
--- /dev/null
+++ b/string/duval.cpp
@@ -0,0 +1,21 @@
+vector<pair<int, int>> duval(const string& s) {
+ vector<pair<int, int>> res;
+ for (int i = 0; i < sz(s);) {
+ int j = i + 1, k = i;
+ for (; j < sz(s) && s[k] <= s[j]; j++) {
+ if (s[k] < s[j]) k = i;
+ else k++;
+ }
+ while (i <= k) {
+ res.push_back({i, i + j - k});
+ i += j - k;
+ }}
+ return res;
+}
+
+int minrotation(const string& s) {
+ auto parts = duval(s+s);
+ for (auto e : parts) {
+ if (e.first < sz(s) && e.second >= sz(s)) {
+ return e.first;
+}}}
diff --git a/string/kmp.cpp b/string/kmp.cpp
index 450b368..282019e 100644
--- a/string/kmp.cpp
+++ b/string/kmp.cpp
@@ -1,25 +1,23 @@
-// Laufzeit: O(n + m), n = #Text, m = #Pattern
-vector<int> kmpPreprocessing(string &sub) {
- vector<int> b(sub.length() + 1);
- b[0] = -1;
- int i = 0, j = -1;
- while (i < (int)sub.length()) {
- while (j >= 0 && sub[i] != sub[j]) j = b[j];
- i++; j++;
- b[i] = j;
- }
- return b;
+vector<int> kmpPreprocessing(const string& sub) {
+ vector<int> b(sub.size() + 1);
+ b[0] = -1;
+ int i = 0, j = -1;
+ while (i < (int)sub.size()) {
+ while (j >= 0 && sub[i] != sub[j]) j = b[j];
+ i++; j++;
+ b[i] = j;
+ }
+ return b;
}
-
-vector<int> kmpSearch(string &s, string &sub) {
- vector<int> pre = kmpPreprocessing(sub), result;
- int i = 0, j = 0;
- while (i < (int)s.length()) {
- while (j >= 0 && s[i] != sub[j]) j = pre[j];
- i++; j++;
- if (j == (int)sub.length()) {
- result.push_back(i - j);
- j = pre[j];
- }}
- return result;
+vector<int> kmpSearch(const string& s, const string& sub) {
+ vector<int> pre = kmpPreprocessing(sub), result;
+ int i = 0, j = 0;
+ while (i < (int)s.size()) {
+ while (j >= 0 && s[i] != sub[j]) j = pre[j];
+ i++; j++;
+ if (j == (int)sub.size()) {
+ result.push_back(i - j);
+ j = pre[j];
+ }}
+ return result;
}
diff --git a/string/longestCommonSubsequence.cpp b/string/longestCommonSubsequence.cpp
index 7bcf851..dd2368e 100644
--- a/string/longestCommonSubsequence.cpp
+++ b/string/longestCommonSubsequence.cpp
@@ -1,14 +1,12 @@
-// Laufzeit: O(|a|*|b|)
-string lcss(string &a, string &b) {
- int m[a.length() + 1][b.length() + 1], x=0, y=0;
- memset(m, 0, sizeof(m));
- for(int y = a.length() - 1; y >= 0; y--) {
- for(int x = b.length() - 1; x >= 0; x--) {
+string lcss(string& a, string& b) {
+ vector<vector<int>> m(a.size() + 1, vector<int>(b.size() + 1));
+ for(int y = a.size() - 1; y >= 0; y--) {
+ for(int x = b.size() - 1; x >= 0; x--) {
if(a[y] == b[x]) m[y][x] = 1 + m[y+1][x+1];
else m[y][x] = max(m[y+1][x], m[y][x+1]);
}} // Für die Länge: return m[0][0];
- string res;
- while(x < b.length() && y < a.length()) {
+ string res; int x=0; int y=0;
+ while(x < b.size() && y < a.size()) {
if(a[y] == b[x]) res += a[y++], x++;
else if(m[y][x+1] > m[y+1][x+1]) x++;
else y++;
diff --git a/string/lyndon.cpp b/string/lyndon.cpp
new file mode 100644
index 0000000..6a131a5
--- /dev/null
+++ b/string/lyndon.cpp
@@ -0,0 +1,11 @@
+bool next(string& s, int n, char mi = '0', char ma = '1') {
+ for (ll i = sz(s), j = sz(s); i < n; i++)
+ s.push_back(s[i % j]);
+ while(!s.empty() && s.back() == ma) s.pop_back();
+ if (s.empty()) {
+ s = mi;
+ return false;
+ } else {
+ s.back()++;
+ return true;
+}}
diff --git a/string/manacher.cpp b/string/manacher.cpp
index 8bd58fb..a1cf2da 100644
--- a/string/manacher.cpp
+++ b/string/manacher.cpp
@@ -1,30 +1,27 @@
-char input[MAX_N];
-char s[2 * MAX_N + 1];
-int longest[2 * MAX_N + 1];
+string a, b; //a needs to be set
+vector<int> longest;
-void setDots() {
- s[0] = '.';
- int j = 1;
- for (int i = 0; i < (int)strlen(input); i++) {
- s[j++] = input[i];
- s[j++] = '.';
- }
- s[j] = '\0';
-}
+//transformes "aa" to ".a.a." to find even length palindromes
+void init() {
+ b = string(sz(a) * 2 + 1, '.');
+ longest.assign(sz(b), 0);
+ for (int i = 0; i < sz(a); i++) {
+ b[2 * i + 1] = a[i];
+}}
void manacher() {
- int center = 0, last = 0, n = strlen(s);
- memset(longest, 0, sizeof(longest));
-
- for (int i = 1; i < n - 1; i++) {
- int i2 = 2 * center - i;
- longest[i] = (last > i) ? min(last - i, longest[i2]) : 0;
- while (i + longest[i] + 1 < n && i - longest[i] - 1 >= 0 &&
- s[i + longest[i] + 1] == s[i - longest[i] - 1]) longest[i]++;
- if (i + longest[i] > last) {
- center = i;
- last = i + longest[i];
- }
- }
- for (int i = 0; i < n; i++) longest[i] = 2 * longest[i] + 1;
-}
+ int center = 0, last = 0, n = sz(b);
+ for (int i = 1; i < n - 1; i++) {
+ int i2 = 2 * center - i;
+ longest[i] = (last > i) ? min(last - i, longest[i2]) : 0;
+ while (i + longest[i] + 1 < n && i - longest[i] - 1 >= 0 &&
+ b[i + longest[i] + 1] == b[i - longest[i] - 1]) {
+ longest[i]++;
+ }
+ if (i + longest[i] > last) {
+ center = i;
+ last = i + longest[i];
+ }}
+ //convert lengths to string b (optional)
+ for (int i = 0; i < n; i++) longest[i] = 2 * longest[i] + 1;
+} \ No newline at end of file
diff --git a/string/rollingHash.cpp b/string/rollingHash.cpp
index 0df4c87..2185207 100644
--- a/string/rollingHash.cpp
+++ b/string/rollingHash.cpp
@@ -1,19 +1,17 @@
-ll q = 31; // Größer als Alphabetgröße. q=31,53,311
+// q = 29, 53, 101, 257, 1009, 65537
+// or choose q random from [sigma, m)
+// m = 1500000001, 1600000009, 1700000009
struct Hasher {
- string s;
- ll mod;
- vector<ll> power, pref;
- Hasher(const string& s, ll mod) : s(s), mod(mod) {
- power.push_back(1);
- for (int i = 1; i < (int)s.size(); i++)
- power.push_back(power.back() * q % mod);
- pref.push_back(0);
- for (int i = 0; i < (int)s.size(); i++)
- pref.push_back((pref.back() * q % mod + s[i]) % mod);
- }
+ vector<ll> power = {1}, pref = {0};
+ ll m, q; char c;
+ Hasher(const string& s, ll m, ll q, char c) :
+ m(m), q(q), c(c) {
+ for (char x : s) {
+ power.push_back(power.back() * q % m);
+ pref.push_back((pref.back() * q % m + (x - c)) % m);
+ }}
- // Berechnet hash(s[l..r]). l,r inklusive.
- ll hash(int l, int r) {
- return (pref[r+1] - power[r-l+1] * pref[l] % mod + mod) % mod;
- }
-};
+ ll hash(int l, int r) { // Berechnet hash(s[l..r)).
+ return (pref[r] - power[r-l] * pref[l] % m + m) % m;
+ }
+}; \ No newline at end of file
diff --git a/string/string.tex b/string/string.tex
index e13b516..f426c61 100644
--- a/string/string.tex
+++ b/string/string.tex
@@ -1,48 +1,118 @@
\section{Strings}
-\subsection{\textsc{Knuth-Morris-Pratt}-Algorithmus}
-\lstinputlisting{string/kmp.cpp}
+\begin{algorithm}{\textsc{Knuth-Morris-Pratt}-Algorithmus}
+ \begin{methods}
+ \method{kmpSearch}{sucht \code{sub} in \code{s}}{\abs{s}+\abs{sub}}
+ \end{methods}
+ \sourcecode{string/kmp.cpp}
+\end{algorithm}
-\subsection{\textsc{Aho-Corasick}-Automat}
-\lstinputlisting{string/ahoCorasick.cpp}
+\begin{algorithm}{Z-Algorithmus}
+ \begin{methods}[ll]
+ $z_i\coloneqq$ Längstes gemeinsames Präfix von $s_0\cdots s_{n-1}$ und $s_i\cdots s_{n-1}$ & \runtime{n}
+ \end{methods}
+ Suchen: Z-Algorithmus auf \code{P\$S} ausführen, Positionen mit $z_i=\abs{P}$ zurückgeben
+ \sourcecode{string/z.cpp}
+\end{algorithm}
-\subsection{Trie}
-\lstinputlisting{string/trie.cpp}
+\begin{algorithm}{Trie}
+ \sourcecode{string/trie.cpp}
+\end{algorithm}
-\subsection{Suffix-Baum}
-\lstinputlisting{string/suffixTree.cpp}
+\begin{algorithm}{Longest Common Subsequence}
+ \begin{methods}
+ \method{lcss}{findet längste gemeinsame Sequenz}{\abs{a}\*\abs{b}}
+ \end{methods}
+ \sourcecode{string/longestCommonSubsequence.cpp}
+\end{algorithm}
-\subsection{Suffix-Array}
-\lstinputlisting{string/suffixArray.cpp}
+\begin{algorithm}{\textsc{Manacher}'s Algorithm, Longest Palindrome}
+ \begin{methods}
+ \method{init}{transformiert \code{string a}}{n}
+ \method{manacher}{berechnet Länge der Palindrome}{n}
+ \end{methods}
+ \sourcecode{string/manacher.cpp}
+\end{algorithm}
-\subsection{Suffix-Automaton}
-\lstinputlisting{string/suffixAutomaton.cpp}
-\begin{itemize}[nosep]
- \item \textbf{Ist \lstinline{w} Substring von \lstinline{s}?}
- Baue Automaten für \lstinline{s} und wende ihn auf \lstinline{w} an.
- Wenn alle Übergänge vorhanden sind, ist \lstinline{w} Substring von \lstinline{s}.
+\begin{algorithm}{Rolling Hash}
+ \sourcecode{string/rollingHash.cpp}
+\end{algorithm}
- \item \textbf{Ist \lstinline{w} Suffix von \lstinline{s}?}
- Wie oben.
- Überprüfe am Ende, ob aktueller Zustand ein Terminal ist.
+\begin{algorithm}{\textsc{Aho-Corasick}-Automat}
+ \begin{methods}[ll]
+ sucht patterns im Text & \runtime{\abs{Text}+\sum\abs{pattern}+\abs{matches}}
+ \end{methods}
+ \begin{enumerate}
+ \item mit \code{addString(pattern, idx);} Patterns hinzufügen.
+ \item mit \code{state = go(state, idx)} in nächsten Zustand wechseln.
+ \item mit \code{state = getExit(state)} den exit Kanten folgen bis \code{state == 0}
+ \item dabei mit \code{aho[state].patterns} die Matches zählen
+ \end{enumerate}
+ \sourcecode{string/ahoCorasick.cpp}
+\end{algorithm}
- \item \textbf{Anzahl verschiedener Substrings.}
- Jeder Pfad im Automaten entspricht einem Substring.
- Für einen Knoten ist die Anzahl der ausgehenden Pfade gleich der Summe über die Anzahlen der Kindknoten plus 1.
- Der letzte Summand ist der Pfad, der in diesem Knoten endet.
+\begin{algorithm}{Suffix-Baum}
+ \begin{methods}
+ \method{SuffixTree}{berechnet einen Suffixbaum}{\abs{s}}
+ \method{extend}{fügt den nächsten Buchstaben aus \code{s} ein}{1}
+ \end{methods}
+ \sourcecode{string/suffixTree.cpp}
+\end{algorithm}
- \item \textbf{Wie oft taucht \lstinline{w} in \lstinline{s} auf?}
- Sei \lstinline{p} der Zustand nach Abarbeitung von \lstinline{w}.
- Lösung ist Anzahl der Pfade, die in \lstinline{p} starten und in einem Terminal enden.
- Diese Zahl lässt sich wie oben rekursiv berechnen.
- Bei jedem Knoten darf nur dann plus 1 gerechnet werden, wenn es ein Terminal ist.
-\end{itemize}
+\begin{algorithm}{Suffix-Array}
+ \begin{methods}
+ \method{SuffixArray}{berechnet ein Suffix Array}{\abs{s}\*\log^2(\abs{s})}
+ \method{lcp}{berechnet den logest common prefix}{\log(\abs{s})}
+ \method{}{von \code{s[x]} und \code{s[y]}}{}
+ \end{methods}
+ \textbf{ACHTUNG:} \code{s} muss mit einem sentinel enden! \code{'\$'} oder \code{'#'}
+ \sourcecode{string/suffixArray.cpp}
+\end{algorithm}
-\subsection{Longest Common Subsequence}
-\lstinputlisting{string/longestCommonSubsequence.cpp}
+\begin{algorithm}{Suffix-Automaton}
+ \sourcecode{string/suffixAutomaton.cpp}
+ \begin{itemize}
+ \item \textbf{Ist \textit{w} Substring von \textit{s}?}
+ Baue Automaten für \textit{s} und wende ihn auf \textit{w} an.
+ Wenn alle Übergänge vorhanden sind, ist \textit{w} Substring von \textit{s}.
+
+ \item \textbf{Ist \textit{w} Suffix von \textit{s}?}
+ Wie oben.
+ Überprüfe am Ende, ob aktueller Zustand ein Terminal ist.
+
+ \item \textbf{Anzahl verschiedener Substrings.}
+ Jeder Pfad im Automaten entspricht einem Substring.
+ Für einen Knoten ist die Anzahl der ausgehenden Pfade gleich der Summe über die Anzahlen der Kindknoten plus 1.
+ Der letzte Summand ist der Pfad, der in diesem Knoten endet.
+
+ \item \textbf{Wie oft taucht \textit{w} in \textit{s} auf?}
+ Sei \textit{p} der Zustand nach Abarbeitung von \textit{w}.
+ Lösung ist Anzahl der Pfade, die in \textit{p} starten und in einem Terminal enden.
+ Diese Zahl lässt sich wie oben rekursiv berechnen.
+ Bei jedem Knoten darf nur dann plus 1 gerechnet werden, wenn es ein Terminal ist.
+ \end{itemize}
+\end{algorithm}
-\subsection{Rolling Hash}
-\lstinputlisting{string/rollingHash.cpp}
-
-\subsection{\textsc{Manacher}'s Algorithm, Longest Palindrome}
-\lstinputlisting{string/manacher.cpp}
+\begin{algorithm}{Lyndon und De-Bruijn}
+ \begin{itemize}
+ \item \textbf{Lyndon-Wort:} Ein Wort das lexikographisch kleiner ist als jede seiner Rotationen.
+ \item Jedes Wort kann \emph{eindeutig} in eine nicht ansteigende Folge von Lyndon-Worten zerlegt werden.
+ \item Für Lyndon-Worte $u, v$ mit $u<v$ gilt, dass $uv$ auch ein Lyndon-Wort ist.
+ \end{itemize}
+ \begin{methods}
+ \method[, Durchschnitt $\Theta(1)$]{next}{lexikographisch nächstes Lyndon-Wort}{n}
+ \method{duval}{zerlegt $s$ in Lyndon-Worte}{n}
+ \method{minrotation}{berechnet kleinste Rotation von $s$}{n}
+ \end{methods}
+ \sourcecode{string/lyndon.cpp}
+ \sourcecode{string/duval.cpp}
+ \begin{itemize}
+ \item \textbf{De-Bruijn-Sequenze $\boldsymbol{B(\Sigma, n)}$:}~ein Wort das jedes Wort der Länge $n$ genau einmal als substring enthält (und minimal ist). Wobei $B(\Sigma, n)$ zyklisch betrachtet wird.
+ \item es gibt $\frac{(k!)^{k^{n-1}}}{k^{n}}$ verschiedene $B(\Sigma, n)$
+ \item $B(\Sigma, n)$ hat Länge $\abs{\Sigma}^n$
+ \end{itemize}
+ \begin{methods}
+ \method{deBruijn}{berechnet ein festes $B(\Sigma, n)$}{\abs{\Sigma}^n}
+ \end{methods}
+ \sourcecode{string/deBruijn.cpp}
+\end{algorithm}
diff --git a/string/suffixArray.cpp b/string/suffixArray.cpp
index 17a10cf..66b0ee9 100644
--- a/string/suffixArray.cpp
+++ b/string/suffixArray.cpp
@@ -1,31 +1,37 @@
-struct SuffixArray { // MAX_LG = ceil(log2(MAX_N))
- static const int MAX_N = 100010, MAX_LG = 17;
- pair<pair<int, int>, int> L[MAX_N];
- int P[MAX_LG + 1][MAX_N], n, step, count;
- int suffixArray[MAX_N], lcpArray[MAX_N];
+struct SuffixArray {
+ vector<pair<pair<int, int>, int>> L;
+ int n, step, count;
+ vector<vector<int>> P;
+ vector<int> SA, LCP;
- SuffixArray(const string &s) : n(s.size()) { // Laufzeit: O(n*log^2(n))
+ SuffixArray(const string& s) : n(sz(s)) {
+ SA.resize(n); LCP.resize(n); L.resize(n);
+ P.assign(ceil(log2(n)) + 1, vector<int>(n));
for (int i = 0; i < n; i++) P[0][i] = s[i];
- suffixArray[0] = 0; // Falls n == 1.
- for (step = 1, count = 1; count < n; step++, count <<= 1) {
- for (int i = 0; i < n; i++) L[i] =
- {{P[step-1][i], i+count < n ? P[step-1][i+count] : -1}, i};
- sort(L, L + n);
- for (int i = 0; i < n; i++) P[step][L[i].second] = i > 0 &&
- L[i].first == L[i-1].first ? P[step][L[i-1].second] : i;
- }
- for (int i = 0; i < n; i++) suffixArray[i] = L[i].second;
- for (int i = 1; i < n; i++)
- lcpArray[i] = lcp(suffixArray[i - 1], suffixArray[i]);
+ for (step = 1, count = 1; count < n; step++, count *= 2) {
+ for (int i = 0; i < n; i++)
+ L[i] = {{P[step-1][i],
+ i+count < n ? P[step-1][i+count] : -1}, i};
+ sort(L.begin(), L.end());
+ for (int i = 0; i < n; i++) {
+ P[step][L[i].second] =
+ i > 0 && L[i].first == L[i-1].first ?
+ P[step][L[i-1].second] : i;
+ }}
+ for (int i = 0; i < n; i++) SA[i] = L[i].second;
+ for (int i = 1; i < n; i++) LCP[i] = lcp(SA[i - 1], SA[i]);
}
- // x und y sind Indizes im String, nicht im Suffixarray.
- int lcp(int x, int y) { // Laufzeit: O(log(n))
- int k, ret = 0;
+ // x and y are text-indices, not SA-indices.
+ int lcp(int x, int y) {
+ int ret = 0;
if (x == y) return n - x;
- for (k = step - 1; k >= 0 && x < n && y < n; k--)
- if (P[k][x] == P[k][y])
- x += 1 << k, y += 1 << k, ret += 1 << k;
+ for (int k = step - 1; k >= 0 && x < n && y < n; k--)
+ if (P[k][x] == P[k][y]) {
+ x += 1 << k;
+ y += 1 << k;
+ ret += 1 << k;
+ }
return ret;
}
};
diff --git a/string/suffixAutomaton.cpp b/string/suffixAutomaton.cpp
index 7f4885c..15265c8 100644
--- a/string/suffixAutomaton.cpp
+++ b/string/suffixAutomaton.cpp
@@ -1,42 +1,42 @@
-#define ALPHABET_SIZE 26
+constexpr char MIN_CHAR = 'a';
+constexpr long long ALPHABET_SIZE = 26;
struct SuffixAutomaton {
struct State {
- int length; int link; int next[ALPHABET_SIZE];
- State() { memset(next, 0, sizeof(next)); }
+ int length, link;
+ vector<int> next;
+ State() : next(ALPHABET_SIZE) {}
};
- static const int MAX_N = 100000; // Maximale Länge des Strings.
- State states[2 * MAX_N];
+ vector<State> states;
int size, last;
- SuffixAutomaton(string &s) { // Laufzeit: O(|s|)
+ SuffixAutomaton(string &s) {
+ states.resize(2 * sz(s));
size = 1; last = 0;
states[0].length = 0;
states[0].link = -1;
- for (auto c : s) extend(c);
+ for (auto c : s) extend(c - MIN_CHAR);
}
- void extend(char c) {
- c -= 'a'; // Werte von c müssen bei 0 beginnen.
+ void extend(int c) {
int current = size++;
states[current].length = states[last].length + 1;
int pos = last;
- while (pos != -1 && !states[pos].next[(int)c]) {
- states[pos].next[(int)c] = current;
+ while (pos != -1 && !states[pos].next[c]) {
+ states[pos].next[c] = current;
pos = states[pos].link;
}
if (pos == -1) states[current].link = 0;
else {
- int q = states[pos].next[(int)c];
+ int q = states[pos].next[c];
if (states[pos].length + 1 == states[q].length) {
states[current].link = q;
} else {
int clone = size++;
states[clone].length = states[pos].length + 1;
states[clone].link = states[q].link;
- memcpy(states[clone].next, states[q].next,
- sizeof(states[q].next));
- while (pos != -1 && states[pos].next[(int)c] == q) {
- states[pos].next[(int)c] = clone;
+ states[clone].next = states[q].next;
+ while (pos != -1 && states[pos].next[c] == q) {
+ states[pos].next[c] = clone;
pos = states[pos].link;
}
states[q].link = states[current].link = clone;
@@ -44,22 +44,23 @@ struct SuffixAutomaton {
last = current;
}
- // Paar mit Startposition und Länge des LCS. Index in Parameter s.
- ii longestCommonSubstring(string &s) { // Laufzeit: O(|s|)
+ // Pair with start index and length of LCS.
+ // Index in parameter t.
+ pair<int, int> longestCommonSubstring(string &t) {
int v = 0, l = 0, best = 0, bestpos = 0;
- for (int i = 0; i < (int)s.size(); i++) {
- int c = s[i] - 'a';
+ for (int i = 0; i < (int)t.size(); i++) {
+ int c = t[i] - MIN_CHAR;
while (v && !states[v].next[c]) {
v = states[v].link;
l = states[v].length;
}
- if (states[v].next[c]) { v = states[v].next[c]; l++; }
- if (l > best) { best = l; bestpos = i; }
+ if (states[v].next[c]) {v = states[v].next[c]; l++;}
+ if (l > best) {best = l; bestpos = i;}
}
- return ii(bestpos - best + 1, best);
+ return {bestpos - best + 1, best};
}
- // Berechnet die Terminale des Automaten.
+ // Returns all terminals of the automaton.
vector<int> calculateTerminals() {
vector<int> terminals;
int pos = last;
diff --git a/string/suffixTree.cpp b/string/suffixTree.cpp
index fe065b2..f96992e 100644
--- a/string/suffixTree.cpp
+++ b/string/suffixTree.cpp
@@ -1,78 +1,83 @@
-// Baut Suffixbaum online auf. Laufzeit: O(n)
-// Einmal initSuffixTree() aufrufen und dann extend für jeden Buchstaben.
-// '\0'-Zeichen (oder ähnliches) an den Text anhängen!
-string s;
-int root, lastIdx, needsSuffix, pos, remainder, curVert, curEdge, curLen;
-struct Vert {
- int start, end, suffix; // Kante [start,end)
- map<char, int> next;
- int len() { return min(end, pos + 1) - start; }
-};
-vector<Vert> tree;
+struct SuffixTree {
+ struct Vert {
+ int start, end, suffix;
+ map<char, int> next;
+ };
+ string s;
+ int root, lastIdx, needsSuffix, pos, remainder;
+ int curVert, curEdge, curLen;
+ // Each Vertex gives its children range as [start, end)
+ vector<Vert> tree;
-int newVert(int start, int end) {
- Vert v;
- v.start = start;
- v.end = end;
- v.suffix = 0;
- tree.push_back(v);
- return ++lastIdx;
-}
+ SuffixTree(string& s) : s(s) {
+ needsSuffix = remainder = curEdge = curLen = 0;
+ lastIdx = pos = -1;
+ root = curVert = newVert(-1, -1);
+ for (int i = 0; i < sz(s); i++) extend();
+ }
-void addSuffixLink(int vert) {
- if (needsSuffix) tree[needsSuffix].suffix = vert;
- needsSuffix = vert;
-}
+ int newVert(int start, int end) {
+ Vert v;
+ v.start = start;
+ v.end = end;
+ v.suffix = 0;
+ tree.push_back(v);
+ return ++lastIdx;
+ }
-bool fullImplicitEdge(int vert) {
- if (curLen >= tree[vert].len()) {
- curEdge += tree[vert].len();
- curLen -= tree[vert].len();
- curVert = vert;
- return true;
- }
- return false;
-}
+ int len(Vert& v) {
+ return min(v.end, pos + 1) - v.start;
+ }
-void initSuffixTree() {
- needsSuffix = remainder = curEdge = curLen = 0;
- lastIdx = pos = -1;
- root = curVert = newVert(-1, -1);
-}
+ void addSuffixLink(int vert) {
+ if (needsSuffix) tree[needsSuffix].suffix = vert;
+ needsSuffix = vert;
+ }
-void extend() {
- pos++;
- needsSuffix = 0;
- remainder++;
- while (remainder) {
- if (curLen == 0) curEdge = pos;
- if (!tree[curVert].next.count(s[curEdge])) {
- int leaf = newVert(pos, s.size());
- tree[curVert].next[s[curEdge]] = leaf;
- tree[curVert].next[s[curEdge]] = leaf;
- addSuffixLink(curVert);
- } else {
- int nxt = tree[curVert].next[s[curEdge]];
- if (fullImplicitEdge(nxt)) continue;
- if (s[tree[nxt].start + curLen] == s[pos]) {
- curLen++;
- addSuffixLink(curVert);
- break;
- }
- int split = newVert(tree[nxt].start, tree[nxt].start + curLen);
- tree[curVert].next[s[curEdge]] = split;
- int leaf = newVert(pos, s.size());
- tree[split].next[s[pos]] = leaf;
- tree[nxt].start += curLen;
- tree[split].next[s[tree[nxt].start]] = nxt;
- addSuffixLink(split);
- }
- remainder--;
- if (curVert == root && curLen) {
- curLen--;
- curEdge = pos - remainder + 1;
- } else {
- curVert = tree[curVert].suffix ? tree[curVert].suffix : root;
- }
- }
-}
+ bool fullImplicitEdge(int vert) {
+ if (curLen >= len(tree[vert])) {
+ curEdge += len(tree[vert]);
+ curLen -= len(tree[vert]);
+ curVert = vert;
+ return true;
+ }
+ return false;
+ }
+
+ void extend() {
+ pos++;
+ needsSuffix = 0;
+ remainder++;
+ while (remainder) {
+ if (curLen == 0) curEdge = pos;
+ if (!tree[curVert].next.count(s[curEdge])) {
+ int leaf = newVert(pos, sz(s));
+ tree[curVert].next[s[curEdge]] = leaf;
+ tree[curVert].next[s[curEdge]] = leaf;
+ addSuffixLink(curVert);
+ } else {
+ int nxt = tree[curVert].next[s[curEdge]];
+ if (fullImplicitEdge(nxt)) continue;
+ if (s[tree[nxt].start + curLen] == s[pos]) {
+ curLen++;
+ addSuffixLink(curVert);
+ break;
+ }
+ int split = newVert(tree[nxt].start,
+ tree[nxt].start + curLen);
+ tree[curVert].next[s[curEdge]] = split;
+ int leaf = newVert(pos, sz(s));
+ tree[split].next[s[pos]] = leaf;
+ tree[nxt].start += curLen;
+ tree[split].next[s[tree[nxt].start]] = nxt;
+ addSuffixLink(split);
+ }
+ remainder--;
+ if (curVert == root && curLen) {
+ curLen--;
+ curEdge = pos - remainder + 1;
+ } else {
+ curVert = tree[curVert].suffix ? tree[curVert].suffix
+ : root;
+ }}}
+}; \ No newline at end of file
diff --git a/string/trie.cpp b/string/trie.cpp
index fa9ec49..f112e1e 100644
--- a/string/trie.cpp
+++ b/string/trie.cpp
@@ -1,25 +1,33 @@
// Zahlenwerte müssen bei 0 beginnen und zusammenhängend sein.
+constexpr int ALPHABET_SIZE = 2;
struct node {
- int children[ALPHABET_SIZE], c; // c = #Wörter, die hier enden.
- node () {
- idx = -1;
- for (int i = 0; i < ALPHABET_SIZE; i++) children[i] = -1;
- }
+ int words, wordEnds; vector<int> children;
+ node() : words(0), wordEnds(0), children(ALPHABET_SIZE, -1){}
};
-vector<node> trie; // Anlegen mit trie.push_back(node());
+vector<node> trie = {node()};
-void insert(int vert, vector<int> &txt, int s) { // Laufzeit: O(|txt|)
- if (s == (int)txt.size()) { trie[vert].c++; return; }
- if (trie[vert].children[txt[s]] == -1) {
- trie[vert].children[txt[s]] = trie.size();
- trie.push_back(node());
- }
- insert(trie[vert].children[txt[s]], txt, s + 1);
+int insert(vector<int>& word) {
+ int id = 0;
+ for (int c : word) {
+ trie[id].words++;
+ if (trie[id].children[c] < 0) {
+ trie[id].children[c] = trie.size();
+ trie.emplace_back();
+ }
+ id = trie[id].children[c];
+ }
+ trie[id].words++;
+ trie[id].wordEnds++;
+ return id;
}
-int contains(int vert, vector<int> &txt, int s) { // Laufzeit: O(|txt|)
- if (s == (int)txt.size()) return trie[vert].c;
- if (trie[vert].children[txt[s]] != -1) {
- return contains(trie[vert].children[txt[s]], txt, s + 1);
- } else return 0;
+void erase(vector<int>& word) {
+ int id = 0;
+ for (int c : word) {
+ trie[id].words--;
+ id = trie[id].children[c];
+ if (id < 0) return;
+ }
+ trie[id].words--;
+ trie[id].wordEnds--;
}
diff --git a/string/z.cpp b/string/z.cpp
new file mode 100644
index 0000000..dbd5ce5
--- /dev/null
+++ b/string/z.cpp
@@ -0,0 +1,15 @@
+vector<int> Z(const vector<int> &s) {
+ int n = sz(s);
+ vector<int> z(n, n);
+ int l = 0, r = 0, p, q;
+ for (int i = 1; i < n; ++i) {
+ if (i <= r && z[i - l] < r - i + 1) {
+ z[i] = z[i - l];
+ } else {
+ if (i > r) p = 0, q = i;
+ else p = r - i + 1, q = r + 1;
+ while (q < n && s[p] == s[q]) ++p, ++q;
+ z[i] = q - i, l = i, r = q - 1;
+ }}
+ return z;
+}