moved more stuff

author: MZuenni <michi.zuendorf@gmail.com> 2023-02-14 16:41:24 +0100
committer: MZuenni <michi.zuendorf@gmail.com> 2023-02-14 16:41:24 +0100
commit: eb4bc75111da45a17604fdff2f9eed0977f93dff (patch)
tree: ffd990c0cc12a73c897a6e5c0d8216ce178a78c5 /string
parent: f07738a30c46f0a277af5609a3b4c4b01674ad84 (diff)
5 files changed, 79 insertions, 95 deletions
diff --git a/string/kmp.cpp b/string/kmp.cpp
index 12ae3eb..421479e 100644
--- a/string/kmp.cpp
+++ b/string/kmp.cpp
@@ -1,18 +1,15 @@
 vector<int> kmpPreprocessing(const string& sub) {
 	vector<int> b(sz(sub) + 1);
 	b[0] = -1;
-	int i = 0, j = -1;
-	while (i < sz(sub)) {
+	for (int i = 0, j = -1; i < sz(sub);) {
 		while (j >= 0 && sub[i] != sub[j]) j = b[j];
-		i++; j++;
-		b[i] = j;
+		b[++i] = ++j;
 	}
 	return b;
 }
 vector<int> kmpSearch(const string& s, const string& sub) {
-	vector<int> pre = kmpPreprocessing(sub), result;
-	int i = 0, j = 0;
-	while (i < sz(s)) {
+	vector<int> result, pre = kmpPreprocessing(sub);
+	for (int i = 0, j = 0; i < sz(s);) {
 		while (j >= 0 && s[i] != sub[j]) j = pre[j];
 		i++; j++;
 		if (j == sz(sub)) {
diff --git a/string/longestCommonSubsequence.cpp b/string/longestCommonSubsequence.cpp
index fa1adb6..2a0b74c 100644
--- a/string/longestCommonSubsequence.cpp
+++ b/string/longestCommonSubsequence.cpp
@@ -5,7 +5,7 @@ string lcss(string& a, string& b) {
 			if(a[y] == b[x]) m[y][x] = 1 + m[y+1][x+1];
 			else m[y][x] = max(m[y+1][x], m[y][x+1]);
 	}} // Für die Länge: return m[0][0];
-	string res; int x=0; int y=0;
+	string res; int x = 0, y = 0;
 	while(x < sz(b) && y < sz(a)) {
 		if(a[y] == b[x]) res += a[y++], x++;
 		else if(m[y][x+1] > m[y+1][x+1]) x++;
diff --git a/string/string.tex b/string/string.tex
index 0daaef2..6b83e2c 100644
--- a/string/string.tex
+++ b/string/string.tex
@@ -15,15 +15,21 @@
 	\sourcecode{string/z.cpp}
 \end{algorithm}
 
-\begin{algorithm}{Trie}
-	\sourcecode{string/trie.cpp}
+\begin{algorithm}{Rolling Hash}
+	\sourcecode{string/rollingHash.cpp}
 \end{algorithm}
 
-\begin{algorithm}{Longest Common Subsequence}
-	\begin{methods}
-		\method{lcss}{findet längste gemeinsame Sequenz}{\abs{a}\*\abs{b}}
-	\end{methods}
-	\sourcecode{string/longestCommonSubsequence.cpp}
+\begin{algorithm}{Pattern Matching mit Wildcards}
+	Gegeben zwei strings $A$ und $B$,$B$ enthält $k$ \emph{wildcards} enthält. Sei:
+	\begin{align*}
+		a_i&=\cos(\alpha_i) + i\sin(\alpha_i) &\text{ mit } \alpha_i&=\frac{2\pi A[i]}{\Sigma}\\
+		b_i&=\cos(\beta_i) + i\sin(\beta_i) &\text{ mit } \beta_i&=\begin{cases*}
+			\frac{2\pi B[\abs{B}-i-1]}{\Sigma} & falls $B[\abs{B}-i-1]\in\Sigma$ \\
+			0 & sonst
+		\end{cases*}
+	\end{align*}
+	$B$ matcht $A$ an stelle $i$ wenn $(b\cdot a)[|B|-1+i]=|B|-k$.
+	Benutze FFT um $(b\cdot a)$ zu berechnen.
 \end{algorithm}
 
 \begin{algorithm}{\textsc{Manacher}'s Algorithm, Longest Palindrome}
@@ -34,8 +40,11 @@
 	\sourcecode{string/manacher.cpp}
 \end{algorithm}
 
-\begin{algorithm}{Rolling Hash}
-	\sourcecode{string/rollingHash.cpp}
+\begin{algorithm}{Longest Common Subsequence}
+	\begin{methods}
+		\method{lcss}{findet längste gemeinsame Sequenz}{\abs{a}\*\abs{b}}
+	\end{methods}
+	\sourcecode{string/longestCommonSubsequence.cpp}
 \end{algorithm}
 
 \begin{algorithm}{\textsc{Aho-Corasick}-Automat}
@@ -51,22 +60,46 @@
 	\sourcecode{string/ahoCorasick.cpp}
 \end{algorithm}
 
-\begin{algorithm}{Suffix-Baum}
+\begin{algorithm}{Lyndon und De-Bruijn}
+	\begin{itemize}
+		\item \textbf{Lyndon-Wort:} Ein Wort das lexikographisch kleiner ist als jede seiner Rotationen.
+		\item Jedes Wort kann \emph{eindeutig} in eine nicht ansteigende Folge von Lyndon-Worten zerlegt werden.
+		\item Für Lyndon-Worte $u, v$ mit $u<v$ gilt, dass $uv$ auch ein Lyndon-Wort ist.
+	\end{itemize}
 	\begin{methods}
-		\method{SuffixTree}{berechnet einen Suffixbaum}{\abs{s}}
-		\method{extend}{fügt den nächsten Buchstaben aus \code{s} ein}{1}
+		\method[, Durchschnitt $\Theta(1)$]{next}{lexikographisch nächstes Lyndon-Wort}{n}
+		\method{duval}{zerlegt $s$ in Lyndon-Worte}{n}
+		\method{minrotation}{berechnet kleinste Rotation von $s$}{n}
 	\end{methods}
-	\sourcecode{string/suffixTree.cpp}
+	\sourcecode{string/lyndon.cpp}
+	\sourcecode{string/duval.cpp}
+	\begin{itemize}
+		\item \textbf{De-Bruijn-Sequenze $\boldsymbol{B(\Sigma, n)}$:}~~~ein Wort das jedes Wort der Länge $n$ genau einmal als substring enthält (und minimal ist). Wobei $B(\Sigma, n)$ zyklisch betrachtet wird.
+		\item es gibt $\frac{(k!)^{k^{n-1}}}{k^{n}}$ verschiedene $B(\Sigma, n)$
+		\item $B(\Sigma, n)$ hat Länge $\abs{\Sigma}^n$
+	\end{itemize}
+	\begin{methods}
+		\method{deBruijn}{berechnet ein festes $B(\Sigma, n)$}{\abs{\Sigma}^n}
+	\end{methods}
+	\sourcecode{string/deBruijn.cpp}
 \end{algorithm}
 
 \begin{algorithm}{Suffix-Array}
+\begin{methods}
+	\method{SuffixArray}{berechnet ein Suffix Array}{\abs{s}\*\log^2(\abs{s})}
+	\method{lcp}{berechnet den logest common prefix}{\log(\abs{s})}
+	\method{}{von \code{s[x]} und \code{s[y]}}{}
+\end{methods}
+\textbf{ACHTUNG:} \code{s} muss mit einem sentinel enden! \code{'\$'} oder \code{'#'}
+\sourcecode{string/suffixArray.cpp}
+\end{algorithm}
+
+\begin{algorithm}{Suffix-Baum}
 	\begin{methods}
-		\method{SuffixArray}{berechnet ein Suffix Array}{\abs{s}\*\log^2(\abs{s})}
-		\method{lcp}{berechnet den logest common prefix}{\log(\abs{s})}
-		\method{}{von \code{s[x]} und \code{s[y]}}{}
+		\method{SuffixTree}{berechnet einen Suffixbaum}{\abs{s}}
+		\method{extend}{fügt den nächsten Buchstaben aus \code{s} ein}{1}
 	\end{methods}
-	\textbf{ACHTUNG:} \code{s} muss mit einem sentinel enden! \code{'\$'} oder \code{'#'}
-	\sourcecode{string/suffixArray.cpp}
+	\sourcecode{string/suffixTree.cpp}
 \end{algorithm}
 
 \begin{algorithm}{Suffix-Automaton}
@@ -77,8 +110,7 @@
 		Wenn alle Übergänge vorhanden sind, ist \textit{w} Substring von \textit{s}.
 	
 		\item \textbf{Ist \textit{w} Suffix von \textit{s}?}
-		Wie oben.
-		Überprüfe am Ende, ob aktueller Zustand ein Terminal ist.
+		Wie oben und prüfe, ob Endzustand ein Terminal ist.
 	
 		\item \textbf{Anzahl verschiedener Substrings.}
 		Jeder Pfad im Automaten entspricht einem Substring.
@@ -93,39 +125,6 @@
 	\end{itemize}
 \end{algorithm}
 
-\begin{algorithm}{Lyndon und De-Bruijn}
-	\begin{itemize}
-		\item \textbf{Lyndon-Wort:} Ein Wort das lexikographisch kleiner ist als jede seiner Rotationen.
-		\item Jedes Wort kann \emph{eindeutig} in eine nicht ansteigende Folge von Lyndon-Worten zerlegt werden.
-		\item Für Lyndon-Worte $u, v$ mit $u<v$ gilt, dass $uv$ auch ein Lyndon-Wort ist.
-	\end{itemize}
-	\begin{methods}
-		\method[, Durchschnitt $\Theta(1)$]{next}{lexikographisch nächstes Lyndon-Wort}{n}
-		\method{duval}{zerlegt $s$ in Lyndon-Worte}{n}
-		\method{minrotation}{berechnet kleinste Rotation von $s$}{n}
-	\end{methods}
-	\sourcecode{string/lyndon.cpp}
-	\sourcecode{string/duval.cpp}
-	\begin{itemize}
-		\item \textbf{De-Bruijn-Sequenze $\boldsymbol{B(\Sigma, n)}$:}~~~ein Wort das jedes Wort der Länge $n$ genau einmal als substring enthält (und minimal ist). Wobei $B(\Sigma, n)$ zyklisch betrachtet wird.
-		\item es gibt $\frac{(k!)^{k^{n-1}}}{k^{n}}$ verschiedene $B(\Sigma, n)$
-		\item $B(\Sigma, n)$ hat Länge $\abs{\Sigma}^n$
-	\end{itemize}
-	\begin{methods}
-		\method{deBruijn}{berechnet ein festes $B(\Sigma, n)$}{\abs{\Sigma}^n}
-	\end{methods}
-	\sourcecode{string/deBruijn.cpp}
-\end{algorithm}
-
-\begin{algorithm}{Pattern Matching mit Wildcards}
-	Gegeben zwei strings $A$ und $B$,$B$ enthält $k$ \emph{wildcards} enthält. Sei:
-	\begin{align*}
-		a_i&=\cos(\alpha_i) + i\sin(\alpha_i) &\text{ mit } \alpha_i&=\frac{2\pi A[i]}{\Sigma}\\
-		b_i&=\cos(\beta_i) + i\sin(\beta_i) &\text{ mit } \beta_i&=\begin{cases*}
-			\frac{2\pi B[\abs{B}-i-1]}{\Sigma} & falls $B[\abs{B}-i-1]\in\Sigma$ \\
-			0 & sonst
-		\end{cases*}
-	\end{align*}
-	$B$ matcht $A$ an stelle $i$ wenn $(b\cdot a)[|B|-1+i]=|B|-k$.
-	Benutze FFT um $(b\cdot a)$ zu berechnen.
+\begin{algorithm}{Trie}
+	\sourcecode{string/trie.cpp}
 \end{algorithm}
diff --git a/string/suffixAutomaton.cpp b/string/suffixAutomaton.cpp
index 69aa979..4d05b6e 100644
--- a/string/suffixAutomaton.cpp
+++ b/string/suffixAutomaton.cpp
@@ -11,7 +11,6 @@ struct SuffixAutomaton {
 
 	SuffixAutomaton(const string &s) : states(2*sz(s)) {
 		size = 1; last = 0;
-		states[0].length = 0;
 		states[0].link = -1;
 		for (auto c : s) extend(c - MIN_CHAR);
 	}
@@ -43,8 +42,7 @@ struct SuffixAutomaton {
 		last = current;
 	}
 
-	// Pair with start index and length of LCS.
-	// Index in parameter t.
+	// Pair with start index (in t) and length of LCS.
 	pair<int, int> longestCommonSubstring(const string &t) {
 		int v = 0, l = 0, best = 0, bestpos = 0;
 		for (int i = 0; i < sz(t); i++) {
diff --git a/string/suffixTree.cpp b/string/suffixTree.cpp
index 4faea86..caeeecf 100644
--- a/string/suffixTree.cpp
+++ b/string/suffixTree.cpp
@@ -1,48 +1,39 @@
 struct SuffixTree {
 	struct Vert {
-		int start, end, suffix;
+		int start, end, suf;
 		map<char, int> next;
 	};
 	string s;
-	int root, lastIdx, needsSuffix, pos, remainder;
-	int curVert, curEdge, curLen;
+	int needsSuffix, pos, remainder, curVert, curEdge, curLen;
 	// Each Vertex gives its children range as [start, end)
-	vector<Vert> tree;
+	vector<Vert> tree = {Vert{-1, -1, 0 {}}};
 
-	SuffixTree(string& s) : s(s) {
-		needsSuffix = remainder = curEdge = curLen = 0;
-		lastIdx = pos = -1;
-		root = curVert = newVert(-1, -1);
+	SuffixTree(const string& s) : s(s) {
+		needsSuffix = remainder = curVert = curEdge = curLen = 0;
+		pos = -1;
 		for (int i = 0; i < sz(s); i++) extend();
 	}
 
 	int newVert(int start, int end) {
-		Vert v;
-		v.start = start;
-		v.end = end;
-		v.suffix = 0;
-		tree.push_back(v);
-		return ++lastIdx;
-	}
-
-	int len(Vert& v) {
-		return min(v.end, pos + 1) - v.start;
+		tree.push_back({start, end, 0, {}});
+		return sz(tree) - 1;
 	}
 
 	void addSuffixLink(int vert) {
-		if (needsSuffix) tree[needsSuffix].suffix = vert;
+		if (needsSuffix) tree[needsSuffix].suf = vert;
 		needsSuffix = vert;
 	}
 
 	bool fullImplicitEdge(int vert) {
-		if (curLen >= len(tree[vert])) {
-			curEdge += len(tree[vert]);
-			curLen -= len(tree[vert]);
+		len = min(tree[vert].end, pos + 1) - tree[vert].start;
+		if (curLen >= len) {
+			curEdge += len;
+			curLen -= len;
 			curVert = vert;
 			return true;
-		}
-		return false;
-	}
+		} else {
+			return false;
+	}}
 
 	void extend() {
 		pos++;
@@ -72,11 +63,10 @@ struct SuffixTree {
 				addSuffixLink(split);
 			}
 			remainder--;
-			if (curVert == root && curLen) {
+			if (curVert == 0 && curLen) {
 				curLen--;
 				curEdge = pos - remainder + 1;
 			} else {
-				curVert = tree[curVert].suffix ? tree[curVert].suffix
-				                               : root;
+				curVert = tree[curVert].suf ? tree[curVert].suf : 0;
 	}}}
-};
+};
+\ No newline at end of file
author	MZuenni <michi.zuendorf@gmail.com>	2023-02-14 16:41:24 +0100
committer	MZuenni <michi.zuendorf@gmail.com>	2023-02-14 16:41:24 +0100
commit	eb4bc75111da45a17604fdff2f9eed0977f93dff (patch)
tree	ffd990c0cc12a73c897a6e5c0d8216ce178a78c5 /string
parent	f07738a30c46f0a277af5609a3b4c4b01674ad84 (diff)