diff options
Diffstat (limited to 'string')
| -rw-r--r-- | string/ahoCorasick.cpp | 66 | ||||
| -rw-r--r-- | string/manacher.cpp | 39 | ||||
| -rw-r--r-- | string/string.tex | 11 |
3 files changed, 55 insertions, 61 deletions
diff --git a/string/ahoCorasick.cpp b/string/ahoCorasick.cpp index 9ffa6c9..eac312c 100644 --- a/string/ahoCorasick.cpp +++ b/string/ahoCorasick.cpp @@ -1,54 +1,52 @@ -constexpr ll ALPHABET_SIZE = 26; -constexpr char OFFSET = 'a'; +constexpr ll ALPHABET_SIZE = 26, OFFSET = 'a'; struct AhoCorasick { struct vert { - int suffix, exit, character, parent; - vector<int> nxt, patterns; - vert(int c, int p) : suffix(-1), exit(-1), - character(c), nxt(ALPHABET_SIZE, -1), parent(p) {} - }; - vector<vert> aho; + int suffix = 0, ch, cnt = 0; + array<int, ALPHABET_SIZE> nxt = {}; - AhoCorasick() {aho.push_back(vert(-1, 0));} + vert(int p, int c) : suffix(-p), ch(c) {} + }; + vector<vert> aho = {{0, -1}}; - // Call once for each pattern. - void addString(string &s, int patternIdx) { + int addString(string &s) { int v = 0; - for (char c : s) { + for (auto c : s) { int idx = c - OFFSET; - if (aho[v].nxt[idx] == -1) { + if (!aho[v].nxt[idx]) { aho[v].nxt[idx] = sz(aho); - aho.emplace_back(idx, v); + aho.emplace_back(v, idx); } v = aho[v].nxt[idx]; } - aho[v].patterns.push_back(patternIdx); + aho[v].cnt++; + return v; // trie node index of pattern (pattern state) } int getSuffix(int v) { - if (aho[v].suffix == -1) { - if (v == 0 || aho[v].parent == 0) aho[v].suffix = 0; - else aho[v].suffix = go(getSuffix(aho[v].parent), - aho[v].character); + if (aho[v].suffix < 0) { + aho[v].suffix = go(getSuffix(-aho[v].suffix), aho[v].ch); } return aho[v].suffix; } - int getExit(int v) { - if (aho[v].exit == -1) { - int suffix = getSuffix(v); - if (v == 0) aho[v].exit = 0; - else { - if (aho[suffix].patterns.empty()) { - aho[v].exit = getExit(suffix); - } else { - aho[v].exit = suffix; - }}} - return aho[v].exit; - } - - int go(int v, int idx) { // Root is v=0. - if (aho[v].nxt[idx] != -1) return aho[v].nxt[idx]; + int go(int v, int idx) { // Root is v=0, idx is char - OFFSET + if (aho[v].nxt[idx]) return aho[v].nxt[idx]; else return v == 0 ? 0 : go(getSuffix(v), idx); } + + vector<vector<int>> adj; + vector<ll> dp; + void buildGraph() { + adj.resize(sz(aho)); + dp.assign(sz(aho), 0); + for (int i = 1; i < sz(aho); i++) { + adj[getSuffix(i)].push_back(i); + }} + + void dfs(int v = 0) { // dp on tree + for (int u : adj[v]) { + //dp[u] = dp[v] + aho[u].cnt; // pattern count + dfs(u); + dp[v] += dp[u]; // no of matches + }} }; diff --git a/string/manacher.cpp b/string/manacher.cpp index 03c06e1..112bd55 100644 --- a/string/manacher.cpp +++ b/string/manacher.cpp @@ -1,27 +1,20 @@ -string a, b; //a needs to be set -vector<int> longest; +vector<int> manacher(const string& t) { + //transforms "aa" to ".a.a." to find even length palindromes + string s(sz(t) * 2 + 1, '.'); + for (int i = 0; i < sz(t); i++) s[2 * i + 1] = t[i]; -//transformes "aa" to ".a.a." to find even length palindromes -void init() { - b = string(sz(a) * 2 + 1, '.'); - longest.assign(sz(b), 0); - for (int i = 0; i < sz(a); i++) { - b[2 * i + 1] = a[i]; -}} - -void manacher() { - int center = 0, last = 0, n = sz(b); + int mid = 0, r = 0, n = sz(s); + vector<int> pal(n); for (int i = 1; i < n - 1; i++) { - int i2 = 2 * center - i; - longest[i] = (last > i) ? min(last - i, longest[i2]) : 0; - while (i + longest[i] + 1 < n && i - longest[i] - 1 >= 0 && - b[i + longest[i] + 1] == b[i - longest[i] - 1]) { - longest[i]++; + if (r > i) pal[i] = min(r - i, pal[2 * mid - i]); + while (pal[i] < min(i, n - i - 1) && + s[i + pal[i] + 1] == s[i - pal[i] - 1]) { + pal[i]++; } - if (i + longest[i] > last) { - center = i; - last = i + longest[i]; - }} - //convert lengths to string b (optional) - for (int i = 0; i < n; i++) longest[i] = 2 * longest[i] + 1; + if (i + pal[i] > r) mid = i, r = i + pal[i]; + } + + //convert lengths to constructed string s (optional) + //for (int i = 0; i < n; i++) pal[i] = 2 * pal[i] + 1; + return pal; } diff --git a/string/string.tex b/string/string.tex index aec50d5..fe8e40c 100644 --- a/string/string.tex +++ b/string/string.tex @@ -47,18 +47,21 @@ \sourcecode{string/longestCommonSubsequence.cpp} \end{algorithm} +\columnbreak \begin{algorithm}{\textsc{Aho-Corasick}-Automat} \begin{methods}[ll] - sucht patterns im Text & \runtime{\abs{Text}+\sum\abs{pattern}+\abs{matches}} + sucht patterns im Text & \runtime{\abs{Text}+\sum\abs{pattern}} \end{methods} \begin{enumerate} - \item mit \code{addString(pattern, idx);} Patterns hinzufügen. + \item mit \code{addString(pattern, idx)} Patterns hinzufügen. + \item rufe \code{buildGraph()} auf \item mit \code{state = go(state, idx)} in nächsten Zustand wechseln. - \item mit \code{state = getExit(state)} den exit Kanten folgen bis \code{state == 0} - \item dabei mit \code{aho[state].patterns} die Matches zählen + \item erhöhe dabei \code{dp[state]++} + \item rufe \code{dfs()} auf. In dp[pattern state] stehen die Anzahl der Matches \end{enumerate} \sourcecode{string/ahoCorasick.cpp} \end{algorithm} +\clearpage \begin{algorithm}{\textsc{Lyndon} und \textsc{De-Bruijn}} \begin{itemize} |
