diff options
| author | Noobie99 <noob999noob999@gmail.com> | 2023-03-25 13:53:47 +0100 |
|---|---|---|
| committer | Noobie99 <noob999noob999@gmail.com> | 2023-03-25 13:53:47 +0100 |
| commit | 3b91d2662310aee532cc84e1447824459671767e (patch) | |
| tree | 9eb7ce41088b9d6603d16319ad54c81e93261e84 | |
| parent | 61710d33fddc342904c69b955cc3b5a363473d1e (diff) | |
improved suffix automaton
| -rw-r--r-- | string/suffixAutomaton.cpp | 85 | ||||
| -rw-r--r-- | tcr.pdf | bin | 649611 -> 665743 bytes |
2 files changed, 37 insertions, 48 deletions
diff --git a/string/suffixAutomaton.cpp b/string/suffixAutomaton.cpp index c6205da..8b79822 100644 --- a/string/suffixAutomaton.cpp +++ b/string/suffixAutomaton.cpp @@ -1,70 +1,59 @@ -constexpr char MIN_CHAR = 'a'; -constexpr ll ALPHABET_SIZE = 26; +constexpr int ALPHABET_SIZE = 26; +constexpr char OFFSET = 'a'; struct SuffixAutomaton { struct State { - int length, link; - vector<int> next; - State() : next(ALPHABET_SIZE) {} + int len, link = -1; + int nxt[ALPHABET_SIZE] = {}; // map if large Alphabet + State(int l) : len(l) {} }; - vector<State> states; - int size, last; - SuffixAutomaton(const string &s) : states(2*sz(s)) { - size = 1; last = 0; - states[0].link = -1; - for (auto c : s) extend(c - MIN_CHAR); + vector<State> st{0}; + int last = 0; + + SuffixAutomaton(const string &s) { + st.reserve(2 * sz(s)); + for (auto c : s) extend(c - OFFSET); } void extend(int c) { - int current = size++; - states[current].length = states[last].length + 1; - int pos = last; - while (pos != -1 && !states[pos].next[c]) { - states[pos].next[c] = current; - pos = states[pos].link; + st.emplace_back(st[last].len + 1); + int p = last, cur = last = sz(st) - 1; + for (; p != -1 && !st[p].nxt[c]; p = st[p].link) { + st[p].nxt[c] = cur; } - if (pos == -1) states[current].link = 0; + if (p == -1) st[cur].link = 0; else { - int q = states[pos].next[c]; - if (states[pos].length + 1 == states[q].length) { - states[current].link = q; + int q = st[p].nxt[c]; + if (st[p].len + 1 == st[q].len) { + st[cur].link = q; } else { - int clone = size++; - states[clone].length = states[pos].length + 1; - states[clone].link = states[q].link; - states[clone].next = states[q].next; - while (pos != -1 && states[pos].next[c] == q) { - states[pos].next[c] = clone; - pos = states[pos].link; + st.emplace_back(st[p].len + 1); + st.back().link = st[q].link; + copy(all(st[q].nxt), st.back().nxt); // back.nxt=[q].nxt + for (; p != -1 && st[p].nxt[c] == q; p = st[p].link) { + st[p].nxt[c] = sz(st) - 1; } - states[q].link = states[current].link = clone; + st[q].link = st[cur].link = sz(st) - 1; }} - last = current; + } + + vector<int> calculateTerminals() { + vector<int> terminals; + for (int p = last; p != -1; p = st[p].link) { + terminals.push_back(p); + } + return terminals; } // Pair with start index (in t) and length of LCS. pair<int, int> longestCommonSubstring(const string &t) { int v = 0, l = 0, best = 0, bestpos = 0; for (int i = 0; i < sz(t); i++) { - int c = t[i] - MIN_CHAR; - while (v && !states[v].next[c]) { - v = states[v].link; - l = states[v].length; - } - if (states[v].next[c]) {v = states[v].next[c]; l++;} - if (l > best) {best = l; bestpos = i;} + int c = t[i] - OFFSET; + for (; v && !st[v].nxt[c]; v = st[v].link) l = st[v].len; + if (st[v].nxt[c]) v = st[v].nxt[c], l++; + if (l > best) best = l, bestpos = i; } return {bestpos - best + 1, best}; } - - // Returns all terminals of the automaton. - vector<int> calculateTerminals() { - vector<int> terminals; - int pos = last; - while (pos != -1) { - terminals.push_back(pos); - pos = states[pos].link; - } - return terminals; - } }; Binary files differ |
