summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNoobie99 <noob999noob999@gmail.com>2023-03-25 13:53:47 +0100
committerNoobie99 <noob999noob999@gmail.com>2023-03-25 13:53:47 +0100
commit3b91d2662310aee532cc84e1447824459671767e (patch)
tree9eb7ce41088b9d6603d16319ad54c81e93261e84
parent61710d33fddc342904c69b955cc3b5a363473d1e (diff)
improved suffix automaton
-rw-r--r--string/suffixAutomaton.cpp85
-rw-r--r--tcr.pdfbin649611 -> 665743 bytes
2 files changed, 37 insertions, 48 deletions
diff --git a/string/suffixAutomaton.cpp b/string/suffixAutomaton.cpp
index c6205da..8b79822 100644
--- a/string/suffixAutomaton.cpp
+++ b/string/suffixAutomaton.cpp
@@ -1,70 +1,59 @@
-constexpr char MIN_CHAR = 'a';
-constexpr ll ALPHABET_SIZE = 26;
+constexpr int ALPHABET_SIZE = 26;
+constexpr char OFFSET = 'a';
struct SuffixAutomaton {
struct State {
- int length, link;
- vector<int> next;
- State() : next(ALPHABET_SIZE) {}
+ int len, link = -1;
+ int nxt[ALPHABET_SIZE] = {}; // map if large Alphabet
+ State(int l) : len(l) {}
};
- vector<State> states;
- int size, last;
- SuffixAutomaton(const string &s) : states(2*sz(s)) {
- size = 1; last = 0;
- states[0].link = -1;
- for (auto c : s) extend(c - MIN_CHAR);
+ vector<State> st{0};
+ int last = 0;
+
+ SuffixAutomaton(const string &s) {
+ st.reserve(2 * sz(s));
+ for (auto c : s) extend(c - OFFSET);
}
void extend(int c) {
- int current = size++;
- states[current].length = states[last].length + 1;
- int pos = last;
- while (pos != -1 && !states[pos].next[c]) {
- states[pos].next[c] = current;
- pos = states[pos].link;
+ st.emplace_back(st[last].len + 1);
+ int p = last, cur = last = sz(st) - 1;
+ for (; p != -1 && !st[p].nxt[c]; p = st[p].link) {
+ st[p].nxt[c] = cur;
}
- if (pos == -1) states[current].link = 0;
+ if (p == -1) st[cur].link = 0;
else {
- int q = states[pos].next[c];
- if (states[pos].length + 1 == states[q].length) {
- states[current].link = q;
+ int q = st[p].nxt[c];
+ if (st[p].len + 1 == st[q].len) {
+ st[cur].link = q;
} else {
- int clone = size++;
- states[clone].length = states[pos].length + 1;
- states[clone].link = states[q].link;
- states[clone].next = states[q].next;
- while (pos != -1 && states[pos].next[c] == q) {
- states[pos].next[c] = clone;
- pos = states[pos].link;
+ st.emplace_back(st[p].len + 1);
+ st.back().link = st[q].link;
+ copy(all(st[q].nxt), st.back().nxt); // back.nxt=[q].nxt
+ for (; p != -1 && st[p].nxt[c] == q; p = st[p].link) {
+ st[p].nxt[c] = sz(st) - 1;
}
- states[q].link = states[current].link = clone;
+ st[q].link = st[cur].link = sz(st) - 1;
}}
- last = current;
+ }
+
+ vector<int> calculateTerminals() {
+ vector<int> terminals;
+ for (int p = last; p != -1; p = st[p].link) {
+ terminals.push_back(p);
+ }
+ return terminals;
}
// Pair with start index (in t) and length of LCS.
pair<int, int> longestCommonSubstring(const string &t) {
int v = 0, l = 0, best = 0, bestpos = 0;
for (int i = 0; i < sz(t); i++) {
- int c = t[i] - MIN_CHAR;
- while (v && !states[v].next[c]) {
- v = states[v].link;
- l = states[v].length;
- }
- if (states[v].next[c]) {v = states[v].next[c]; l++;}
- if (l > best) {best = l; bestpos = i;}
+ int c = t[i] - OFFSET;
+ for (; v && !st[v].nxt[c]; v = st[v].link) l = st[v].len;
+ if (st[v].nxt[c]) v = st[v].nxt[c], l++;
+ if (l > best) best = l, bestpos = i;
}
return {bestpos - best + 1, best};
}
-
- // Returns all terminals of the automaton.
- vector<int> calculateTerminals() {
- vector<int> terminals;
- int pos = last;
- while (pos != -1) {
- terminals.push_back(pos);
- pos = states[pos].link;
- }
- return terminals;
- }
};
diff --git a/tcr.pdf b/tcr.pdf
index 2b8297e..e18fce8 100644
--- a/tcr.pdf
+++ b/tcr.pdf
Binary files differ