diff options
| author | Paul Jungeblut <paul.jungeblut@gmail.com> | 2017-10-22 12:44:29 +0200 |
|---|---|---|
| committer | Paul Jungeblut <paul.jungeblut@gmail.com> | 2017-10-22 12:44:29 +0200 |
| commit | 893415a01c6bd4f2d6babcf5bfc8765bf83c04ea (patch) | |
| tree | 38a6847d8a0aa104a545d46ae0416efff2aa0d6f /string/ahoCorasick.cpp | |
| parent | f72e45ff8cdb0fae8302a8ba36f5d7f826c1aa5e (diff) | |
Removing pointers from Aho Corasick Automaton.
Diffstat (limited to 'string/ahoCorasick.cpp')
| -rw-r--r-- | string/ahoCorasick.cpp | 66 |
1 files changed, 34 insertions, 32 deletions
diff --git a/string/ahoCorasick.cpp b/string/ahoCorasick.cpp index c283f6f..d221fcb 100644 --- a/string/ahoCorasick.cpp +++ b/string/ahoCorasick.cpp @@ -1,55 +1,57 @@ // Laufzeit: O(n + m + z), n = #Text, m = Summe #Pattern, z = #Matches // Findet mehrere Patterns gleichzeitig in einem String. -// 1) Wurzel erstellen: vertex *automaton = new vertex(); -// 2) Mit addString(automaton, s, idx); Patterns hinzufügen. -// 3) finishAutomaton(automaton) aufrufen. -// 4) Mit automaton = go(automaton, c) in nächsten Zustand wechseln. +// 1) Wurzel erstellen: aho.push_back(vertex()); +// 2) Mit addString(0, pattern, idx); Patterns hinzufügen. +// 3) finishAutomaton(0) aufrufen. +// 4) Mit state = go(state, c) in nächsten Zustand wechseln. // DANACH: Wenn patterns-Vektor nicht leer ist: Hier enden alle // enthaltenen Patterns. // ACHTUNG: Die Zahlenwerte der auftretenden Buchstaben müssen // zusammenhängend sein und bei 0 beginnen! struct vertex { - vertex *next[ALPHABET_SIZE], *failure; - char character; + int next[ALPHABET_SIZE], failure; + int character; vector<int> patterns; // Indizes der Patterns, die hier enden. - vertex() { for (int i = 0; i < ALPHABET_SIZE; i++) next[i] = NULL; } + vertex() { for (int i = 0; i < ALPHABET_SIZE; i++) next[i] = -1; } }; +vector<vertex> aho; -void addString(vertex *v, string &pattern, int patternIdx) { - for (int i = 0; i < (int)pattern.length(); i++) { - if (!v->next[(int)pattern[i]]) { - vertex *w = new vertex(); - w->character = pattern[i]; - v->next[(int)pattern[i]] = w; +void addString(int v, vector<int> &pattern, int patternIdx) { + for (int i = 0; i < (int)pattern.size(); i++) { + if (aho[v].next[pattern[i]] == -1) { + aho[v].next[pattern[i]] = aho.size(); + aho.push_back(vertex()); + aho.back().character = pattern[i]; } - v = v->next[(int)pattern[i]]; + v = aho[v].next[pattern[i]]; } - v->patterns.push_back(patternIdx); + aho[v].patterns.push_back(patternIdx); } -void finishAutomaton(vertex *v) { +void finishAutomaton(int v) { for (int i = 0; i < ALPHABET_SIZE; i++) - if (!v->next[i]) v->next[i] = v; + if (aho[v].next[i] == -1) aho[v].next[i] = v; - queue<vertex*> q; + queue<int> q; for (int i = 0; i < ALPHABET_SIZE; i++) { - if (v->next[i] != v) { - v->next[i]->failure = v; - q.push(v->next[i]); + if (aho[v].next[i] != v) { + aho[aho[v].next[i]].failure = v; + q.push(aho[v].next[i]); }} while (!q.empty()) { - vertex *r = q.front(); q.pop(); + int r = q.front(); q.pop(); for (int i = 0; i < ALPHABET_SIZE; i++) { - if (r->next[i]) { - q.push(r->next[i]); - vertex *f = r->failure; - while (!f->next[i]) f = f->failure; - r->next[i]->failure = f->next[i]; - for (int j = 0; j < (int)f->next[i]->patterns.size(); j++) { - r->next[i]->patterns.push_back(f->next[i]->patterns[j]); + if (aho[r].next[i] != -1) { + q.push(aho[r].next[i]); + int f = aho[r].failure; + while (aho[f].next[i] == -1) f = aho[f].failure; + aho[aho[r].next[i]].failure = aho[f].next[i]; + for (int j = 0; j < (int)aho[aho[f].next[i]].patterns.size(); j++) { + aho[aho[r].next[i]].patterns.push_back( + aho[aho[f].next[i]].patterns[j]); }}}}} -vertex* go(vertex *v, char c) { - if (v->next[(int)c]) return v->next[(int)c]; - else return go(v->failure, c); +int go(int v, int c) { + if (aho[v].next[c] != -1) return aho[v].next[c]; + else return go(aho[v].failure, c); } |
