summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Jungeblut <paul.jungeblut@gmail.com>2017-10-22 12:44:29 +0200
committerPaul Jungeblut <paul.jungeblut@gmail.com>2017-10-22 12:44:29 +0200
commit893415a01c6bd4f2d6babcf5bfc8765bf83c04ea (patch)
tree38a6847d8a0aa104a545d46ae0416efff2aa0d6f
parentf72e45ff8cdb0fae8302a8ba36f5d7f826c1aa5e (diff)
Removing pointers from Aho Corasick Automaton.
-rw-r--r--string/ahoCorasick.cpp66
-rw-r--r--tcr.pdfbin315950 -> 317656 bytes
2 files changed, 34 insertions, 32 deletions
diff --git a/string/ahoCorasick.cpp b/string/ahoCorasick.cpp
index c283f6f..d221fcb 100644
--- a/string/ahoCorasick.cpp
+++ b/string/ahoCorasick.cpp
@@ -1,55 +1,57 @@
// Laufzeit: O(n + m + z), n = #Text, m = Summe #Pattern, z = #Matches
// Findet mehrere Patterns gleichzeitig in einem String.
-// 1) Wurzel erstellen: vertex *automaton = new vertex();
-// 2) Mit addString(automaton, s, idx); Patterns hinzufügen.
-// 3) finishAutomaton(automaton) aufrufen.
-// 4) Mit automaton = go(automaton, c) in nächsten Zustand wechseln.
+// 1) Wurzel erstellen: aho.push_back(vertex());
+// 2) Mit addString(0, pattern, idx); Patterns hinzufügen.
+// 3) finishAutomaton(0) aufrufen.
+// 4) Mit state = go(state, c) in nächsten Zustand wechseln.
// DANACH: Wenn patterns-Vektor nicht leer ist: Hier enden alle
// enthaltenen Patterns.
// ACHTUNG: Die Zahlenwerte der auftretenden Buchstaben müssen
// zusammenhängend sein und bei 0 beginnen!
struct vertex {
- vertex *next[ALPHABET_SIZE], *failure;
- char character;
+ int next[ALPHABET_SIZE], failure;
+ int character;
vector<int> patterns; // Indizes der Patterns, die hier enden.
- vertex() { for (int i = 0; i < ALPHABET_SIZE; i++) next[i] = NULL; }
+ vertex() { for (int i = 0; i < ALPHABET_SIZE; i++) next[i] = -1; }
};
+vector<vertex> aho;
-void addString(vertex *v, string &pattern, int patternIdx) {
- for (int i = 0; i < (int)pattern.length(); i++) {
- if (!v->next[(int)pattern[i]]) {
- vertex *w = new vertex();
- w->character = pattern[i];
- v->next[(int)pattern[i]] = w;
+void addString(int v, vector<int> &pattern, int patternIdx) {
+ for (int i = 0; i < (int)pattern.size(); i++) {
+ if (aho[v].next[pattern[i]] == -1) {
+ aho[v].next[pattern[i]] = aho.size();
+ aho.push_back(vertex());
+ aho.back().character = pattern[i];
}
- v = v->next[(int)pattern[i]];
+ v = aho[v].next[pattern[i]];
}
- v->patterns.push_back(patternIdx);
+ aho[v].patterns.push_back(patternIdx);
}
-void finishAutomaton(vertex *v) {
+void finishAutomaton(int v) {
for (int i = 0; i < ALPHABET_SIZE; i++)
- if (!v->next[i]) v->next[i] = v;
+ if (aho[v].next[i] == -1) aho[v].next[i] = v;
- queue<vertex*> q;
+ queue<int> q;
for (int i = 0; i < ALPHABET_SIZE; i++) {
- if (v->next[i] != v) {
- v->next[i]->failure = v;
- q.push(v->next[i]);
+ if (aho[v].next[i] != v) {
+ aho[aho[v].next[i]].failure = v;
+ q.push(aho[v].next[i]);
}}
while (!q.empty()) {
- vertex *r = q.front(); q.pop();
+ int r = q.front(); q.pop();
for (int i = 0; i < ALPHABET_SIZE; i++) {
- if (r->next[i]) {
- q.push(r->next[i]);
- vertex *f = r->failure;
- while (!f->next[i]) f = f->failure;
- r->next[i]->failure = f->next[i];
- for (int j = 0; j < (int)f->next[i]->patterns.size(); j++) {
- r->next[i]->patterns.push_back(f->next[i]->patterns[j]);
+ if (aho[r].next[i] != -1) {
+ q.push(aho[r].next[i]);
+ int f = aho[r].failure;
+ while (aho[f].next[i] == -1) f = aho[f].failure;
+ aho[aho[r].next[i]].failure = aho[f].next[i];
+ for (int j = 0; j < (int)aho[aho[f].next[i]].patterns.size(); j++) {
+ aho[aho[r].next[i]].patterns.push_back(
+ aho[aho[f].next[i]].patterns[j]);
}}}}}
-vertex* go(vertex *v, char c) {
- if (v->next[(int)c]) return v->next[(int)c];
- else return go(v->failure, c);
+int go(int v, int c) {
+ if (aho[v].next[c] != -1) return aho[v].next[c];
+ else return go(aho[v].failure, c);
}
diff --git a/tcr.pdf b/tcr.pdf
index b337149..34ea147 100644
--- a/tcr.pdf
+++ b/tcr.pdf
Binary files differ