summaryrefslogtreecommitdiff
path: root/content/string/ahoCorasick.cpp
diff options
context:
space:
mode:
authormzuenni <mzuenni@users.noreply.github.com>2024-07-28 22:54:40 +0200
committerGitHub <noreply@github.com>2024-07-28 22:54:40 +0200
commit8d11c6c8213f46f0fa19826917c255edd5d43cb1 (patch)
tree96d75baff33d5a04b5a60f1a41f514a26c716874 /content/string/ahoCorasick.cpp
parent8c33b4e0d3030cfed17fc64b4fe41133339f6d87 (diff)
Test (#4)
* update * moved content in subdir * rename file * add test setup * add test setup * add github action * automaticly test all cpp files * timeout after 10s * setulimit and dont zero memory * test build pdf * install latexmk * update * update * ngerman * fonts * removed old code * add first test * added tests * test in sorted order * more tests * simplified test * more tests * fix suffix tree * fixes and improvements * done ust lst directly * fix swap * add links to pdf * fix constants * add primorial * add comment * various improvements * more tests * added missing stuf * more tests * fix tests * more tests * more tests * more tests * fix recursion? * test trie * more tests * only use python temporarily for listings * only use python temporarily for listings * more tests * fix longestCommonSubstring * more tests * more tests * made code more similiar * fix? * more tests * more tests * more tests * add ahoCorasick test + limit 4GB stack size * more tests * fix test * add additional test * more tests * more tests * fix? * better fix * fix virtual tree * more tests * more tests * recursive closest pair * more tests * decrease limit * new tests * more tests * fix name * more tests * add test * new test * more tests * more tests * more tests * more tests * new test and content * new code * new code * larger tests * fix and test * new test * new test * update pdf * remove comments * new test * more tests * more testcases * more tests * increased limit * more tests * more tests * more tests * new tests * more tests * shortened code * new test * add basic tests for bigint * more tests * removed old files * new test * ignore some files * more auto more ccw * fix test * more tests * fix * new tests * more tests * more tests * stronger test * actually verify delaunay... * more tests * fix header * more tests * run tests parallel? * test parralel? * add --missing * separate workflows * test * is the pdf checked? 
* separate workflows * fix workflow * more workflows --------- Co-authored-by: Yidi <noob999noob999@gmail.com>
Diffstat (limited to 'content/string/ahoCorasick.cpp')
-rw-r--r--content/string/ahoCorasick.cpp52
1 files changed, 52 insertions, 0 deletions
diff --git a/content/string/ahoCorasick.cpp b/content/string/ahoCorasick.cpp
new file mode 100644
index 0000000..eac312c
--- /dev/null
+++ b/content/string/ahoCorasick.cpp
@@ -0,0 +1,52 @@
+constexpr ll ALPHABET_SIZE = 26, OFFSET = 'a'; // 26 child slots per node; letter index = char - OFFSET
+struct AhoCorasick { // pattern trie with lazily resolved suffix links
+	struct vert {
+		int suffix = 0, ch, cnt = 0; // suffix: link if >= 0, else -(parent index), not yet resolved
+		array<int, ALPHABET_SIZE> nxt = {}; // child per letter index, 0 = no child
+		// p: parent node index (stored negated until resolved), c: letter index of the edge p -> this
+		vert(int p, int c) : suffix(-p), ch(c) {}
+	};
+	vector<vert> aho = {{0, -1}}; // aho[0] is the root
+	// Inserts s, bumps cnt at its end node and returns that node. No range check on c - OFFSET.
+	int addString(const string &s) { // const ref: s is only read, so temporaries are accepted too
+		int v = 0;
+		for (auto c : s) {
+			int idx = c - OFFSET;
+			if (!aho[v].nxt[idx]) {
+				aho[v].nxt[idx] = sz(aho);
+				aho.emplace_back(v, idx);
+			}
+			v = aho[v].nxt[idx];
+		}
+		aho[v].cnt++;
+		return v; // trie node index of pattern (pattern state)
+	}
+	// Suffix link of v; computed on first request and cached in aho[v].suffix.
+	int getSuffix(int v) {
+		if (aho[v].suffix < 0) {
+			aho[v].suffix = go(getSuffix(-aho[v].suffix), aho[v].ch);
+		}
+		return aho[v].suffix;
+	}
+	// Transition from v on idx: the trie edge if present, otherwise retried from the suffix link.
+	int go(int v, int idx) { // Root is v=0, idx is char - OFFSET
+		if (aho[v].nxt[idx]) return aho[v].nxt[idx];
+		else return v == 0 ? 0 : go(getSuffix(v), idx);
+	}
+	// adj[v]: nodes whose suffix link is v. dp[v]: per-node value that dfs() sums up.
+	vector<vector<int>> adj;
+	vector<ll> dp;
+	void buildGraph() { // covers the nodes existing at call time; may be called again
+		adj.assign(sz(aho), vector<int>()); // assign, not resize: drops edges of an earlier call
+		dp.assign(sz(aho), 0);
+		for (int i = 1; i < sz(aho); i++) {
+			adj[getSuffix(i)].push_back(i);
+	}}
+	// Adds dp of all adj-descendants into dp[v]. buildGraph() zeroes dp, so set values before.
+	void dfs(int v = 0) { // dp on tree
+		for (int u : adj[v]) {
+			//dp[u] = dp[v] + aho[u].cnt; // pattern count
+			dfs(u);
+			dp[v] += dp[u]; // no of matches
+	}}
+};