AC自动机学习记录

参考资料

字典树(讲解+模版)

AC自动机算法

AC自动机算法详解

hdu 2222 ac自动机入门题

HDU 2222

Keywords Search

题意

给出一些模式串和一个主串,求主串中能匹配到的模式串数量。

思路

AC自动机裸题。

代码

#include <cstdio>
#include <cstring>
#include <queue>
using namespace std;
#define LOCAL

const int kind = 26;
const int s_maxn = 50+7;
const int t_maxn = 1000000+7;

int n;

struct node {
    int count;
    struct node *next[kind];
    struct node *fail;
    void init() {
        for(int i=0; i < kind; ++i)
            next[i] = NULL;
        count = 0;
        fail = NULL;
    }
} *root;

void insert(char str[]) {
    node *p = root;
    for(int i=0; str[i]; ++i) {
        int pos = str[i]-'a';
        if(p->next[pos] == NULL) {
            p->next[pos] = new node;
            p->next[pos]->init();
            p = p->next[pos];
        }
        else p = p->next[pos];
    }
    p->count++;
}

void getFail() {
    node *p = root, *son, *temp;
    queue<node*> que;
    que.push(p);
    while(!que.empty()){
        temp = que.front(); que.pop();
        for(int i=0; i < kind; ++i){
            son = temp->next[i];
            if(son == NULL) continue;
            if(temp == root) son->fail = root;
            else {
                p = temp->fail;
                while(p){
                    if(p->next[i]){
                        son->fail = p->next[i];
                        break;
                    }
                    p = p->fail;
                }
                if(!p) son->fail = root;
            }
            que.push(son);
        }
    }
}

void query(char str[]) {
    int cnt = 0;
    node *p, *temp;
    p = root;
    for (int i=0; str[i]; ++i) {
        int pos = str[i]-'a';
        while(!p->next[pos] && p!=root)
            p = p->fail;
        p = p->next[pos];
        if(!p) p = root;
        temp = p;
        while (temp != root) {
            if(temp->count >= 0) {
                cnt += temp->count;
                temp->count = -1;
            }
            else break;
            temp = temp->fail;
        }
    }
    printf("%d\n", cnt);
}

void init() {
    char s[s_maxn];
    root = new node;
    root->init();
    scanf("%d", &n);
    for(int i=0; i < n; ++i) {
        scanf("%s", s);
        insert(s);
    }
    getFail();
}

int main() {
#ifdef LOCAL
    freopen("in.txt", "r", stdin);
#endif
    int cas; scanf("%d", &cas);
    while(cas--) {
        init();
        char t[t_maxn];
        scanf("%s", t);
        query(t);
    }
    return 0;
}

HDU 2896

病毒侵袭

题意

给出一些模式串,再给出一些主串,求每个主串中能匹配到的模式串的编号,以及能匹配到模式串的主串数量。

思路

本来是想用上面 HDU 2222 的模板改的,发现会MLE,就重新找了一个模板。

这个模板是开一个node数组,通过nextfail都通过下标来指示位置,
数据量一定时节省空间但是不能根据数据量自动拓展。

代码

#include <cstdio>
#include <cstring>
#include <cmath>
#include <iostream>
#include <algorithm>
#include <queue>

using namespace std;

const int kind = 128;
const int s_maxn = 200+7;
const int t_maxn = 10000+7;
const int maxn = 500+7;

struct node {
    int fail, flag, next[kind];
    void init() {
        for(int i=0; i < kind; ++i)
            next[i] = 0;
        fail = -1, flag = 0;
    }
} tb[100007];

int web_cnt, n, m, cur;
char s[s_maxn], t[t_maxn];

void insert(const char str[], int id) {
    int temp = 0;
    for(int k=0; str[k]; ++k) {
        int pos = str[k];
        if(tb[temp].next[pos] == 0) {
            tb[++cur].init();
            tb[temp].next[pos] = cur;
        }
        temp = tb[temp].next[pos];
    }
    tb[temp].flag = id;
}

void getfail() {
    int p, son;
    queue<int> que;
    que.push(0);
    while(!que.empty()){
        int temp = que.front(); que.pop();
        for(int i=0; i < kind; ++i){
            son = tb[temp].next[i];
            if (!son) continue;
            p = tb[temp].fail;
            while (~p && !tb[p].next[i])
                p = tb[p].fail;
            if (!~p)
                tb[son].fail = 0;
            else
                tb[son].fail = tb[p].next[i];
            que.push(son);
        }
    }
}

void query(char str[], int id) {
    int ans[maxn] = {}, cnt = 0;
    int p = 0, temp;
    for (int i=0; str[i]; ++i) {
        int pos = str[i];
        if (tb[p].next[pos])
            p = tb[p].next[pos];
        else {
            temp = tb[p].fail;
            while (~temp && !tb[temp].next[pos])
                temp = tb[temp].fail;
            if (p == -1) p = 0;
            else p = tb[temp].next[pos];
        }
        if(tb[p].flag) {
            ans[tb[p].flag] = 1;
            ++cnt;
            temp = tb[p].fail;
            while (temp && !ans[tb[temp].flag]) {
                ans[tb[temp].flag] = 1;
                temp = tb[temp].fail;
                ++cnt;
            }
        }
    }
    if (cnt == 0) return;
    ++web_cnt;
    printf("web %d:", id);
    for (int i = 1; i <= n; ++i)
        if (ans[i]) printf(" %d", i);
    puts("");
}

void solve() {
    web_cnt = cur = 0;
    tb[0].init();
    for (int i = 1; i <= n; ++i) {
        scanf("%s", s);
        insert(s, i);
    }
    getfail();
    int m; scanf("%d", &m);
    for (int i = 1; i <= m; ++i) {
        scanf("%s", t);
        query(t, i);
    }
    printf("total: %d\n", web_cnt);
}

int main(){
#ifdef LOCAL
    freopen("in.txt", "r", stdin);
#endif
    while (~scanf("%d", &n))
        solve();
    return 0;
}

HDU 3065

题意

给出若干模式串,要求在主串中找到所有的模式串,并输出每种模式串有多少个。

思路

模板。唯一需要注意的是模式串只含有大写字母,主串有ASCII中所有的可见字符,在读到大写字母以外的字符时要重新回到根节点。

代码

#include <cstdio>
#include <cstring>
#include <cmath>
#include <iostream>
#include <algorithm>
#include <queue>

using namespace std;
#define LOCAL

const int kind = 26;
const int maxn = 1000+7;
const int s_maxn = 50+7;
const int t_maxn = 2000000+7;
const char FLAG = 'A';

int n, ans[maxn];
char s[maxn][s_maxn];

struct node {
    int count;
    struct node *next[kind];
    struct node *fail;
    void init() {
        for(int i=0; i < kind; ++i)
            next[i] = NULL;
        count = 0;
        fail = NULL;
    }
} *root;

void insert(char str[], int id) {
    node *p = root;
    for(int i=0; str[i]; ++i) {
        int pos = str[i]-FLAG;
        if(p->next[pos] == NULL) {
            p->next[pos] = new node;
            p->next[pos]->init();
            p = p->next[pos];
        }
        else p = p->next[pos];
    }
    p->count = id;
}

void getFail() {
    node *p = root, *son, *temp;
    queue<node*> que;
    que.push(p);
    while(!que.empty()){
        temp = que.front(); que.pop();
        for(int i=0; i < kind; ++i){
            son = temp->next[i];
            if(son == NULL) continue;
            if(temp == root) son->fail = root;
            else {
                p = temp->fail;
                while(p){
                    if(p->next[i]){
                        son->fail = p->next[i];
                        break;
                    }
                    p = p->fail;
                }
                if(!p) son->fail = root;
            }
            que.push(son);
        }
    }
}

void query(char str[]) {
    node *p, *temp;
    p = root;
    for (int i=0; str[i]; ++i) {
        int pos = str[i]-FLAG;
        if (pos < 0 || pos > 25) {
            p = root;
            continue;
        }
        while(!p->next[pos] && p!=root)
            p = p->fail;
        p = p->next[pos];
        if(!p) p = root;
        temp = p;
        while (temp != root) {
            if(temp->count)
                ++ans[temp->count];
            else break;
            temp = temp->fail;
        }
    }
}

void init() {
    memset(ans, 0, sizeof ans);
    root = new node;
    root->init();
    for(int i=1; i <= n; ++i) {
        scanf("%s", s[i]);
        insert(s[i], i);
    }
    getFail();
}

int main(){
#ifdef LOCAL
    freopen("in.txt", "r", stdin);
#endif
    while (~scanf("%d", &n)) {
        init();
        char t[t_maxn];
        scanf("%s", t);
        query(t);
        for (int i = 1; i <= n; ++i)
            if (ans[i]) printf("%s: %d\n", s[i], ans[i]);
    }
    return 0;
}