Реализации алгоритмов/Суффиксный массив

Материал из Викиучебника — открытых книг для открытого мира

Построение суффиксного массива за линейное время c поиском.

C++[править]

TSuffixArray::TSuffixArray(TSuffixTree& tree) {
        Word = tree.Word;
        std::vector<std::pair<int,int> > st;
        st.push_back(std::make_pair(0, 0));
        while (!st.empty()) {
            std::pair<int,int> top = st.back();
            st.pop_back();
            if (top.second != 0 && tree.Len[top.second] == TSuffixTree::INF) {
                Array.push_back(Word.size() - (Word.size() - tree.LPos[top.second]) - top.first);
                continue;
            }
            std::map<int,int>::reverse_iterator iter;
            for (iter = tree.Edges[top.second].rbegin(); iter != tree.Edges[top.second].rend(); ++iter) {
                int newDepth = top.first;
                if (tree.Len[iter->second] != TSuffixTree::INF) {
                    newDepth += tree.Len[iter->second];
                }
                if (top.second != 0 || iter->first != '$') {
                     st.push_back(std::make_pair(newDepth, iter->second));
                }
            }
        }
    }
    void TSuffixArray::Search(std::string& word, std::vector<int>& results) {
        int l = -1;
        int r = Array.size();
        bool findSubstr = false;
        while (r - l > 1) {
            int m = (l + r) / 2;
            bool wholeSuffix = true;
            int index = 0;
            for (int i = Array[m]; i < Word.size()-1 && index < word.size(); ++i) {
                if (Word[i] < word[index]) {
                    l = m;
                    wholeSuffix = false;
                    break;
                }
                else if (Word[i] > word[index]) {
                    r = m;
                    wholeSuffix = false;
                    break;
                }
                ++index;
            }
            if (wholeSuffix) {
                l = m;
                if (index == word.size()) {
                    findSubstr = true;
                }
            }
        }
        if (!findSubstr) {
            return;
        }
        int rightAns = l;
        r = l;
        l = -1;
        while (r - l > 1) {
            int m = (l + r) / 2;
            bool wholeSuffix = true;
            int index = 0;
            for (int i = Array[m]; i < Word.size() - 1 && index < word.size(); ++i) {
                if (Word[i] < word[index]) {
                    l = m;
                    wholeSuffix = false;
                    break;
                }
                ++index;
            }
            if (wholeSuffix) {
                if (index == word.size()) {
                    r = m;
                }
                else {
                    l = m;
                }
            }
        }
        int leftAns = r;
        for (int i = leftAns; i <= rightAns; ++i) {
            results.push_back(Array[i] + 1);
        }        
    }
}