30. Substring with Concatenation of All Words - cocoder39/coco39_LC GitHub Wiki

30. Substring with Concatenation of All Words

vector<int> findSubstring(string s, vector<string>& words) {
        int slen = s.length();
        int sz = words.size();
        if (slen == 0 || sz == 0)   return {};
        int wlen = words[0].length();
        if (wlen == 0)  return {};
        
        unordered_map<string, int> Dict;
        for (auto word : words) {
            Dict[word]++;
        }
        
        vector<int> res;
        for (int i = 0; i <= slen - sz * wlen; i++) {
            unordered_map<string, int> Win;
            int start = i, cnt = 0;
            for (int j = i; j <= slen - wlen; j += wlen) {
                string str = s.substr(j, wlen);
                if (Dict.find(str) != Dict.end() && Win[str] < Dict[str]) { //an effective supply
                    Win[str]++;
                    cnt++;
                } else { //violation
                    break;
                }
                
                if (cnt == sz) { //found all words
                    res.push_back(start);
                    Win[s.substr(start, wlen)]--;
                    cnt--;
                    start += wlen;
                }
            }
        }
        return res;
    }
Your input
"barfoofoobarthefoobarman"
["bar","foo","the"]

Your answer
[6,9,12,9,12,12]

Expected answer
[6,9,12]

basic idea is simple:

    1. find out a result
    1. shift the window to check if there are other results: pop a word from the begin of the result and try to push new word into the window.

At step 2, once we have a start pointer, we would check the whole words chain starting from the start. Thus instead of iterating i within [0, slen], here we iterate i within [0, wlen]

wlen times iterations, each iteration access slen/wlen words, access each word at most twice, access each word takes O(wlen) 
time is O(wlen * slen/wlen * 2 * wlen) = O(wlen * slen)
vector<int> findSubstring(string s, vector<string>& words) {
        int slen = s.length();
        int sz = words.size();
        if (slen == 0 || sz == 0)   return {};
        int wlen = words[0].length();
        if (wlen == 0)  return {};
        
        unordered_map<string, int> Dict;
        for (auto word : words) {
            Dict[word]++;
        }
        
        vector<int> res;
        for (int i = 0; i < wlen; i++) { //wlen times iteration, each iteration traverses slen/wlen words
            unordered_map<string, int> Win;
            int cnt = 0, start = i;
            for (int j = i; j <= slen - wlen && j <= slen - (sz - cnt) * wlen; j += wlen) {
                string str = s.substr(j, wlen); //O(wlen) to access each word
                if (Dict.find(str) == Dict.end()) { //invalid word terminates the concatenation
                    Win.clear();
                    cnt = 0;
                    start = j + wlen;
                } else {
                    Win[str]++;
                    while (Win[str] > Dict[str]) { //extra supply, access each word at most twice, push in and pop out
                        Win[s.substr(start, wlen)]--;
                        cnt--;
                        start += wlen;
                    }
                    cnt++;
                    if (cnt == sz)  { 
                        res.push_back(start);
                        Win[s.substr(start, wlen)]--;
                        cnt--;
                        start += wlen;
                    }
                }
            }
        }
        return res;
    }
⚠️ **GitHub.com Fallback** ⚠️