10. Regular Expression Matching - cocoder39/coco39_LC GitHub Wiki

10. Regular Expression Matching

Notes 2020:

Option 1: recursion + memoization

Thinking process:

  1. What if we don't need to consider '*'?
    • take '*' as a separate branch
    • check pCur + 1 < len(p) before checking p[pCur+1] == '*'
  2. Ways to interpret '*':
    • 'x*' -> '' -> helper(sCur, pCur+2)
    • 'x*' -> 'x' or 'xx' ...
      • at least one match -> firstMatch = sCur < len(s) and p[pCur] in {'.', s[sCur]}
        • if p[pCur] == '.', s[sCur] can be any valid char but need to be a char so we still need to check sCur < len(s)
      • Even if we need to match multiple xs, we can reduce search space by 1 each time helper(sCur+1, pCur) and let recursion handle the complexity
  3. terminal case
    • if sCur == len(s), and pCur < len(p), we still need to do match eg, '' vs 'a*a*a*'
    • pCur < len(p), we can stop

Time complexity: we need to fill out memo P*S times where P = len(p) and S = len(s)

class Solution:
    def isMatch(self, s: str, p: str) -> bool:
        
        def helper(sCur, pCur):
            if sCur in memo and pCur in memo[sCur]:
                return memo[sCur][pCur]
            
            if pCur == len(p):
                return sCur == len(s)
                        
            firstMatch = sCur < len(s) and p[pCur] in {'.', s[sCur]}
            
            res = False
            if pCur + 1 < len(p) and p[pCur+1] == '*':
                res = helper(sCur, pCur+2) or (firstMatch and helper(sCur+1, pCur))
            else:
                res = firstMatch and helper(sCur+1, pCur+1)
            
            memo[sCur][pCur] = res
            return res
        
        memo = collections.defaultdict(dict)
        return helper(0, 0)

Option 2: DP

class Solution:
    def isMatch(self, s: str, p: str) -> bool:
        dp = [[False] * (len(p)+1) for i in range(len(s)+1)]
        
        dp[-1][-1] = True
        for i in range(len(s), -1, -1):
            for j in range(len(p)-1, -1, -1):
                match = i < len(s) and p[j] in {s[i], '.'}
                if j+1 < len(p) and p[j+1] == '*':
                    dp[i][j] = dp[i][j+2] or (match and dp[i+1][j])
                else:
                    dp[i][j] = match and dp[i+1][j+1]
                    
        return dp[0][0]

============================================================

when matching s[0 .. i-1] with p[0 .. j-1], we care if p[j-1] is ''. if p[j-1] == '', which can represent two cases.

  • One is cancelling p[j - 2], thus we care if s[0 .. i-1] matches p[0 .. j -3]. d[i][j] = d[i][j - 2]
  • The other is representing one or more p[j - 2]. eg., "baaa" matches "ba*". Then the problem is reduced to check (1) if s[0 .. i-2] matches p[0 .. j-1]. (2) s[i-1] == p[j - 2]. d[i][j] = d[i - 1][j] && (p[j - 2] == s[i - 1] || p[j - 2] == '.')
another idea of considering '*':
case 1: "a*" matches "" => dp[i][j] = dp[i][j - 2]
case 2a (dp): "a*" matches at least one 'a' => "a*" = "a*a" = "a*" + 'a', where "a*" matches 0 or more 'a' (exactly definition of '*') and 'a' matches 'a' => p[i][j] = p[i - 1][j] && a = p[j - 2] matches s[i - 1]
case 2b (recursion): "a*" matches at least one 'a' => "a*" = "aa*" where 'a' matches 'a' and "a*" matches 0 or more 'a' (exactly definition of '*') => p[0] matches a[0] &&  isMatch(a.substr(1), p.substr(1))
bool isMatch(string s, string p) {
        int m = s.length();
        int n = p.length();
        vector<vector<bool>> dp(m + 1, vector<bool>(n + 1)); //dp[i][j] = true iff s[0 .. i-1] matches p[0 .. j-1]
        
        //empty p can only match empty s
        dp[0][0] = true;
        for (int i = 1; i <= m; i++) {
            dp[i][0] = false; 
        }
        
        dp[0][1] = false; //no way to match single-char p with empty s, even if p == "*"
        for (int i = 2; i <= n; i++) {
            dp[0][i] = dp[0][i - 2] && p[i - 1] == '*'; //'*' can cancel p[i - 2] in p
        }
        
        for (int i = 1; i <= m; i++) {
            for (int j = 1; j <= n; j++) {
                if (p[j - 1] != '*') {
                    dp[i][j] = dp[i - 1][j - 1] && (p[j - 1] == s[i - 1] || p[j - 1] == '.');
                }
                else {
                    if (j == 1) { //p == '*' cannot match any s
                        dp[i][j] = false;
                    }
                    else { //cancel p[j - 1] or represent at least one p[j - 1]
                        dp[i][j] = dp[i][j - 2] || (dp[i - 1][j] && (p[j - 2] == s[i - 1] || p[j - 2] == '.'));
                    }        
                }
            }
        }
        return dp[m][n];
    }

remove unnecessary assignment where dp[i][j] is already false from initialization

bool isMatch(string s, string p) {
        int m = s.length(), n = p.length();
        vector<vector<bool>> dp(m + 1, vector<bool>(n + 1));
        
        dp[0][0] = true;
        for (int i = 2; i <= n; i++) {
            dp[0][i] = dp[0][i - 2] && p[i - 1] == '*';
        }
        
        for (int i = 1; i <= m; i++) {
            for (int j = 1; j <= n; j++) {
                if (p[j - 1] != '*') {
                    dp[i][j] = dp[i - 1][j - 1] && (p[j - 1] == s[i - 1] || p[j - 1] == '.');
                } else if (j >= 2) { 
                    dp[i][j] = dp[i][j - 2] || (dp[i - 1][j] && (p[j - 2] == s[i - 1] || p[j - 2] == '.'));
                }
            }
        }
        return dp[m][n];
    }

optimize memory with rolling array. take care initial cases when using rolling array. specifically in this problem, initial cases are dp[pre][0] (because we would use dp[pre][j - 1]) and dp[cur][0] (because we would use dp[cur][j - 2]

bool isMatch(string s, string p) {
        int m = s.length(), n = p.length();
        vector<vector<bool>> dp(2, vector<bool>(n + 1));
        
        dp[0][0] = true;
        for (int i = 2; i <= n; i++) {
            dp[0][i] = dp[0][i - 2] && p[i - 1] == '*';
        }
        
        int pre = 0, cur = 1;
        for (int i = 1; i <= m; i++) {
            if (i > 1) {
                dp[pre][0] = false;
            }
            dp[cur][0] = false;
            
            for (int j = 1; j <= n; j++) {
                if (p[j - 1] != '*') {
                    dp[cur][j] = dp[pre][j - 1] && (p[j - 1] == s[i - 1] || p[j - 1] == '.');
                } else if (j >= 2) { 
                    dp[cur][j] = dp[cur][j - 2] || (dp[pre][j] && (p[j - 2] == s[i - 1] || p[j - 2] == '.'));
                }
            }
            swap(pre, cur);
        }
        return dp[pre][n];
    }

recursion: t(m, n) = 1 + t(m, n-2) + t(m-1, n) => t(m, n-2) = 1 + t(m, n-4) + t(m-1, n-2) => t(m, n) > t(m-1, n) + t(m-1, n-2) > 2 * t(m-1, n-2) = 2 ^ (min(m, n/2))

bool isMatch(string s, string p) {
        if (p.empty())  return s.empty();
        //p.length() >= 1
        if (p[1] == '*') { //p[1] = '\0' and p.substr(2) = "" when p.length() == 1
            return isMatch(s, p.substr(2)) || (! s.empty() && (p[0] == s[0] || p[0] == '.') && isMatch(s.substr(1), p));
        } else {
            return ! s.empty() && (p[0] == s[0] || p[0] == '.') && isMatch(s.substr(1), p.substr(1));
        }
    }
⚠️ **GitHub.com Fallback** ⚠️