10. Regular Expression Matching - cocoder39/coco39_LC GitHub Wiki
10. Regular Expression Matching
Notes 2020:
Option 1: recursion + memoization
Thinking process:
- What if we don't need to consider
'*'
?- take
'*'
as a separate branch - check
pCur + 1 < len(p)
before checkingp[pCur+1] == '*'
- take
- Ways to interpret
'*'
:-
'x*'
->''
->helper(sCur, pCur+2)
-
'x*'
->'x'
or'xx'
...- at least one match ->
firstMatch = sCur < len(s) and p[pCur] in {'.', s[sCur]}
- if
p[pCur] == '.'
, s[sCur] can be any valid char but need to be a char so we still need to checksCur < len(s)
- if
- Even if we need to match multiple
x
s, we can reduce search space by 1 each timehelper(sCur+1, pCur)
and let recursion handle the complexity
- at least one match ->
-
- terminal case
- if
sCur
== len(s), andpCur
< len(p), we still need to do match eg,''
vs'a*a*a*'
-
pCur
< len(p), we can stop
- if
Time complexity: we need to fill out memo P*S times where P = len(p) and S = len(s)
class Solution:
def isMatch(self, s: str, p: str) -> bool:
def helper(sCur, pCur):
if sCur in memo and pCur in memo[sCur]:
return memo[sCur][pCur]
if pCur == len(p):
return sCur == len(s)
firstMatch = sCur < len(s) and p[pCur] in {'.', s[sCur]}
res = False
if pCur + 1 < len(p) and p[pCur+1] == '*':
res = helper(sCur, pCur+2) or (firstMatch and helper(sCur+1, pCur))
else:
res = firstMatch and helper(sCur+1, pCur+1)
memo[sCur][pCur] = res
return res
memo = collections.defaultdict(dict)
return helper(0, 0)
Option 2: DP
class Solution:
def isMatch(self, s: str, p: str) -> bool:
dp = [[False] * (len(p)+1) for i in range(len(s)+1)]
dp[-1][-1] = True
for i in range(len(s), -1, -1):
for j in range(len(p)-1, -1, -1):
match = i < len(s) and p[j] in {s[i], '.'}
if j+1 < len(p) and p[j+1] == '*':
dp[i][j] = dp[i][j+2] or (match and dp[i+1][j])
else:
dp[i][j] = match and dp[i+1][j+1]
return dp[0][0]
============================================================
when matching s[0 .. i-1] with p[0 .. j-1], we care if p[j-1] is ''. if p[j-1] == '', which can represent two cases.
- One is cancelling p[j - 2], thus we care if s[0 .. i-1] matches p[0 .. j -3].
d[i][j] = d[i][j - 2]
- The other is representing one or more p[j - 2]. eg., "baaa" matches "ba*". Then the problem is reduced to check (1) if s[0 .. i-2] matches p[0 .. j-1]. (2) s[i-1] == p[j - 2].
d[i][j] = d[i - 1][j] && (p[j - 2] == s[i - 1] || p[j - 2] == '.')
another idea of considering '*':
case 1: "a*" matches "" => dp[i][j] = dp[i][j - 2]
case 2a (dp): "a*" matches at least one 'a' => "a*" = "a*a" = "a*" + 'a', where "a*" matches 0 or more 'a' (exactly definition of '*') and 'a' matches 'a' => p[i][j] = p[i - 1][j] && a = p[j - 2] matches s[i - 1]
case 2b (recursion): "a*" matches at least one 'a' => "a*" = "aa*" where 'a' matches 'a' and "a*" matches 0 or more 'a' (exactly definition of '*') => p[0] matches a[0] && isMatch(a.substr(1), p.substr(1))
bool isMatch(string s, string p) {
int m = s.length();
int n = p.length();
vector<vector<bool>> dp(m + 1, vector<bool>(n + 1)); //dp[i][j] = true iff s[0 .. i-1] matches p[0 .. j-1]
//empty p can only match empty s
dp[0][0] = true;
for (int i = 1; i <= m; i++) {
dp[i][0] = false;
}
dp[0][1] = false; //no way to match single-char p with empty s, even if p == "*"
for (int i = 2; i <= n; i++) {
dp[0][i] = dp[0][i - 2] && p[i - 1] == '*'; //'*' can cancel p[i - 2] in p
}
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
if (p[j - 1] != '*') {
dp[i][j] = dp[i - 1][j - 1] && (p[j - 1] == s[i - 1] || p[j - 1] == '.');
}
else {
if (j == 1) { //p == '*' cannot match any s
dp[i][j] = false;
}
else { //cancel p[j - 1] or represent at least one p[j - 1]
dp[i][j] = dp[i][j - 2] || (dp[i - 1][j] && (p[j - 2] == s[i - 1] || p[j - 2] == '.'));
}
}
}
}
return dp[m][n];
}
remove unnecessary assignment where dp[i][j] is already false from initialization
bool isMatch(string s, string p) {
int m = s.length(), n = p.length();
vector<vector<bool>> dp(m + 1, vector<bool>(n + 1));
dp[0][0] = true;
for (int i = 2; i <= n; i++) {
dp[0][i] = dp[0][i - 2] && p[i - 1] == '*';
}
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
if (p[j - 1] != '*') {
dp[i][j] = dp[i - 1][j - 1] && (p[j - 1] == s[i - 1] || p[j - 1] == '.');
} else if (j >= 2) {
dp[i][j] = dp[i][j - 2] || (dp[i - 1][j] && (p[j - 2] == s[i - 1] || p[j - 2] == '.'));
}
}
}
return dp[m][n];
}
optimize memory with rolling array. take care initial cases when using rolling array. specifically in this problem, initial cases are dp[pre][0]
(because we would use dp[pre][j - 1]
) and dp[cur][0]
(because we would use dp[cur][j - 2]
bool isMatch(string s, string p) {
int m = s.length(), n = p.length();
vector<vector<bool>> dp(2, vector<bool>(n + 1));
dp[0][0] = true;
for (int i = 2; i <= n; i++) {
dp[0][i] = dp[0][i - 2] && p[i - 1] == '*';
}
int pre = 0, cur = 1;
for (int i = 1; i <= m; i++) {
if (i > 1) {
dp[pre][0] = false;
}
dp[cur][0] = false;
for (int j = 1; j <= n; j++) {
if (p[j - 1] != '*') {
dp[cur][j] = dp[pre][j - 1] && (p[j - 1] == s[i - 1] || p[j - 1] == '.');
} else if (j >= 2) {
dp[cur][j] = dp[cur][j - 2] || (dp[pre][j] && (p[j - 2] == s[i - 1] || p[j - 2] == '.'));
}
}
swap(pre, cur);
}
return dp[pre][n];
}
recursion: t(m, n) = 1 + t(m, n-2) + t(m-1, n) => t(m, n-2) = 1 + t(m, n-4) + t(m-1, n-2) => t(m, n) > t(m-1, n) + t(m-1, n-2) > 2 * t(m-1, n-2) = 2 ^ (min(m, n/2))
bool isMatch(string s, string p) {
if (p.empty()) return s.empty();
//p.length() >= 1
if (p[1] == '*') { //p[1] = '\0' and p.substr(2) = "" when p.length() == 1
return isMatch(s, p.substr(2)) || (! s.empty() && (p[0] == s[0] || p[0] == '.') && isMatch(s.substr(1), p));
} else {
return ! s.empty() && (p[0] == s[0] || p[0] == '.') && isMatch(s.substr(1), p.substr(1));
}
}