C++ Regex - RicoJia/notes GitHub Wiki
========================================================================
======================================================================== 0. Introduced in C++11
-
Test Program
#include <iostream>
#include <regex>
#include <string>
#include <vector>
using namespace std;

// Test harness for trying out a pattern: swap in a different `r` and `vec`
// to see which candidate strings the pattern accepts.
int main() {
    regex r("abc");  // exact match
    vector<string> vec {"abv", "abc", "ABC"};

    for (const string& candidate : vec) {
        // regex_match succeeds only when the ENTIRE string matches the pattern.
        if (regex_match(candidate, r)) {
            cout << "matched" << endl;
        } else {
            cout << "not matched" << endl;
        }
    }
    return 0;
}
-
match anything but newline:
.*
-
exact matches
regex r("abc"); //exact match vector<string> vec {"abv", "abc", "ABC"}; // not match, match, not match
-
Case Insensitive, regex_constants::icase
regex r("abc", regex_constants::icase); // icase = case-insensitive matching vector<string> vec {"abv", "abc", "ABC", "aBC"}; //not match, match, match, match
-
Any char but '\n', . (exactly one char)
regex r("abc."); vector<string> vec {"abc\n", "abc", "abcd", "abccc"}; //not match, not match (there's gotta be a char), match, not matched
-
or |
regex r("ab|cd"); // ab or cd vector<string> vec { "ab", "cd", "abc"}; //match, match, not match
-
Be careful with spaces, they count!
regex r("ab "); // the trailing space is part of the pattern vector<string> vec {"ab"}; // not match, cuz YOU NEED THAT SPACE!
-
Escape using
\\
regex r("ab\\["); // \\[ escapes the bracket so it matches a literal [ vector<string> vec { "ab", "ab["}; //not match, match
-
operators dependent on number of preceding chars
-
0 or 1 preceding char, ?
regex r("abc?"); // ? means 0 or 1 of the preceding char, so this matches exactly "ab" or "abc" vector<string> vec {"ab", "abc", "abd", "abcd"}; // match, match, not match, not match (just one char only)
-
any number of the preceding char, *, could be zero
regex r("abc*"); // * is [0, inf] of the preceding char, so this matches "ab", "abc", "abcc", ... vector<string> vec {"ab", "abc", "abcc", "abccc", "abcd", "abcdd"}; // match, match, match, match, not match (must be preceding char), not match
-
\s*
means any number of whitespace characters (including zero); in a C++ string literal write it as "\\s*" -
.
is a wildcard for any single character except a newline
-
-
At least 1 preceding char, +
regex r("abc+"); // + is [1, inf] of the preceding char, so this matches "abc", "abcc", "abccc", ... vector<string> vec {"ab", "abc", "abcc", "abccc", "abcd", "abcdd"}; // not match, match, match, match, not match (must be preceding char), not match
-
scope operators
-
A single char from []
regex r("ab[cd]"); // a single char from [] vector<string> vec {"ab", "abc", "abcc", "abccc", "abcd", "abcdd"}; // not match, match, not match, not match, not match, not match
-
A range of char
regex r("ab[a-z]+"); // [a-z] means anything in a-z vector<string> vec { "abcdfaasdfa"}; // match
-
number of chars using {}
-
Exactly some number of chars
regex r("ab[^cd]{3}"); // 3 chars that are not in [cd] vector<string> vec {"abcdc", "abc", "abd", "abeee"}; // not match, not match, not match, match
-
A range of chars
regex r("ab[^cd]{3,5}"); // 3-5 chars that are not in [cd] vector<string> vec { "abeeeeee", "abeee"}; //not match, match
-
more than some number of chars
regex r("ab[^cd]{3,}"); // 3+ chars that are not in [cd] vector<string> vec { "abeeeeee", "abeee"}; //match, match
-
Composite operators
-
[0, inf] number of chars from []
regex r("ab[cd]*"); // [0, inf] number of chars from [] vector<string> vec {"ab", "abc", "abcc", "abccc", "abcd", "abcdd"}; // match, match, match, match, match, match
-
[^ ... ] is anything but these chars
regex r("ab[^c]"); // [^] is anything but these char vector<string> vec {"ab", "abc", "abd"}; // not match, not match, match regex r("ab[^cd]"); // ^ is anything but these chars vector<string> vec {"ab", "abc", "abd"}; // not match, not match, not match
-
[^...]* is [0, inf] number of anything but these chars
regex r("ab[^cd]*"); // [^...]* is [0, inf] number of anything but these chars vector<string> vec {"ab", "abc", "abd", "abfg"}; // match, not match, not match, match
-
() is a sub_group, and
\\1
means to repeat the first sub_group, i.e. to match again the exact text that the first sub_group captured
regex r("(ab)\\1"); // () is called a capture group; here it captures "ab", and \\1 requires that same text to appear again vector<string> vec { "ab", "abab"}; //not match, match
- with an +
regex r("(abc)de+\\1"); // e+ is one or more e; \\1 must repeat the captured "abc" vector<string> vec { "ab", "abpabd", "abcdeeabc"}; //not match, not match, match
- multiple sub_groups, with + in ()
regex r("(ab)c(de+)\\2\\1"); // \\2 repeats what (de+) captured, then \\1 repeats what (ab) captured vector<string> vec { "ab", "abpabd", "abcdeedeeab"}; //not match, not match, match
- with an +
-
^ means to be at the beginning of the string, when used alone (outside of [])
regex r("^abc."); // ^ anchors the pattern to the start of the string vector<string> vec { "abcd", "bbabcd"}; // match, not matched
-
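$ means abc should be at the end of line. C++ does support it, but it adds nothing with regex_match, which must already match the entire string; use regex_search to test only the end
regex r("abc$"); // $ anchors the pattern to the end. regex_match still fails on "bbabc" because of the leading "bb"; regex_search would find it vector<string> vec { "abcd", "bbabc"}; //not match, not matched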
-
Examples
-
tell if something is an email:
regex r("[[:w:]]+@[[:w:]]+\\.com"); // searching for an email, [[:w:]] is a word char, @ is @, \\. is to escape and we get a pure . // word char: letter, digit, underscore vector<string> vec { "user@example.com"}; // match
-
Tell if a string contains a match:
// Reports whether each sample string CONTAINS an e-mail-like substring.
// regex_search succeeds when any part of the input matches the pattern,
// whereas regex_match requires the entire input to match.
int main() {
    // [[:w:]] is a word char (letter, digit or underscore), @ is a literal @,
    // and \\. escapes the dot so that it matches a literal '.'.
    regex r("[[:w:]]+@[[:w:]]+\\.com");
    // The sample must actually contain "<word>@<word>.com" for the search to hit.
    vector<string> vec {"user@example.com"};  // match

    for (const string& str : vec) {
        bool match = regex_search(str, r);
        cout << (match ? "matched" : "not matched") << endl;
    }
    return 0;
}