C _Regex - RicoJia/notes GitHub Wiki

========================================================================

Regex

======================================================================== 0. Introduced in C++11

  1. Test Program

    #include <iostream>
    #include <regex>
    #include <vector>
    #include <string>
    using namespace std;
    
    // Test harness: checks every string in vec against r with regex_match
    // and prints one "matched" / "not matched" line per string.
    int main()
    {
      regex r("abc");         //exact match
      vector<string> vec {"abv", "abc", "ABC"};
      for(const auto& candidate : vec){
          const bool whole_string_matches = regex_match(candidate, r);
          if(whole_string_matches){
              cout << "matched" << endl;
          } else {
              cout << "not matched" << endl;
          }
      }
      return 0;
    }
  2. match anything but newline: .*

  3. exact matches

    regex r("abc");         //exact match
    vector<string> vec {"abv", "abc", "ABC"}; // not match, match, not match
  4. Case Insensitive, regex_constants::icase

    regex r("abc", regex_constants::icase);         //icase = case-insensitive matching
    vector<string> vec {"abv", "abc", "ABC", "aBC"};    //not match, match, match, match
  5. Any char but '\n', . (exactly one char)

    regex r("abc.");         
    vector<string> vec {"abc\n", "abc", "abcd", "abccc"};    //not match, not match (there's gotta be a char), match, not matched
  6. or |

    regex r("ab|cd");         // ab or cd
    vector<string> vec { "ab", "cd", "abc"};    //match, match, not match
  7. Be careful with spaces, they count!

    regex r("ab ");         // the trailing space is part of the pattern, so the input must contain it too
    vector<string> vec {"ab"};    // not match, cuz YOU NEED THAT SPACE!
  8. Escape using \\

    regex r("ab\\[");         // \\[ in the C++ literal reaches the regex as \[, i.e. a literal '[' instead of the start of a [] set
    vector<string> vec { "ab", "ab["};    //not match, match
  9. operators dependent on number of preceding chars

  10. 0 or 1 preceding char, ?

    regex r("abc?");         // ? means [0,1] of the preceding char ('c' here). equivalent to "ab", "abc"
    vector<string> vec {"ab", "abc", "abd", "abcd"};    // match, match, not match, not match (at most one 'c', and nothing may follow it)
  11. any number of the preceding char, *, could be zero

    regex r("abc*");         // * is [0, inf] of the preceding char. equivalent to "ab", "abc", "abcc", ...
    vector<string> vec {"ab", "abc", "abcc", "abccc", "abcd", "abcdd"};    // match, match, match, match, not match (only the preceding char 'c' may repeat), not match
    • \s* means any number of whitespace chars (write it as "\\s*" inside a C++ string literal)
    • . is a wildcard for any single character except '\n'
  12. At least 1 preceding char, +

    regex r("abc+");         // + is [1, inf] of the preceding char. equivalent to "abc", "abcc", "abccc", ...
    vector<string> vec {"ab", "abc", "abcc", "abccc", "abcd", "abcdd"};    // not match, match, match, match, not match (only the preceding char 'c' may repeat), not match
  13. scope operators

  14. A single char from []

    regex r("ab[cd]");         // a single char from []
    vector<string> vec {"ab", "abc", "abcc", "abccc", "abcd", "abcdd"};    // not match, match, not match, not match, not match, not match
  15. A range of char

    regex r("ab[a-z]+");         // [a-z] means anything in a-z
    vector<string> vec { "abcdfaasdfa"};    // match
  16. number of chars using {}

  17. Exactly some number of chars

    regex r("ab[^cd]{3}");         // 3 chars that are not in [cd] 
    vector<string> vec {"abcdc", "abc", "abd", "abeee"};    // not match, not match, not match, match
  18. A range of chars

    regex r("ab[^cd]{3,5}");         // 3-5 chars that are not in [cd] 
    vector<string> vec { "abeeeeee", "abeee"};    //not match, match
  19. more than some number of chars

    regex r("ab[^cd]{3,}");         // 3+ chars that are not in [cd] 
    vector<string> vec { "abeeeeee", "abeee"};    //match, match
  20. Composite operators

  21. [0, inf] number of chars from []

    regex r("ab[cd]*");         // [0, inf] number of chars from []
    vector<string> vec {"ab", "abc", "abcc", "abccc", "abcd", "abcdd"};    // match, match, match, match, match, match
  22. [^ ... ] is anything but these chars

    regex r("ab[^c]");         // [^] is anything but these char
    vector<string> vec {"ab", "abc", "abd"};    // not match, not match, match
    regex r("ab[^cd]");         // ^ is anything but these chars
    vector<string> vec {"ab", "abc", "abd"};    // not match, not match, not match
  23. [^...]* is [0, inf] number of anything but these chars

    regex r("ab[^cd]*");         // [^...]* is [0, inf] number of anything but these chars
    vector<string> vec {"ab", "abc", "abd", "abfg"};    // match, not match, not match, match
  24. () is a sub_group, and \\1 means to repeat the first sub_group

    regex r("(ab)\\1");         // () is called a capture group, here the sequence matching "ab"; \\1 must repeat whatever the first group matched
    vector<string> vec { "ab", "abab"};    //not match, match
    • with an +
      regex r("(abc)de+\\1");         // group 1 captures "abc", then 'd', then one or more 'e', then \\1 repeats the captured "abc"
      vector<string> vec { "ab", "abpabd", "abcdeeabc"};    //not match, not match, match
    • multiple sub_groups, with + in ()
      regex r("(ab)c(de+)\\2\\1");         // group 1 is "ab", group 2 is 'd' followed by one or more 'e'; \\2 then \\1 repeat what each group matched, in that order
      vector<string> vec { "ab", "abpabd", "abcdeedeeab"};    //not match, not match, match
  25. ^ means to be at the beginning of the string, when used alone (as the first char inside [], it negates the set instead)

    regex r("^abc.");         // ^ anchors the pattern to the start of the string; the trailing . needs exactly one more char (anything but '\n')
    vector<string> vec { "abcd", "bbabcd"};    // match, not matched 
  26. $ means abc should be at the end of the string. C++ does support it, but regex_match already has to match the whole string, so use regex_search to see its effect

    regex r("abc$");         // $ means abc should be at the end of the string; it is supported, but adds nothing under regex_match, which must match the whole string anyway
    vector<string> vec { "abcd", "bbabc"};    //not match ('d' follows abc), not matched (regex_match rejects the leading "bb"; regex_search would find abc at the end)
  27. Examples

  28. tell if something is an email:

    regex r("[[:w:]]+@[[:w:]]+\\.com");         // checking for an email: [[:w:]] is a word char, @ is a literal @, \\. escapes the dot so it matches a literal .
    // word char: letter, digit, underscore
    vector<string> vec { "someone@example.com"};    // match (the sample address had been replaced by an email-obfuscation placeholder, which does not match)
  29. Tell if a string contains a match:

    // Reports, for each string in vec, whether it CONTAINS an email-like substring.
    // Uses regex_search, which succeeds on a partial match; regex_match would need
    // the whole string to match.
    int main()
    {
        regex r("[[:w:]]+@[[:w:]]+\\.com");         // [[:w:]] is a word char (letter, digit, underscore), @ is a literal @, \\. escapes the dot so it matches a literal .
        // The sample address had been replaced by an email-obfuscation placeholder,
        // which contains no '@' and therefore printed "not matched".
        vector<string> vec { "someone@example.com"};    // match
        for(const string& str: vec){
            bool match = regex_search(str, r);
            cout << (match ? "matched" : "not matched") << endl;
        }
        return 0;
    }
⚠️ **GitHub.com Fallback** ⚠️