import re


dir(re)

['A',
 'ASCII',
 'DEBUG',
 'DOTALL',
 'I',
 'IGNORECASE',
 'L',
 'LOCALE',
 'M',
 'MULTILINE',
 'Match',
 'Pattern',
 'RegexFlag',
 'S',
 'Scanner',
 'T',
 'TEMPLATE',
 'U',
 'UNICODE',
 'VERBOSE',
 'X',
 '_MAXCACHE',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '__version__',
 '_cache',
 '_compile',
 '_compile_repl',
 '_expand',
 '_locale',
 '_pickle',
 '_special_chars_map',
 '_subx',
 'compile',
 'copyreg',
 'enum',
 'error',
 'escape',
 'findall',
 'finditer',
 'fullmatch',
 'functools',
 'match',
 'purge',
 'search',
 'split',
 'sre_compile',
 'sre_parse',
 'sub',
 'subn',
 'template']


pat = 'xxx'


r = re.search(pat, ' x ')


r ### there is no match


r2 = re.search(pat, 'xxxyyyxxx')


r2  ### there is a match!

<re.Match object; span=(0, 3), match='xxx'>


re.search('ab.','abxddd')

<re.Match object; span=(0, 3), match='abx'>


re.search('ab..', 'abc')  ## no match


re.search(r'\w\w\w', 'ab345')  # \w matches a word character (letter, digit or underscore); raw string keeps the backslash intact

<re.Match object; span=(0, 3), match='ab3'>


re.search(r'\w\w\w', 'ab dc')  ## no match: the space interrupts every run of three word characters


re.search(r'\w\w\W\w', 'ab dc')  # \W matches a non-word character, here the space

<re.Match object; span=(0, 4), match='ab d'>


re.search(r'\w\w\b\w\w','ab dc')


re.search(r'\b\w\w\b','ab dc')

<re.Match object; span=(0, 2), match='ab'>


re.search(r'\bfoo\b', 'foo')

<re.Match object; span=(0, 3), match='foo'>


re.search(r'\bfoo\b', 'foo.')

<re.Match object; span=(0, 3), match='foo'>


re.search(r'\bfoo\b', '(foo)')

<re.Match object; span=(1, 4), match='foo'>


re.search(r'\bfoo\b', 'bar foo baz')

<re.Match object; span=(4, 7), match='foo'>


re.search(r'\bfoo\b', 'foobar')


re.search(r'\bfoo\b', 'foo3')


re.search(r'\w\w\s\w\w', 'ab dc')  # \s matches a whitespace character

<re.Match object; span=(0, 5), match='ab dc'>


re.search(r'\S\S\s\S\S', 'ab dc')  # \S matches any non-whitespace character

<re.Match object; span=(0, 5), match='ab dc'>


re.search(r'\d\d\d', '12345')  # \d matches a decimal digit

<re.Match object; span=(0, 3), match='123'>


re.search(r'\d\d\d\D\d\d\d', '123 456')  # \D matches any non-digit character, here the space

<re.Match object; span=(0, 7), match='123 456'>


re.search(r'\d\d\d', 'x123456')  # search() scans forward, so the leading x is skipped

<re.Match object; span=(1, 4), match='123'>


re.search(r'^\d\d\d', 'x123456')  # no match: ^ anchors the pattern at the start of the string


re.search(r'^\d\d\d', '123456')  # ^ anchors at the start, so the first three digits match

<re.Match object; span=(0, 3), match='123'>


re.search(r'\d\d\d$', '123456')  # $ anchors at the end, so the last three digits match

<re.Match object; span=(3, 6), match='456'>


re.search(r'\d\d\d$', '123456 ')  ## no match: the trailing space sits between the digits and the end


re.search(r'\d\d\d$', '123456 '.strip())  # strip() removes the trailing space before matching

<re.Match object; span=(3, 6), match='456'>


re.search(r'ab\.', 'abc') ## no match: \. matches only a literal period


re.search(r'ab\.', 'ab...')  # the escaped period matches the first literal '.'

<re.Match object; span=(0, 3), match='ab.'>


re.search('(cat|dog)','my cat')

<re.Match object; span=(3, 6), match='cat'>


re.search('(cat|dog)', 'your dog')

<re.Match object; span=(5, 8), match='dog'>


re.search('^(a|e|i|o|u)123', 'e123')

<re.Match object; span=(0, 4), match='e123'>


re.search('^[aeiou]123', 'o123456')

<re.Match object; span=(0, 4), match='o123'>


re.search('^[^aeiou]123','x123456')

<re.Match object; span=(0, 4), match='x123'>


re.search('^[a-z]123','x123456')

<re.Match object; span=(0, 4), match='x123'>


re.search('^[^a-z]123','X123456')

<re.Match object; span=(0, 4), match='X123'>


re.match('pi+g', 'piiiig')  # one or more i's, as many as possible

<re.Match object; span=(0, 6), match='piiiig'>


match = re.search(r'i+', 'piigiiii')


match

<re.Match object; span=(1, 3), match='ii'>


re.search(r'\d\s*\d\s*\d', 'xx1 2 3xx')  # \s* allows zero or more whitespace characters between the digits

<re.Match object; span=(2, 7), match='1 2 3'>


re.search(r'\d\s*\d\s*\d', 'xx12 3xx')

<re.Match object; span=(2, 6), match='12 3'>


re.search(r'\d\s*\d\s*\d', 'xx12        3xx')

<re.Match object; span=(2, 13), match='12        3'>


re.search(r'^b\w+', 'foobar')


re.search(r'b\w+', 'foobar')

<re.Match object; span=(3, 6), match='bar'>


re.match('^[aeiou]+$','aaaaeee')

<re.Match object; span=(0, 7), match='aaaaeee'>


re.match('^[aeiou]+$','aaaaxxxeee')


def retest(str = 'an example word:cat!!'):
    """Report whether ``str`` contains ``word:`` followed by three word characters.

    Prints ``found: <matched text>`` when ``re.search`` succeeds, otherwise
    prints a message naming the pattern and the string that was searched.

    Args:
        str: The text to search.  (The name shadows the builtin ``str``
            inside this function; it is kept so keyword callers still work.)

    Returns:
        None.  The result is only printed.
    """
    # Raw string: '\w' is not a Python string escape, so a plain literal
    # triggers an invalid-escape warning on current interpreters.
    pat = r'word:\w\w\w'
    match = re.search(pat, str)
    # search() returns None on failure, so the match object doubles as the test.
    if match:
        print('found: {}'.format(match.group()))
    else:
        print('Did not find {} in {}'.format(pat, str))


retest()

found: word:cat


retest('hello world')

Did not find word:\w\w\w in hello world


retest('this is word:123456')

found: word:123


# Sample regular expressions used as the default pattern list of retest2 and
# retest4.  All literals are raw strings so that regex escapes such as \w, \d
# and \. reach the re module unchanged.
patterns = [
    r'aaa',                        # contains aaa
    r'abc',                        # contains abc
    r'...',                        # contains three consecutive characters
    r'^...$',                      # is exactly three characters long
    r'\.\.\.',                     # contains three consecutive periods
    r'abd',                        # contains abd
    r'^[aeiou]*$',                 # only lowercase vowels (also matches the empty string)
    r'^[^aeiou]*$',                # no lowercase vowels (also matches the empty string)
    r'\w\W\w',                     # two word characters separated by a non-word char
    r'\w\w\w',                     # three consecutive word characters
    r'^\d+$',                      # contains only decimal digits
    r'^[0-7]+$',                   # contains only octal digits
    r'^[0-9A-Fa-f]+$',             # contains only hexadecimal digits
    r'^[a-z]*$',                   # only lowercase letters (also matches the empty string)
    r'^\s+',                       # starts with a whitespace char
    r'^\d\s?',                     # starts with a digit optionally followed by one whitespace char
    r'\w+@\w+',                    # simplified email-like form: word chars, '@', word chars
    r'(b)*(a)*(c)',                # any b's, then any a's, then a c, anywhere in the string
    r'^b*a*c$',                    # whole string: any b's, then any a's, then a single c
    r'^(b|c)*(a|b)*$',             # whole string: a run of b/c followed by a run of a/b
    # '|' binds loosest: this is '^bb*(ab|ba)*' OR '(bbc|cbc)*$'.  The second
    # branch can match the empty string at the end, so every input matches.
    r'^bb*(ab|ba)*|(bbc|cbc)*$',
    r'^(ab|ba|cb|bc|ca|ac)*$',     # whole string built from the listed two-letter pairs
    r'(bc|bcc)(bac|cba)(cba|aa)',  # contains one choice from each of the three groups, in order
    r'\AThe',                      # \A is beginning of string
]


def retest2(patlist = patterns):
    """Interactively try every pattern in ``patlist`` against typed-in strings.

    Prompts for a string over and over until ``quit`` is entered.  Each
    pattern is applied with ``re.search`` and the matched text (or a
    "did not find" message) is printed, one line per pattern.

    Args:
        patlist: Iterable of regular-expression strings to try.
    """
    # iter(callable, sentinel) keeps prompting until the user types "quit".
    for text in iter(lambda: input("\nEnter string: "), "quit"):
        for pat in patlist:
            # search() finds a match anywhere in the text; re.match would
            # only accept one that begins at the start of the string.
            found = re.search(pat, text)
            if found is None:
                print('Did not find {} in {}'.format(pat, text))
            else:
                print('Matched: {} with: {}'.format(pat, found.group(0)))


retest2()

Enter string: abc
Did not find aaa in abc
Matched: abc with: abc
Matched: ... with: abc
Matched: ^...$ with: abc
Did not find \.\.\. in abc
Did not find abd in abc
Did not find ^[aeiou]*$ in abc
Did not find ^[^aeiou]*$ in abc
Did not find \w\W\w in abc
Matched: \w\w\w with: abc
Did not find ^\d+$ in abc
Did not find ^[0-7]+$ in abc
Matched: ^[0-9A-Fa-f]+$ with: abc
Matched: ^[a-z]*$ with: abc
Did not find ^\s+ in abc
Did not find ^\d\s? in abc
Did not find \w+@\w+ in abc
Matched: (b)*(a)*(c) with: bc
Did not find ^b*a*c$ in abc
Did not find ^(b|c)*(a|b)*$ in abc
Matched: ^bb*(ab|ba)*|(bbc|cbc)*$ with: 
Did not find ^(ab|ba|cb|bc|ca|ac)*$ in abc
Did not find (bc|bcc)(bac|cba)(cba|aa) in abc
Did not find \AThe in abc

Enter string: joe@yale
Did not find aaa in joe@yale
Did not find abc in joe@yale
Matched: ... with: joe
Did not find ^...$ in joe@yale
Did not find \.\.\. in joe@yale
Did not find abd in joe@yale
Did not find ^[aeiou]*$ in joe@yale
Did not find ^[^aeiou]*$ in joe@yale
Matched: \w\W\w with: e@y
Matched: \w\w\w with: joe
Did not find ^\d+$ in joe@yale
Did not find ^[0-7]+$ in joe@yale
Did not find ^[0-9A-Fa-f]+$ in joe@yale
Did not find ^[a-z]*$ in joe@yale
Did not find ^\s+ in joe@yale
Did not find ^\d\s? in joe@yale
Matched: \w+@\w+ with: joe@yale
Did not find (b)*(a)*(c) in joe@yale
Did not find ^b*a*c$ in joe@yale
Did not find ^(b|c)*(a|b)*$ in joe@yale
Matched: ^bb*(ab|ba)*|(bbc|cbc)*$ with: 
Did not find ^(ab|ba|cb|bc|ca|ac)*$ in joe@yale
Did not find (bc|bcc)(bac|cba)(cba|aa) in joe@yale
Did not find \AThe in joe@yale


def retest3(patlist = (r'(\w+)@(\w+)', r'^(\d\d\d).(\d\d\d)')):
    """Interactively show what the capturing groups of each pattern matched.

    Prompts for a string over and over until ``quit`` is entered.  Each
    pattern in ``patlist`` is applied with ``re.search``; on a match the text
    captured by every group is printed, otherwise a "did not find" message.

    Args:
        patlist: Iterable of regular-expression strings.  The defaults
            capture the two sides of a ``word@word`` string, and two runs of
            three digits at the start of the string separated by any single
            character.
    """
    while True:
        text = input("\nEnter string: ")
        if text == "quit":
            break
        for pat in patlist:
            match = re.search(pat, text)
            if not match:
                print('Did not find {} in {}'.format(pat, text))
                continue
            groups = match.groups()
            if groups:
                # Report however many groups the pattern defines.  Indexing
                # group(1) and group(2) unconditionally raises IndexError
                # for a pattern with fewer than two groups.
                details = ' '.join('group {}: {}'.format(number, captured)
                                   for number, captured in enumerate(groups, 1))
            else:
                # No capturing groups at all: fall back to the whole match.
                details = 'with: {}'.format(match.group(0))
            print('Matched: {} {}'.format(pat, details))


retest3()


def retest4(patlist = patterns):
    """Interactively count the ``re.findall`` results of each pattern.

    Prompts for a string over and over until ``quit`` is entered.  For every
    pattern in ``patlist`` the number of items returned by ``re.findall`` is
    printed, or a "did not find" message when the result list is empty.

    Args:
        patlist: Iterable of regular-expression strings to try.
    """
    prompt = "\nEnter string: "
    line = input(prompt)
    while line != "quit":
        for pat in patlist:
            # findall() returns a list with one entry per non-overlapping match.
            hits = re.findall(pat, line)
            if not hits:
                print('Did not find {} in {}'.format(pat, line))
                continue
            print('Total matches for {}: {}'.format(pat, len(hits)))
        line = input(prompt)


retest4()

Enter string: aaaaaa
Total matches for aaa: 2
Did not find abc in aaaaaa
Total matches for ...: 2
Did not find ^...$ in aaaaaa
Did not find \.\.\. in aaaaaa
Did not find abd in aaaaaa
Total matches for ^[aeiou]*$: 1
Did not find ^[^aeiou]*$ in aaaaaa
Did not find \w\W\w in aaaaaa
Total matches for \w\w\w: 2
Did not find ^\d+$ in aaaaaa
Did not find ^[0-7]+$ in aaaaaa
Total matches for ^[0-9A-Fa-f]+$: 1
Total matches for ^[a-z]*$: 1
Did not find ^\s+ in aaaaaa
Did not find ^\d\s? in aaaaaa
Did not find \w+@\w+ in aaaaaa
Did not find (b)*(a)*(c) in aaaaaa
Did not find ^b*a*c$ in aaaaaa
Total matches for ^(b|c)*(a|b)*$: 1
Total matches for ^bb*(ab|ba)*|(bbc|cbc)*$: 1
Did not find ^(ab|ba|cb|bc|ca|ac)*$ in aaaaaa
Did not find (bc|bcc)(bac|cba)(cba|aa) in aaaaaa
Did not find \AThe in aaaaaa

CS 200: Regular Expressions in Python

Examples

Repetition

Leftmost & Largest

Examples

Testing regular expressions

Group Matching

Findall