Python regular expression

Shorthand character classes:

  • \d - Any numeric digit from 0 to 9.
  • \D - Any character that is not a numeric digit from 0 to 9.
  • \w - Any letter, numeric digit, or the underscore character. (Think of this as matching “word” characters.)
  • \W - Any character that is not a letter, numeric digit, or the underscore character.
  • \s - Any space, tab, or newline character. (Think of this as matching “space” characters.)
  • \S - Any character that is not a space, tab, or newline.

[0-5] is the same as: (0|1|2|3|4|5)

>>> import re
>>> phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
>>> test = phoneNumRegex.search('Phone number is 333-555-7777.')
>>> print('Phone number found: ' + test.group())
Phone number found: 333-555-7777
>>> import re
>>> phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
>>> mo = phoneNumRegex.search('My number is 415-555-4242.')
>>> mo.group(1)
'415'
>>> mo.group(2)
'555-4242'
>>> mo.group(0)
'415-555-4242'
>>> mo.group()
'415-555-4242'
>>> import re
>>> mo.groups()
('415', '555-4242')
>>> areaCode, mainNumber = mo.groups()
>>> print(areaCode)
415
>>> print(mainNumber)
555-4242
>>> import re
>>> phoneNumRegex = re.compile(r'(\(\d\d\d\)) (\d\d\d-\d\d\d\d)')
>>> mo = phoneNumRegex.search('My phone number is (415) 555-4242.')
>>> mo.group(1)
'(415)'
>>> mo.group(2)
'555-4242'
>>> import re
>>> batRegex = re.compile(r'Bat(man|mobile|copter|bat)')
>>> mo = batRegex.search('Batmobile lost a wheel')
>>> mo.group()
'Batmobile'
>>> mo.group(1)
'mobile'
>>> batRegex = re.compile(r'Bat(wo){0,1}man')
>>> mo1 = batRegex.search('The Adventures of Batman')
>>> mo1.group()
'Batman'
>>> mo2 = batRegex.search('The Adventures of Batwoman')
>>> mo2.group()
'Batwoman'
>>> batRegex = re.compile(r'Bat(wo){0,}man')
>>> mo1 = batRegex.search('The Adventures of Batman')
>>> mo1.group()
'Batman'
>>> mo2 = batRegex.search('The Adventures of Batwoman')
>>> mo2.group()
'Batwoman'
>>> mo3 = batRegex.search('The Adventures of Batwowowowoman')
>>> mo3.group()
'Batwowowowoman'
>>> haRegex = re.compile(r'(Yo){3}')
>>> re1 = haRegex.search('YoYoYo')
>>> re1.group()
'YoYoYo'
>>> re2 = haRegex.search('Yo')
>>> re2 == None
True
 
>>> haRegex = re.compile(r'(Yo){2,4}')  # Same as: ((Yo)(Yo))|((Yo)(Yo)(Yo))|((Yo)(Yo)(Yo)(Yo))
>>> re1 = haRegex.search('YoYoYo')
>>> re1.group()
'YoYoYo'
>>> phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d') # has no groups
>>> phoneNumRegex.findall('Cell: 415-555-9999 Work: 212-555-0000')
['415-555-9999', '212-555-0000']
>>> phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)') # has groups
>>> phoneNumRegex.findall('Cell: 444-555-9999 Work: 333-555-0000')
[('444', '555', '9999'), ('333', '555', '0000')]
>>> fruRegex = re.compile(r'\d+\s\w+')
>>> fruRegex.findall('5 bananas, 7 apples, 9 oranges')
['5 bananas', '7 apples', '9 oranges']

Custom character classes

>>> vowelRegex = re.compile(r'[aeiouAEIOU]')
>>> vowelRegex.findall('ABC DE. abc de.')
['A', 'E', 'a', 'e']

String begins with custom characters

>>> beginsWithHello = re.compile(r'^Hello')
>>> beginsWithHello.search('Hello world!')
<_sre.SRE_Match object; span=(0, 5), match='Hello'>
>>> beginsWithHello.search('He said hello.') == None
True

String ends with custom characters

>>> endsWithNumber = re.compile(r'\d$')
>>> endsWithNumber.search('Your number is 42')
<_sre.SRE_Match object; span=(16, 17), match='2'>
>>> endsWithNumber.search('Your number is forty two.') == None
True

String begins and ends with custom characters

>>> wholeStringIsNum = re.compile(r'^\d+$')
>>> wholeStringIsNum.search('1234567890')
<_sre.SRE_Match object; span=(0, 10), match='1234567890'>
>>> wholeStringIsNum.search('12345xyz67890') == None
True
>>> wholeStringIsNum.search('12 34567890') == None
True

"Any character" wildcard

>>> atRegex = re.compile(r'.at')
>>> atRegex.findall('The cat in the hat sat on the flat mat.')
['cat', 'hat', 'sat', 'lat', 'mat']

"Matching Everything" wildcard

>>> nameRegex = re.compile(r'First Name: (.{0,}) Last Name: (.{0,})')
>>> mo = nameRegex.search('First Name: Jack Last Name: Fox')
>>> mo.group(1)
'Jack'
>>> mo.group(2)
'Fox'

Greedy and nongreedy matching

>>> nongreedyRegex = re.compile(r'<.*?>')
>>> mo = nongreedyRegex.search('<To serve man> for dinner.>')
>>> mo.group()
'<To serve man>'
>>> greedyRegex = re.compile(r'<.*>')
>>> mo = greedyRegex.search('<To serve man> for dinner.>')
>>> mo.group()
'<To serve man> for dinner.>'

Matching Newlines with the Dot Character

>>> noNewlineRegex = re.compile('.*')
>>> noNewlineRegex.search('Serve the public trust.\nProtect the innocent.\nUphold the law.').group()
'Serve the public trust.'
>>> newlineRegex = re.compile('.*', re.DOTALL)
>>> newlineRegex.search('Serve the public trust.\nProtect the innocent.\nUphold the law.').group()
'Serve the public trust.\nProtect the innocent.\nUphold the law.'

Strings Substitution with Regular Expression

>>> namesRegex = re.compile(r'Agent \w+')
>>> namesRegex.sub('CENSORED', 'Agent Alice gave the secret documents to Agent Bob.')
'CENSORED gave the secret documents to CENSORED.'

Strings Substitution with Regular Expression - Extended

>>> agentNamesRegex = re.compile(r'Agent (\w)\w*')
>>> agentNamesRegex.sub(r'\1****', 'Agent Alice told Agent Carol that Agent Eve knew Agent Bob was a double agent.')
'A**** told C**** that E**** knew B**** was a double agent.'

Leave a Reply

Your email address will not be published. Required fields are marked *