Shorthand character classes:
- \d - Any numeric digit from 0 to 9.
- \D - Any character that is not a numeric digit from 0 to 9.
- \w - Any letter, numeric digit, or the underscore character. (Think of this as matching “word” characters.)
- \W - Any character that is not a letter, numeric digit, or the underscore character.
- \s - Any space, tab, or newline character. (Think of this as matching “space” characters.)
- \S - Any character that is not a space, tab, or newline.
. - Any Character Except New Line
\d - Digit (0-9)
\D - Not a Digit (0-9)
\w - Word Character (a-z, A-Z, 0-9, _)
\W - Not a Word Character
\s - Whitespace (space, tab, newline)
\S - Not Whitespace (space, tab, newline)
\b - Word Boundary
\B - Not a Word Boundary
^ - Beginning of a String
$ - End of a String
[] - Matches Characters in brackets
[^ ] - Matches Characters NOT in brackets
| - Either Or
( ) - Group
Quantifiers:
* - 0 or More
+ - 1 or More
? - 0 or One
{3} - Exact Number
{3,4} - Range of Numbers (Minimum, Maximum)
#### Sample Regexes ####
[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+
[0-5] is the same as: (0|1|2|3|4|5)
Regexp example:
import re

text_to_search = 'abc defg hij abcd efg hijk'

pattern = re.compile(r'abc')
matches = pattern.finditer(text_to_search)
# print('Phone number found: ' + test.group())
for match in matches:
    print(match)
# <re.Match object; span=(0, 3), match='abc'>
# <re.Match object; span=(13, 16), match='abc'>

urls = '''
https://www.google.com
http://youtube.com
https://www.nasa.gov
'''

pattern_url = re.compile(r'https?://(www\.)?(\w+)(\.\w+)')  # optional 's' and 'www'

subbed_urls = pattern_url.sub(r'\2\3', urls)  # replaces matches with group 2 and 3
print(subbed_urls)
# google.com
# youtube.com
# nasa.gov

matches = pattern_url.finditer(urls)
for match in matches:
    print(match.group(2))
# google
# youtube
# nasa

matches = pattern_url.findall(urls)
for match in matches:
    print(match)
# ('www.', 'google', '.com')
# ('', 'youtube', '.com')
# ('www.', 'nasa', '.gov')
>>> import re >>> phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d') >>> test = phoneNumRegex.search('Phone number is 333-555-7777.') >>> print('Phone number found: ' + test.group()) Phone number found: 333-555-7777
>>> import re >>> phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)') >>> mo = phoneNumRegex.search('My number is 415-555-4242.') >>> mo.group(1) '415' >>> mo.group(2) '555-4242' >>> mo.group(0) '415-555-4242' >>> mo.group() '415-555-4242'
>>> import re >>> mo.groups() ('415', '555-4242') >>> areaCode, mainNumber = mo.groups() >>> print(areaCode) 415 >>> print(mainNumber) 555-4242
>>> import re >>> phoneNumRegex = re.compile(r'(\(\d\d\d\)) (\d\d\d-\d\d\d\d)') >>> mo = phoneNumRegex.search('My phone number is (415) 555-4242.') >>> mo.group(1) '(415)' >>> mo.group(2) '555-4242'
>>> import re >>> batRegex = re.compile(r'Bat(man|mobile|copter|bat)') >>> mo = batRegex.search('Batmobile lost a wheel') >>> mo.group() 'Batmobile' >>> mo.group(1) 'mobile'
>>> batRegex = re.compile(r'Bat(wo){0,1}man') >>> mo1 = batRegex.search('The Adventures of Batman') >>> mo1.group() 'Batman' >>> mo2 = batRegex.search('The Adventures of Batwoman') >>> mo2.group() 'Batwoman'
>>> batRegex = re.compile(r'Bat(wo){0,}man') >>> mo1 = batRegex.search('The Adventures of Batman') >>> mo1.group() 'Batman' >>> mo2 = batRegex.search('The Adventures of Batwoman') >>> mo2.group() 'Batwoman' >>> mo3 = batRegex.search('The Adventures of Batwowowowoman') >>> mo3.group() 'Batwowowowoman'
>>> haRegex = re.compile(r'(Yo){3}') >>> re1 = haRegex.search('YoYoYo') >>> re1.group() 'YoYoYo' >>> re2 = haRegex.search('Yo') >>> re2 == None True >>> haRegex = re.compile(r'(Yo){2,4}') # Same as: ((Yo)(Yo))|((Yo)(Yo)(Yo))|((Yo)(Yo)(Yo)(Yo)) >>> re1 = haRegex.search('YoYoYo') >>> re1.group() 'YoYoYo'
>>> phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d') # has no groups >>> phoneNumRegex.findall('Cell: 415-555-9999 Work: 212-555-0000') ['415-555-9999', '212-555-0000']
>>> phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)') # has groups
>>> phoneNumRegex.findall('Cell: 444-555-9999 Work: 333-555-0000')
[('444', '555', '9999'), ('333', '555', '0000')]
>>> fruRegex = re.compile(r'\d+\s\w+') >>> fruRegex.findall('5 bananas, 7 apples, 9 oranges') ['5 bananas', '7 apples', '9 oranges']
Custom character classes
>>> vowelRegex = re.compile(r'[aeiouAEIOU]') >>> vowelRegex.findall('ABC DE. abc de.') ['A', 'E', 'a', 'e']
String begins with custom characters
>>> beginsWithHello = re.compile(r'^Hello') >>> beginsWithHello.search('Hello world!') <_sre.SRE_Match object; span=(0, 5), match='Hello'> >>> beginsWithHello.search('He said hello.') == None True
String ends with custom characters
>>> endsWithNumber = re.compile(r'\d$') >>> endsWithNumber.search('Your number is 42') <_sre.SRE_Match object; span=(16, 17), match='2'> >>> endsWithNumber.search('Your number is forty two.') == None True
String begins and ends with custom characters
>>> wholeStringIsNum = re.compile(r'^\d+$') >>> wholeStringIsNum.search('1234567890') <_sre.SRE_Match object; span=(0, 10), match='1234567890'> >>> wholeStringIsNum.search('12345xyz67890') == None True >>> wholeStringIsNum.search('12 34567890') == None True
"Any character" wildcard
>>> atRegex = re.compile(r'.at') >>> atRegex.findall('The cat in the hat sat on the flat mat.') ['cat', 'hat', 'sat', 'lat', 'mat']
"Matching Everything" wildcard
>>> nameRegex = re.compile(r'First Name: (.{0,}) Last Name: (.{0,})') >>> mo = nameRegex.search('First Name: Jack Last Name: Fox') >>> mo.group(1) 'Jack' >>> mo.group(2) 'Fox'
Greedy and nongreedy matching
>>> nongreedyRegex = re.compile(r'<.*?>') >>> mo = nongreedyRegex.search('<To serve man> for dinner.>') >>> mo.group() '<To serve man>' >>> greedyRegex = re.compile(r'<.*>') >>> mo = greedyRegex.search('<To serve man> for dinner.>') >>> mo.group() '<To serve man> for dinner.>'
Matching Newlines with the Dot Character
>>> noNewlineRegex = re.compile('.*')
>>> noNewlineRegex.search('Serve the public trust.\nProtect the innocent.\nUphold the law.').group()
'Serve the public trust.'
>>> newlineRegex = re.compile('.*', re.DOTALL)
>>> newlineRegex.search('Serve the public trust.\nProtect the innocent.\nUphold the law.').group()
'Serve the public trust.\nProtect the innocent.\nUphold the law.'
String Substitution with Regular Expressions
>>> namesRegex = re.compile(r'Agent \w+') >>> namesRegex.sub('CENSORED', 'Agent Alice gave the secret documents to Agent Bob.') 'CENSORED gave the secret documents to CENSORED.'
String Substitution with Regular Expressions - Extended
>>> agentNamesRegex = re.compile(r'Agent (\w)\w*')
>>> agentNamesRegex.sub(r'\1****', 'Agent Alice told Agent Carol that Agent Eve knew Agent Bob was a double agent.')
'A**** told C**** that E**** knew B**** was a double agent.'