import re
# Regex Cheat sheet : https://www.dataquest.io/blog/regex-cheatsheet/
# Regex python tester : https://pythex.org/
# re doc : https://docs.python.org/3/library/re.html
# Check whether a regex occurs anywhere inside a piece of text.
text = "i like train"
reg = r"[a-c]"  # character class: any single char from 'a' to 'c'

# re.match() only tests the very beginning of the string, so it would miss
# the 'a' in "train". re.search() scans the whole string instead.
found = re.search(reg, text)
if found:
    print(text)
else:
    print("Not any match")
# You need to - (import re)
# ^ - Matches the beginning of the line
# $ - Matches the end of the line
# . - Matches any character (except a newline, unless re.DOTALL is used)
# \s - Matches whitespace
# \S - Matches any non-whitespace character
# * - Repeat a character zero or more times
# *? - Repeat a character zero or more times (non-greedy)
# + - Repeat a character one or more times
# +? - Repeat a character one or more times (non-greedy)
# [aeiou] - Matches a single character in the listed set
# [^XYZ] - Matches a single character not in the listed set
# [a-z0-9] - The set of characters can include a range
# ( - Indicates where string extraction is to start
# ) - Indicates where string extraction is to end
# 1. A fixed string -> abc123
# 2. Arbitrary repetition -> a*b ("*" means that you can have an arbitrary
#    number (possibly 0) of the previous char)
# 3. Repeat character at least once -> a+b # ab, aaaab
# 4. Repeat character at most once -> a?b # b, ab
# 5. Repeat a character a fixed number of times -> a{5} # aaaaa
# 6. Repeat a pattern a fixed number of times -> (a*b){3} # baabab, ababaaaab
# 7. Repeat a character or pattern a variable number of times -> a{2,4} # aa, aaa, aaaa
# 8. Choice of several characters -> [ab]c # ac, bc
# 9. Arbitrary mixture of several characters -> [ab]*c # c, aac, abbac
# 10. Ranges of characters -> [A-H][a-z]* # Aasdfalsd, Hb, G
# 11. Characters OTHER than particular one -> [^AB] # C, D
# 12. Choice of several expressions -> Dr|Mr|Ms|Mrs # Dr, Mr, Mrs, Ms
# 13. Nesting expressions -> ([A-Z][a-z0-9]*)* # A, AzSDFcvfg
# 14. Start of a line -> ^ab
# 15. End of a line -> ab$
# Type of pattern
# 1. Special characters (escape them with a backslash) -> \[ # [
# 2. Any character 'except' newline -> . # a, *, -
# 3. Nongreedy evaluation -> <.*?> # <h1>, </h2 name = "foo">
# 4. Whitespace -> \s
import re
# The string you want to find a pattern within
test_string = 'Hello greppers!'

# Creating a regular expression pattern.
# This is a simple one which finds the literal text "Hello".
pattern = re.compile(r'Hello')

# finditer() returns an iterator that yields one match object per
# non-overlapping occurrence of the pattern within test_string.
matches = pattern.finditer(test_string)

# Output every occurrence; each one is a match object.
for match in matches:
    print(match)
import re
# re.search() returns a match object if the pattern is found anywhere in the
# string, otherwise None.
txt = "Hello world"
# [a-zA-Z] matches ASCII letters only. The range must end at uppercase 'Z':
# writing 'A-z' would also accept the punctuation that sits between 'Z' and
# 'a' in ASCII ([, \, ], ^, _ and `).
x = re.search(r"[a-zA-Z]+", txt)
if x:
    print("YES! We have a match!", x)
else:
    print("No match")
# output YES! We have a match! <re.Match object; span=(0, 5), match='Hello'>
# findall() gathers every non-overlapping match into a list.
# Example 1: the character class picks out each lowercase vowel.
txt = "This is a test"
x = re.compile(r"[aeiou]").findall(txt)
print(x)
# output ['i', 'i', 'a', 'e']

# Example 2: find "is" or "test" regardless of letter case. Because the
# pattern has one capture group, findall() returns the group's text.
txt = "This iS a Test"
x = re.compile("(is|test)", re.IGNORECASE).findall(txt)
print(x)
# output ['is', 'iS', 'Test']
# split(): cut a string into a list wherever the regular expression matches.
txt = "This is a silly string"
x = re.compile(r"silly").split(txt)
print(x)
# output ['This is a ', ' string']

# sub(): collapse a run of repeated "to" (e.g. "tototo") into a single "to".
txt = "We need tototo run "
x = re.compile(r"(to)+").sub("to", txt)
print(x)
# output We need to run
# Step-By-Step breakdown:
import re # We need this module
# Step 1 - the text that will be searched:
search_txt = "These are oranges and apples and pears"

# Step 2 - compile the search pattern into a regex object. Swap
# REGEX_GOES_HERE for your own pattern and keep only ONE of the two lines
# below (as written, the second assignment replaces the first):
regex_obj = re.compile(r'REGEX_GOES_HERE', re.IGNORECASE)  # case-insensitive search
regex_obj = re.compile(r'REGEX_GOES_HERE')  # case-sensitive search

# Step 3 - apply the regex object to the text to get the result/s:
regex_obj.findall(search_txt)

# The same call wrapped in print. findall() gives a LIST of all matches,
# or an empty list when nothing matches.
print(regex_obj.findall(search_txt))
# 1. A fixed string -> abc123
# 2. Arbitrary repetition -> a*b ("*" means that you can have an arbitrary
#    number (possibly 0) of the previous char)
# 3. Repeat character at least once -> a+b # ab, aaaab
# 4. Repeat character at most once -> a?b # b, ab
# 5. Repeat a character a fixed number of times -> a{5} # aaaaa
# 6. Repeat a pattern a fixed number of times -> (a*b){3} # baabab, ababaaaab
# 7. Repeat a character or pattern a variable number of times -> a{2,4} # aa, aaa, aaaa
# 8. Choice of several characters -> [ab]c # ac, bc
# 9. Arbitrary mixture of several characters -> [ab]*c # c, aac, abbac
# 10. Ranges of characters -> [A-H][a-z]* # Aasdfalsd, Hb, G
# 11. Characters OTHER than particular one -> [^AB] # C, D
# 12. Choice of several expressions -> Dr|Mr|Ms|Mrs # Dr, Mr, Mrs, Ms
# 13. Nesting expressions -> ([A-Z][a-z0-9]*)* # A, AzSDFcvfg
# 14. Start of a line -> ^ab
# 15. End of a line -> ab$
# Type of pattern
# 1. Special characters (escape them with a backslash) -> \[ # [
# 2. Any character 'except' newline -> . # a, *, -
# 3. Nongreedy evaluation -> <.*?> # <h1>, </h2 name = "foo">
# 4. Whitespace -> \s
# You need to - (import re)
# ^ - Matches the beginning of the line
# $ - Matches the end of the line
# . - Matches any character (except a newline, unless re.DOTALL is used)
# \s - Matches whitespace
# \S - Matches any non-whitespace character
# * - Repeat a character zero or more times
# *? - Repeat a character zero or more times (non-greedy)
# + - Repeat a character one or more times
# +? - Repeat a character one or more times (non-greedy)
# [aeiou] - Matches a single character in the listed set
# [^XYZ] - Matches a single character not in the listed set
# [a-z0-9] - The set of characters can include a range
# ( - Indicates where string extraction is to start
# ) - Indicates where string extraction is to end
import re
text = "test1, test2, test3"
# \d matches any single digit, so this pattern matches test1, test2 and test3
# (a literal "test1" would only ever match the first item).
regex = re.compile(r"test\d")

# Returns the (start, end) range of the match at the beginning of the text
print(regex.match(text).span())

# Returns the text with every match replaced by other text
print(regex.sub("replace", text))

# Returns every match as a list
print(regex.findall(text))

# OUT:
#
# (0, 5)
# replace, replace, replace
# ['test1', 'test2', 'test3']
# Syntax: re.search(pattern, string, flags=0)
# pattern: The first argument is the regular expression pattern we want to search inside the target string.
# string: The second argument is the variable pointing to the target string (In which we want to look for occurrences of the pattern).
# flags: Finally, the third argument is optional and it refers to regex flags by default no flags are applied.
'''
Regex (Regular Expression) are incredibly powerful,
and can do much more than regular text search.
'''
import re
# Sample text shared by the three demonstrations below.
text = '''A blockchain, originally block chain,
is a growing list of records, called blocks,
which are linked using cryptography.
'''

demo_patterns = (
    # a. The dot regex: '.' stands for one arbitrary character.
    'b...k',    # Output: ['block', 'block', 'block']
    # b. The asterisk regex: '.*' stands for an arbitrary number of
    #    characters, so text can be matched by how it begins and ends.
    #    The asterisk can be combined with other operators.
    'cr.*',     # Output: ['cryptography.']
    'y.*y',     # Output: ['yptography']
    # c. The zero-or-one regex: '?' makes the preceding character optional.
    'blocks?',  # Output: ['block', 'block', 'blocks']
)

for demo_pattern in demo_patterns:
    print(re.findall(demo_pattern, text))
# Let's say you want to check for a phone number in a string
import re

# \d matches any single digit (0-9). Without the backslash the pattern would
# look for the literal letter "d" and never match a real phone number.
phone_num_regex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
mobile_string = 'My number is 415-555-4242'  # Not real number

# search() scans the whole string and returns a match object, or None.
any_phone_numbers = phone_num_regex.search(mobile_string)
print(any_phone_numbers)
# The r in front of the string means it's a raw string (escape sequences such
# as \n, \t, etc. are not interpreted by Python, so the backslashes reach the
# regex engine untouched).
# In regex, \d looks for any digit in your string (0-9).
# If we search for \d\d\d-\d\d\d-\d\d\d\d, it will look anywhere in the
# string for a digit, followed by a digit, followed by a digit, followed
# by a hyphen, ...
# You can also use it in an if statement to check whether there is a match
# between a regex and a string with 're.match(regex, string)' (note that
# re.match only matches at the start of the string; use re.search to look
# anywhere in it).
# A Python program to demonstrate working of re.search().
import re

# Lets use a regular expression to match a date string
# in the form of Month name followed by day number.
# ([a-zA-Z]+) captures the month name, (\d+) captures the day digits.
regex = r"([a-zA-Z]+) (\d+)"
match = re.search(regex, "I was born on June 24")

if match is not None:
    # We reach here when the expression "([a-zA-Z]+) (\d+)"
    # matches the date string.

    # This will print [14, 21), since it matches at index 14
    # and ends at 21.
    print("Match at index %s, %s" % (match.start(), match.end()))

    # We use the group() method to get all the matches and
    # captured groups. The groups contain the matched values.
    # In particular:
    # match.group(0) always returns the fully matched string
    # match.group(1) match.group(2), ... return the capture
    # groups in order from left to right in the input string
    # match.group() is equivalent to match.group(0)

    # So this will print "June 24"
    print("Full match: %s" % (match.group(0)))

    # So this will print "June"
    print("Month: %s" % (match.group(1)))

    # So this will print "24"
    print("Day: %s" % (match.group(2)))
else:
    print("The regex pattern does not match.")
# Python3 program to find if a given pattern is
# present in a text
def exactMatch(text, pat, text_index, pat_index):
    """Check whether the rest of ``pat`` matches ``text`` at a fixed position.

    Compares ``pat[pat_index:]`` element by element against ``text`` starting
    at ``text_index``.

    Returns:
        1 if every remaining pattern element matches (this includes the case
        where the pattern is already exhausted), otherwise 0.
    """
    # Walk both sequences in lockstep. A loop is used instead of recursion so
    # that long patterns cannot exceed the interpreter's recursion limit.
    while pat_index != len(pat):
        # Text ran out before the pattern did: no match.
        if text_index == len(text):
            return 0
        # Current elements differ: no match.
        if text[text_index] != pat[pat_index]:
            return 0
        text_index += 1
        pat_index += 1

    # The last element of the pattern was reached: full match.
    return 1
# This function returns 1 if 'text' contains 'pat', otherwise 0
def contains(text, pat, text_index, pat_index):
    """Search ``text`` from ``text_index`` onward for ``pat[pat_index:]``.

    Returns:
        1 if the remaining pattern occurs somewhere at or after
        ``text_index``, otherwise 0. An exhausted (empty) pattern counts as
        contained.
    """
    # An exhausted pattern is trivially contained; checking this first also
    # avoids indexing past the end of 'pat' below.
    if pat_index == len(pat):
        return 1

    first = pat[pat_index]
    # Try every remaining start position. A loop is used instead of recursion
    # so that long texts cannot exceed the interpreter's recursion limit.
    for start in range(text_index, len(text)):
        # Only run the full comparison where the first elements agree.
        if text[start] == first and exactMatch(text, pat, start, pat_index):
            return 1

    # The end of the text was reached without finding the pattern.
    return 0
# Driver program: run contains() on a few sample (text, pattern) pairs,
# always starting the search at index 0 of both strings.
for sample_text, sample_pat in (
    ("geeksforgeeks", "geeks"),
    ("geeksforgeeks", "geeksquiz"),
    ("geeksquizgeeksquiz", "quiz"),
):
    print(contains(sample_text, sample_pat, 0, 0))
# This code is contributed by ankush_953.
import re
# Locate the word "portal" and report where the match starts and ends.
s = 'GeeksforGeeks: A computer science portal for geeks'

match = re.compile(r'portal').search(s)

# span() gives the same (start, end) pair as start() and end().
start_index, end_index = match.span()
print('Start Index:', start_index)
print('End Index:', end_index)