I have a python program that searches through a file for valid phone numbers according to a regex peattern. It then, if it finds a match, parses the number out and prints it on the screen. I want to modify it to make it recognize an extension if there is one. I added in a second pattern (patStringExten) but I am unsure how to make it parse out the extension. Any help with this would be greatly appreciated!
import sys
import re
DEF_A_CODE = "None"
def usage() :
print "Usage:"
print "\t" + sys.argv[0] + " [<file>]"
def searchFile( fileName, pattern ) :
fh = open( fileName, "r" )
for l in fh :
l = l.strip()
# Here's the actual search
match = pattern.search( l )
if match :
nr = match.groups()
# Note, from the pattern, that 0 may be null, but 1 and 2 must exist
if not nr[0] :
aCode = DEF_A_CODE
else :
aCode = nr[0]
print "area code: " + aCode + \
", exchange: " + nr[1] + ", trunk: " + nr[2]+ ", extension: " + nr[3]
else :
print "NO MATCH: " + l
fh.close()
def main() :
# stick filename
if len( sys.argv ) < 2 : # no file name
# assume telNrs.txt
fileName = "telNrs.txt"
else :
fileName = sys.argv[1]
# for legibility, Python supplies a 'verbose' pattern
# requires a special flag
#patString = '(\d{3})*[ .\-)]*(\d{3})[ .\-]*(\d{4})'
patString = r'''
# don't match beginning of string (takes care of 1-)
(\d{3})? # area code (3 digits) (optional)
[ .\-)]* # optional separator (any # of space, dash, or dot,
# or closing ')' )
(\d{3}) # exchange, 3 digits
[ .\-]* # optional separator (any # of space, dash, or dot)
(\d{4}) # number, 4 digits
'''
patStringExten = r'''
# don't match beginning of string (takes care of 1-)
(\d{3})? # area code (3 digits) (optional)
[ .\-)]* # optional separator (any # of space, dash, or dot,
# or closing ')' )
(\d{3}) # exchange, 3 digits
[ .\-]* # optional separator (any # of space, dash, or dot)
(\d{4}) # number, 4 digits
[ .\-x]*
[0-9]{1,4}
'''
# Here is what the pattern would look like as a regular pattern:
#patString = r'(\d{3})\D*(\d{3})\D*(\d{4})'
# Instead of creating a temporary object each time, we will compile this
# regexp once, and store this object
pattern = re.compile( patString, re.VERBOSE )
searchFile( fileName, pattern )
main()

searchFilewithpatStringExteninstead ofpatString? How to call it twice, once with each? How to merge the two into a single pattern that accepts either version? How to break the matches into groups that you can pull out by name or number?