I'm trying to parse user input in which each word, name, or number is separated by whitespace (except for strings, which are delimited by double quotes) and push each item into a list. The list gets printed along the way. I previously made a version of this code, but this time I want to use tokens to make things cleaner. Here's what I have so far, but it's not printing anything.
#!/util/bin/python
import re
def main():
    """Tokenize a fixed sample string and print every item ``tokenizer`` returns."""
    sample = 'abcd xvc 23432 "exampe" 366'
    for token in tokenizer(sample):
        print(token)
# Token table: (token name, compiled pattern) pairs. The tokenizer tries the
# patterns in the order listed and uses the first one that matches.
# Raw string literals keep regex backslash sequences such as \s and \d from
# being read as (invalid) string escapes, which newer Python versions warn about.
tokens = (
    ('STRING', re.compile(r'"[^"]+"')),  # double-quoted text with at least one character inside
    ('NAME', re.compile(r'[a-zA-Z_]+')),  # run of letters and underscores
    ('SPACE', re.compile(r'\s+')),  # run of whitespace
    ('NUMBER', re.compile(r'\d+')),  # run of digits
)
def tokenizer(s):
    """Split *s* into a list of ``(token_name, matched_text)`` tuples.

    Scanning starts at index 0. At each position the patterns in the
    module-level ``tokens`` table are tried in order; the first one that
    matches contributes a tuple to the result and scanning resumes right
    after the matched text. Every match is recorded, including whitespace
    (``SPACE``) tokens.

    Args:
        s: The string to tokenize.

    Returns:
        A list of ``(token_name, matched_text)`` tuples in input order.

    Raises:
        Exception: If no pattern matches at the current position; the
            message carries the offending index.
    """
    pos = 0
    end = len(s)
    found = []
    while pos < end:
        for kind, pattern in tokens:
            hit = pattern.match(s, pos)
            if hit:
                found.append((kind, hit.group(0)))
                pos = hit.end()
                break
        else:
            # The for loop finished without a break: nothing matched here.
            raise Exception('lexical error at {0}'.format(pos))
    return found
# Run the demo only when this file is executed as a script, so importing the
# module (e.g. to reuse ``tokenizer``) does not print anything as a side effect.
if __name__ == "__main__":
    main()