#!/usr/bin/python3
import ply.lex as lex
import sys

# NOTE: PLY takes each rule's regular expression from the rule function's
# docstring, so rule functions below are documented with '#' comments and the
# pattern must stay the first statement of the function. Function rules are
# tried in definition order; keep that order when editing.

# Number of '(' and '[' seen so far that have not been closed yet.
brackets = 0

# Text of the quoted string currently being scanned, delimiters included.
qstring = ''
dqstring = ''

tokens = (
    'EOL',
    'NUMBER',
    'LASTINDEX',
    'VAR',
    'DOT',
    'OPERATOR',
    'COMPARISON',
    'COMMA',
    'OPENSQBRACKET',
    'CLOSESQBRACKET',
    'OPENBRACKET',
    'CLOSEBRACKET',
    'SEMICOLON',
    'TRUE',
    'FALSE',
    'NOT',
    'LOGICAL',
    'ASSIGNMENT',
    'COLON',
    'QSTRING',
    'DQSTRING',
)


def _advance_lineno(t):
    # PLY does not track line numbers on its own; bump the lexer's counter by
    # the number of newlines contained in the text this rule just consumed.
    t.lexer.lineno += t.value.count('\n')


def t_COMMENT(t):
    r'\.\.+[ \t]*(//.*)?(\n|$)|//.*'
    # Either two or more dots (optionally followed by a // comment) through
    # the end of the line, or a plain // comment. Nothing is returned, so the
    # text is discarded; in the first form the newline is swallowed as well
    # and therefore produces no EOL token.
    _advance_lineno(t)


def t_EOL(t):
    r'\n'
    t.lexer.lineno += 1
    # A newline is only reported while no bracket is open; inside brackets it
    # is dropped (returning None discards the match).
    if brackets == 0:
        return t


t_NUMBER = r'-?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?'
t_LASTINDEX = r'\$'
t_VAR = r'[a-zA-Z_][a-zA-Z0-9_]*'
t_DOT = r'\.'
t_OPERATOR = r'[+\-*/^\\]'
t_COMPARISON = r'<>|[<>]=|==|[<>]'
t_COMMA = r','


def t_OPENSQBRACKET(t):
    r'\['
    global brackets
    brackets += 1
    return t


def t_CLOSESQBRACKET(t):
    r'\]'
    global brackets
    # Clamp at zero so an unmatched closer cannot leave the depth negative,
    # which would corrupt the EOL decision for the rest of the input.
    brackets = max(brackets - 1, 0)
    return t


def t_OPENBRACKET(t):
    r'\('
    global brackets
    brackets += 1
    return t


def t_CLOSEBRACKET(t):
    r'\)'
    global brackets
    # Same clamping as for ']'.
    brackets = max(brackets - 1, 0)
    return t


t_SEMICOLON = r';'
t_TRUE = r'%t'
t_FALSE = r'%f'
t_NOT = r'~'
t_LOGICAL = r'[&|]'
t_ASSIGNMENT = r'='
t_COLON = r':'

t_ignore = ' \t'


def t_error(t):
    # Report the offending character and resume scanning right after it.
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)


# Both string states are exclusive: while one is active only the rules
# prefixed with that state's name apply.
states = (
    ('qstring', 'exclusive'),
    ('dqstring', 'exclusive'),
)


def t_begin_qstring(t):
    r"'"
    # Opening single quote: switch to the 'qstring' state and start a fresh
    # buffer that already holds the delimiter. No token is emitted yet.
    global qstring
    t.lexer.begin('qstring')
    qstring = t.value


def t_begin_dqstring(t):
    r'"'
    # Opening double quote: same as above for the 'dqstring' state.
    global dqstring
    t.lexer.begin('dqstring')
    dqstring = t.value


def t_qstring_COMMENT(t):
    r'\.\.+[ \t]*(//.*)?\n'
    # Two or more dots running to the end of the line inside a string are
    # dropped together with the newline; they are not added to the buffer.
    _advance_lineno(t)


def t_dqstring_COMMENT(t):
    r'\.\.+[ \t]*(//.*)?\n'
    # See t_qstring_COMMENT.
    _advance_lineno(t)


def t_qstring_end(t):
    r"'"
    # Closing single quote: return to the INITIAL state and emit the whole
    # accumulated text (both delimiters included) as one QSTRING token.
    global qstring
    t.lexer.begin('INITIAL')
    qstring += t.value
    t.type = 'QSTRING'
    t.value = qstring
    return t


def t_dqstring_end(t):
    r'"'
    # Closing double quote: emit the accumulated text as one DQSTRING token.
    global dqstring
    t.lexer.begin('INITIAL')
    dqstring += t.value
    t.type = 'DQSTRING'
    t.value = dqstring
    return t


def t_qstring_char(t):
    r"\\.|\.|[^'\\.]+"
    # String content: a backslash pair, a single dot, or a run of any other
    # characters. The run may contain newlines, so keep the line count right.
    global qstring
    qstring += t.value
    _advance_lineno(t)


def t_dqstring_char(t):
    r'\\.|\.|[^"\\.]+'
    # See t_qstring_char.
    global dqstring
    dqstring += t.value
    _advance_lineno(t)


# Nothing is skipped implicitly inside strings.
t_qstring_ignore = ''
t_dqstring_ignore = ''


def t_qstring_error(t):
    print("Illegal character '%s' in qstring" % t.value[0])
    t.lexer.skip(1)


def t_dqstring_error(t):
    print("Illegal character '%s' in dqstring" % t.value[0])
    t.lexer.skip(1)


# Built at import time from the rules defined above in this module.
lexer = lex.lex()


def main(argv):
    # Tokenize the file named by argv[1] and print one token per line.
    # Exits with status 1 after printing a usage message when no file name
    # was supplied.
    if len(argv) <= 1:
        print("Usage: %s filename" % argv[0])
        sys.exit(1)

    with open(argv[1], 'r') as infile:
        data = infile.read()

    lexer.input(data)
    for tok in lexer:
        print(tok)


if __name__ == '__main__':
    main(sys.argv)