diff options
author | Sunil Shetye | 2018-06-07 17:48:41 +0530 |
---|---|---|
committer | Sunil Shetye | 2018-06-07 17:48:41 +0530 |
commit | f97e60278c61e42308c32047109fcb983ded8dcd (patch) | |
tree | 7b97bd5fc96c763f072f6cf4fda8d6342dfe065c /sci2jslex.py | |
parent | b44dbfbeab7778e4c5bf1a91910192b3bbbcdad1 (diff) | |
download | sci2js-f97e60278c61e42308c32047109fcb983ded8dcd.tar.gz sci2js-f97e60278c61e42308c32047109fcb983ded8dcd.tar.bz2 sci2js-f97e60278c61e42308c32047109fcb983ded8dcd.zip |
Initial commit of lexer for sci files
Diffstat (limited to 'sci2jslex.py')
-rwxr-xr-x | sci2jslex.py | 183 |
1 files changed, 183 insertions, 0 deletions
#!/usr/bin/python3
"""Lexer for Scilab (.sci) source files, built on PLY.

Run as a script with a file name to print every token found in that file.
The module can also be imported to reuse ``tokens`` and ``lexer``.
"""

import ply.lex as lex
import sys

# Number of '[' and '(' currently open.  Newlines are only reported as EOL
# tokens while this is zero.
brackets = 0

# Text of the quoted string currently being collected (delimiters included),
# one accumulator per quote style.
qstring = ''
dqstring = ''

tokens = (
    'EOL',
    'NUMBER',
    'VAR',
    'DOT',
    'OPERATOR',
    'NOTEQUALTO',
    'COMPARISON',
    'COMMA',
    'OPENSQBRACKET',
    'CLOSESQBRACKET',
    'OPENBRACKET',
    'CLOSEBRACKET',
    'SEMICOLON',
    'TRUE',
    'FALSE',
    'NOT',
    'AND',
    'ASSIGNMENT',
    'COLON',
    'QSTRING',
    'DQSTRING',
)

# Exclusive states used while the lexer is inside a quoted string.
states = (
    ('qstring', 'exclusive'),
    ('dqstring', 'exclusive'),
)

# NOTE: in PLY the docstring of a rule function IS its regular expression,
# so the rules below are documented with comments instead of docstrings.


# Rules for the INITIAL state.
#
# Function rules are tried in definition order, before any string rule.

# Continuation marker ("..", optionally followed by a // comment, up to the
# end of the line) or a plain // comment.  Produces no token.
def t_COMMENT(t):
    r'\.\.+[ \t]*(//.*)?(\n|$)|//.*'
    # A continuation swallows its newline, so account for it here.
    t.lexer.lineno += t.value.count('\n')


# End of line.  Only reported when no bracket is open; a newline inside
# brackets is discarded.
def t_EOL(t):
    r'\n'
    t.lexer.lineno += 1
    if brackets == 0:
        return t


# NOTEQUALTO and COMPARISON are function rules on purpose: string rules are
# sorted by decreasing regex length, which would try COMPARISON first and
# split '<>' into '<' and '>'.
def t_NOTEQUALTO(t):
    r'<>'
    return t


def t_COMPARISON(t):
    r'[<>]=?|=='
    return t


# The four bracket rules keep the nesting depth in `brackets`.
def t_OPENSQBRACKET(t):
    r'\['
    global brackets
    brackets += 1
    return t


def t_CLOSESQBRACKET(t):
    r'\]'
    global brackets
    brackets -= 1
    return t


def t_OPENBRACKET(t):
    r'\('
    global brackets
    brackets += 1
    return t


def t_CLOSEBRACKET(t):
    r'\)'
    global brackets
    brackets -= 1
    return t


# Opening single quote: switch to the 'qstring' state and start collecting.
# No token is produced until the closing quote is seen.
def t_begin_qstring(t):
    r"'"
    global qstring
    t.lexer.begin('qstring')
    qstring = t.value


# Opening double quote: switch to the 'dqstring' state and start collecting.
def t_begin_dqstring(t):
    r'"'
    global dqstring
    t.lexer.begin('dqstring')
    dqstring = t.value


t_NUMBER = r'-?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?'
t_VAR = r'[a-zA-Z][a-zA-Z0-9_]*'
t_DOT = r'\.'
t_OPERATOR = r'[+\-*/]'
t_COMMA = r','
t_SEMICOLON = r';'
t_TRUE = r'%t'
t_FALSE = r'%f'
t_NOT = r'~'
t_AND = r'&'
t_ASSIGNMENT = r'='
t_COLON = r':'

t_ignore = ' \t'


def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)


# Rules for the 'qstring' and 'dqstring' states.

# Continuation marker inside a string: dropped, newline included, so it does
# not become part of the string text.
def t_qstring_COMMENT(t):
    r'\.\.+[ \t]*(//.*)?\n'
    t.lexer.lineno += t.value.count('\n')


def t_dqstring_COMMENT(t):
    r'\.\.+[ \t]*(//.*)?\n'
    t.lexer.lineno += t.value.count('\n')


# Closing quote: return to INITIAL and emit the collected text as one token.
def t_qstring_end(t):
    r"'"
    global qstring
    t.lexer.begin('INITIAL')
    qstring += t.value
    t.type = 'QSTRING'
    t.value = qstring
    return t


def t_dqstring_end(t):
    r'"'
    global dqstring
    t.lexer.begin('INITIAL')
    dqstring += t.value
    t.type = 'DQSTRING'
    t.value = dqstring
    return t


# String content: a backslash escape, a single dot, or a run of ordinary
# characters (which may include newlines).  Appended to the accumulator.
def t_qstring_char(t):
    r"\\.|\.|[^'\\.]+"
    global qstring
    t.lexer.lineno += t.value.count('\n')
    qstring += t.value


def t_dqstring_char(t):
    r'\\.|\.|[^"\\.]+'
    global dqstring
    t.lexer.lineno += t.value.count('\n')
    dqstring += t.value


# Nothing is skipped inside a string; every character is significant.
t_qstring_ignore = ''

t_dqstring_ignore = ''


def t_qstring_error(t):
    print("Illegal character '%s' in qstring" % t.value[0])
    t.lexer.skip(1)


def t_dqstring_error(t):
    print("Illegal character '%s' in dqstring" % t.value[0])
    t.lexer.skip(1)


lexer = lex.lex()


def main(argv):
    """Tokenize the file named in argv[1] and print each token.

    Returns the process exit status: 1 when no file name was given,
    0 otherwise.
    """
    if len(argv) <= 1:
        print("Usage: %s filename" % argv[0])
        return 1

    with open(argv[1], 'r') as infile:
        data = infile.read()

    lexer.input(data)
    for tok in lexer:
        print(tok)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))