diff options
Diffstat (limited to 'include/dsnlexer.h')
-rw-r--r-- | include/dsnlexer.h | 544 |
1 files changed, 544 insertions, 0 deletions
diff --git a/include/dsnlexer.h b/include/dsnlexer.h new file mode 100644 index 0000000..ded81c7 --- /dev/null +++ b/include/dsnlexer.h @@ -0,0 +1,544 @@ +/* + * This program source code file is part of KICAD, a free EDA CAD application. + * + * Copyright (C) 2007-2010 SoftPLC Corporation, Dick Hollenbeck <dick@softplc.com> + * Copyright (C) 2007-2015 Kicad Developers, see change_log.txt for contributors. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you may find one here: + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html + * or you may search the http://www.gnu.org website for the version 2 license, + * or you may write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef DSNLEXER_H_ +#define DSNLEXER_H_ + +#include <stdio.h> +#include <string> +#include <vector> +#include <hashtables.h> + +#include <richio.h> + +#ifndef SWIG +/** + * Struct KEYWORD + * holds a keyword string and its unique integer token. + */ +struct KEYWORD +{ + const char* name; ///< unique keyword. + int token; ///< a zero based index into an array of KEYWORDs +}; +#endif + +// something like this macro can be used to help initialize a KEYWORD table. +// see SPECCTRA_DB::keywords[] as an example. + +//#define TOKDEF(x) { #x, T_##x } + + +/** + * Enum DSN_SYNTAX_T + * lists all the DSN lexer's tokens that are supported in lexing. It is up + * to the parser if it wants also to support them. + */ +enum DSN_SYNTAX_T { + DSN_NONE = -11, + DSN_COMMENT = -10, + DSN_STRING_QUOTE = -9, + DSN_QUOTE_DEF = -8, + DSN_DASH = -7, + DSN_SYMBOL = -6, + DSN_NUMBER = -5, + DSN_RIGHT = -4, // right bracket, ')' + DSN_LEFT = -3, // left bracket, '(' + DSN_STRING = -2, // a quoted string, stripped of the quotes + DSN_EOF = -1 // special case for end of file +}; + + +/** + * Class DSNLEXER + * implements a lexical analyzer for the SPECCTRA DSN file format. It + * reads lexical tokens from the current LINE_READER through the NextTok() + * function. + */ +class DSNLEXER +{ +#ifndef SWIG +protected: + bool iOwnReaders; ///< on readerStack, should I delete them? + const char* start; + const char* next; + const char* limit; + char dummy[1]; ///< when there is no reader. + + typedef std::vector<LINE_READER*> READER_STACK; + + READER_STACK readerStack; ///< all the LINE_READERs by pointer. + LINE_READER* reader; ///< no ownership. ownership is via readerStack, maybe, if iOwnReaders + + bool specctraMode; ///< if true, then: + ///< 1) stringDelimiter can be changed + ///< 2) Kicad quoting protocol is not in effect + ///< 3) space_in_quoted_tokens is functional + ///< else not. + + char stringDelimiter; + bool space_in_quoted_tokens; ///< blank spaces within quoted strings + + bool commentsAreTokens; ///< true if should return comments as tokens + + int prevTok; ///< curTok from previous NextTok() call. + int curOffset; ///< offset within current line of the current token + + int curTok; ///< the current token obtained on last NextTok() + std::string curText; ///< the text of the current token + + const KEYWORD* keywords; ///< table sorted by CMake for bsearch() + unsigned keywordCount; ///< count of keywords table + KEYWORD_MAP keyword_hash; ///< fast, specialized "C string" hashtable + + void init(); + + int readLine() throw( IO_ERROR ) + { + if( reader ) + { + reader->ReadLine(); + + unsigned len = reader->Length(); + + // start may have changed in ReadLine(), which can resize and + // relocate reader's line buffer. + start = reader->Line(); + + next = start; + limit = next + len; + + return len; + } + return 0; + } + + /** + * Function findToken + * takes aToken string and looks up the string in the keywords table. + * + * @param aToken is a string to lookup in the keywords table. + * @return int - with a value from the enum DSN_T matching the keyword text, + * or DSN_SYMBOL if @a aToken is not in the kewords table. + */ + int findToken( const std::string& aToken ); + + bool isStringTerminator( char cc ) + { + if( !space_in_quoted_tokens && cc==' ' ) + return true; + + if( cc == stringDelimiter ) + return true; + + return false; + } + +#endif + +public: + + /** + * Constructor ( FILE*, const wxString& ) + * intializes a DSN lexer and prepares to read from aFile which + * is already open and has aFilename. + * + * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This + * token table need not contain the lexer separators such as '(' ')', etc. + * @param aKeywordCount is the count of tokens in aKeywordTable. + * @param aFile is an open file, which will be closed when this is destructed. + * @param aFileName is the name of the file + */ + DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, + FILE* aFile, const wxString& aFileName ); + + /** + * Constructor ( const KEYWORD*, unsigned, const std::string&, const wxString& ) + * intializes a DSN lexer and prepares to read from @a aSExpression. + * + * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This + * token table need not contain the lexer separators such as '(' ')', etc. + * @param aKeywordCount is the count of tokens in aKeywordTable. + * @param aSExpression is text to feed through a STRING_LINE_READER + * @param aSource is a description of aSExpression, used for error reporting. + */ + DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, + const std::string& aSExpression, const wxString& aSource = wxEmptyString ); + + /** + * Constructor ( const std::string&, const wxString& ) + * intializes a DSN lexer and prepares to read from @a aSExpression. Use this + * one without a keyword table with the DOM parser in ptree.h. + * + * @param aSExpression is text to feed through a STRING_LINE_READER + * @param aSource is a description of aSExpression, used for error reporting. + */ + DSNLEXER( const std::string& aSExpression, const wxString& aSource = wxEmptyString ); + + /** + * Constructor ( LINE_READER* ) + * intializes a DSN lexer and prepares to read from @a aLineReader which + * is already open, and may be in use by other DSNLEXERs also. No ownership + * is taken of @a aLineReader. This enables it to be used by other DSNLEXERs also. + * + * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This + * token table need not contain the lexer separators such as '(' ')', etc. + * + * @param aKeywordCount is the count of tokens in aKeywordTable. + * + * @param aLineReader is any subclassed instance of LINE_READER, such as + * STRING_LINE_READER or FILE_LINE_READER. No ownership is taken. + */ + DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, + LINE_READER* aLineReader = NULL ); + + virtual ~DSNLEXER(); + + /** + * Useable only for DSN lexers which share the same LINE_READER + * Synchronizes the pointers handling the data read by the LINE_READER + * Allows 2 DNSLEXER to share the same current line, when switching from a + * DNSLEXER to an other DNSLEXER + * @param aLexer = the model + * @return true if the sync can be made ( at least the same line reader ) + */ + bool SyncLineReaderWith( DSNLEXER& aLexer ); + + /** + * Function SetSpecctraMode + * changes the behavior of this lexer into or out of "specctra mode". If + * specctra mode, then: + * 1) stringDelimiter can be changed + * 2) Kicad quoting protocol is not in effect + * 3) space_in_quoted_tokens is functional + * else none of the above are true. The default mode is non-specctra mode, meaning: + * 1) stringDelimiter cannot be changed + * 2) Kicad quoting protocol is in effect + * 3) space_in_quoted_tokens is not functional + */ + void SetSpecctraMode( bool aMode ); + + /** + * Function PushReader + * manages a stack of LINE_READERs in order to handle nested file inclusion. + * This function pushes aLineReader onto the top of a stack of LINE_READERs and makes + * it the current LINE_READER with its own GetSource(), line number and line text. + * A grammar must be designed such that the "include" token (whatever its various names), + * and any of its parameters are not followed by anything on that same line, + * because PopReader always starts reading from a new line upon returning to + * the original LINE_READER. + */ + void PushReader( LINE_READER* aLineReader ); + + /** + * Function PopReader + * deletes the top most LINE_READER from an internal stack of LINE_READERs and + * in the case of FILE_LINE_READER this means the associated FILE is closed. + * The most recently used former LINE_READER on the stack becomes the + * current LINE_READER and its previous position in its input stream and the + * its latest line number should pertain. PopReader always starts reading + * from a new line upon returning to the previous LINE_READER. A pop is only + * possible if there are at least 2 LINE_READERs on the stack, since popping + * the last one is not supported. + * + * @return LINE_READER* - is the one that was in use before the pop, or NULL + * if there was not at least two readers on the stack and therefore the + * pop failed. + */ + LINE_READER* PopReader(); + + // Some functions whose return value is best overloaded to return an enum + // in a derived class. + //-----<overload return values to tokens>------------------------------ + + /** + * Function NextTok + * returns the next token found in the input file or DSN_EOF when reaching + * the end of file. Users should wrap this function to return an enum + * to aid in grammar debugging while running under a debugger, but leave + * this lower level function returning an int (so the enum does not collide + * with another usage). + * @return int - the type of token found next. + * @throw IO_ERROR - only if the LINE_READER throws it. + */ + int NextTok() throw( IO_ERROR ); + + /** + * Function NeedSYMBOL + * calls NextTok() and then verifies that the token read in + * satisfies bool IsSymbol(). + * If not, an IO_ERROR is thrown. + * @return int - the actual token read in. + * @throw IO_ERROR, if the next token does not satisfy IsSymbol() + */ + int NeedSYMBOL() throw( IO_ERROR ); + + /** + * Function NeedSYMBOLorNUMBER + * calls NextTok() and then verifies that the token read in + * satisfies bool IsSymbol() or tok==DSN_NUMBER. + * If not, an IO_ERROR is thrown. + * @return int - the actual token read in. + * @throw IO_ERROR, if the next token does not satisfy the above test + */ + int NeedSYMBOLorNUMBER() throw( IO_ERROR ); + + /** + * Function NeedNUMBER + * calls NextTok() and then verifies that the token read is type DSN_NUMBER. + * If not, and IO_ERROR is thrown using text from aExpectation. + * @return int - the actual token read in. + * @throw IO_ERROR, if the next token does not satisfy the above test + */ + int NeedNUMBER( const char* aExpectation ) throw( IO_ERROR ); + + /** + * Function CurTok + * returns whatever NextTok() returned the last time it was called. + */ + int CurTok() + { + return curTok; + } + + /** + * Function PrevTok + * returns whatever NextTok() returned the 2nd to last time it was called. + */ + int PrevTok() + { + return prevTok; + } + + //-----</overload return values to tokens>----------------------------- + + + /** + * Function SetStringDelimiter + * changes the string delimiter from the default " to some other character + * and returns the old value. + * @param aStringDelimiter The character in lowest 8 bits. + * @return int - The old delimiter in the lowest 8 bits. + */ + char SetStringDelimiter( char aStringDelimiter ) + { + int old = stringDelimiter; + if( specctraMode ) + stringDelimiter = aStringDelimiter; + return old; + } + + /** + * Function SetSpaceInQuotedTokens + * changes the setting controlling whether a space in a quoted string is + * a terminator. + * @param val If true, means + */ + bool SetSpaceInQuotedTokens( bool val ) + { + bool old = space_in_quoted_tokens; + if( specctraMode ) + space_in_quoted_tokens = val; + return old; + } + + /** + * Function SetCommentsAreTokens + * changes the handling of comments. If set true, comments are returns + * as single line strings with a terminating newline, else they are + * consumed by the lexer and not returned. + */ + bool SetCommentsAreTokens( bool val ) + { + bool old = commentsAreTokens; + commentsAreTokens = val; + return old; + } + + /** + * Function ReadCommentLines + * checks the next sequence of tokens and reads them into a wxArrayString + * if they are comments. Reading continues until a non-comment token is + * encountered, and such last read token remains as CurTok() and as CurText(). + * No push back or "un get" mechanism is used for this support. Upon return + * you simply avoid calling NextTok() for the next token, but rather CurTok(). + * + * @return wxArrayString* - heap allocated block of comments, or NULL if none; + * caller owns the allocation and must delete if not NULL. + */ + wxArrayString* ReadCommentLines() throw( IO_ERROR ); + + /** + * Function IsSymbol + * tests a token to see if it is a symbol. This means it cannot be a + * special delimiter character such as DSN_LEFT, DSN_RIGHT, DSN_QUOTE, etc. It may + * however, coincidentally match a keyword and still be a symbol. + */ + static bool IsSymbol( int aTok ); + + /** + * Function Expecting + * throws an IO_ERROR exception with an input file specific error message. + * @param aTok is the token/keyword type which was expected at the current input location. + * @throw IO_ERROR with the location within the input file of the problem. + */ + void Expecting( int aTok ) throw( IO_ERROR ); + + /** + * Function Expecting + * throws an IO_ERROR exception with an input file specific error message. + * @param aTokenList is the token/keyword type which was expected at the + * current input location, e.g. "pin|graphic|property" + * @throw IO_ERROR with the location within the input file of the problem. + */ + void Expecting( const char* aTokenList ) throw( IO_ERROR ); + + /** + * Function Unexpected + * throws an IO_ERROR exception with an input file specific error message. + * @param aTok is the token/keyword type which was not expected at the + * current input location. + * @throw IO_ERROR with the location within the input file of the problem. + */ + void Unexpected( int aTok ) throw( IO_ERROR ); + + /** + * Function Unexpected + * throws an IO_ERROR exception with an input file specific error message. + * @param aToken is the token which was not expected at the + * current input location. + * @throw IO_ERROR with the location within the input file of the problem. + */ + void Unexpected( const char* aToken ) throw( IO_ERROR ); + + /** + * Function Duplicate + * throws an IO_ERROR exception with a message saying specifically that aTok + * is a duplicate of one already seen in current context. + * @param aTok is the token/keyword type which was not expected at the + * current input location. + * @throw IO_ERROR with the location within the input file of the problem. + */ + void Duplicate( int aTok ) throw( IO_ERROR ); + + /** + * Function NeedLEFT + * calls NextTok() and then verifies that the token read in is a DSN_LEFT. + * If it is not, an IO_ERROR is thrown. + * @throw IO_ERROR, if the next token is not a DSN_LEFT + */ + void NeedLEFT() throw( IO_ERROR ); + + /** + * Function NeedRIGHT + * calls NextTok() and then verifies that the token read in is a DSN_RIGHT. + * If it is not, an IO_ERROR is thrown. + * @throw IO_ERROR, if the next token is not a DSN_RIGHT + */ + void NeedRIGHT() throw( IO_ERROR ); + + /** + * Function GetTokenText + * returns the C string representation of a DSN_T value. + */ + const char* GetTokenText( int aTok ); + + /** + * Function GetTokenString + * returns a quote wrapped wxString representation of a token value. + */ + wxString GetTokenString( int aTok ); + + static const char* Syntax( int aTok ); + + /** + * Function CurText + * returns a pointer to the current token's text. + */ + const char* CurText() + { + return curText.c_str(); + } + + /** + * Function CurStr + * returns a reference to current token in std::string form. + */ + const std::string& CurStr() + { + return curText; + } + + /** + * Function FromUTF8 + * returns the current token text as a wxString, assuming that the input + * byte stream is UTF8 encoded. + */ + wxString FromUTF8() + { + return wxString::FromUTF8( curText.c_str() ); + } + + /** + * Function CurLineNumber + * returns the current line number within my LINE_READER + */ + int CurLineNumber() + { + return reader->LineNumber(); + } + + /** + * Function CurLine + * returns the current line of text, from which the CurText() would return + * its token. + */ + const char* CurLine() + { + return (const char*)(*reader); + } + + /** + * Function CurFilename + * returns the current LINE_READER source. + * @return const wxString& - the source of the lines of text, + * e.g. a filename or "clipboard". + */ + const wxString& CurSource() + { + return reader->GetSource(); + } + + /** + * Function CurOffset + * returns the byte offset within the current line, using a 1 based index. + * @return int - a one based index into the current line. + */ + int CurOffset() + { + return curOffset + 1; + } +}; + +#endif // DSNLEXER_H_ |