summaryrefslogtreecommitdiff
path: root/CMakeModules/TokenList2DsnLexer.cmake
blob: d11bc5c741a72f19b37721e01f6b308cc5682c24 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387

#  This program source code file is part of KICAD, a free EDA CAD application.
#
#  Copyright (C) 2010 Wayne Stambaugh <stambaughw@verizon.net>
#  Copyright (C) 2010 Kicad Developers, see AUTHORS.txt for contributors.
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, you may find one here:
#  http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
#  or you may search the http://www.gnu.org website for the version 2 license,
#  or you may write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
#
#
# This script converts a plain text file with a line feed separated list
# of token names into the appropriate source and header files required by
# the DSN lexer.  See files "<base_source_path>/common/dsnlexer.cpp" and
# "<base_source_path>/include/dsnlexer.h" for more information about how
# the DSN lexer works.  The token list file format requires a single token
# per line.  Tokens can only contain lower case letters, numbers, and
# underscores.  The first letter of each token must be a lower case letter.
# Tokens must be unique.  If any of the above criteria are not met, the
# source and header files will not be generated and a build error will
# occur.
#
# Valid tokens:    a a1 foo_1 foo_bar2
# Invalid tokens:  1 A _foo bar_ foO
#
# Invocation Parameters are:  enum, inputFile, outCppFile, outHeaderFile
#
#     enum       - Required, namespace in which the enum T will be placed.
#                  Keep it short because from outside the class you want a short enum name
#                  like enum::T.   Enums are contained in their own namespace to avoid
#                  collisions on enum value names, a problem with C++ unless the enum
#                  itself is in a separate namespace.
#
#     inputFile  - Required, name of the token list file, or "*.keywords" file.
#                  Choose the basefilename carefully, it decides the class name
#                  used in the generated *_lexer.h file.
#
#     outCppFile - Optional, full path and file name of where to save the generated
#                  cpp keywords file.  If not defined, the output path is the same
#                  path as the token list file path, with a file name of *_keywords.cpp
#
#  outHeaderFile - Optional, full path and file name of where to save the generated
#                  *.h lexfer file.  If not defined, the output path is the same
#                  path as the token list file path, with a file name of *_lexer.h
#
# Use the max_lexer() CMake function from functions.cmake for invocation convenience.


#message( STATUS "TokenList2DsnLexer.cmake" )    # indicate we are running

set( tokens "" )
set( lineCount 0 )
set( dsnErrorMsg "TokenList2DsnLexer.cmake failure:" )

if( NOT EXISTS ${inputFile} )
    message( FATAL_ERROR "${dsnErrorMsg} file ${inputFile} cannot be found." )
endif()

if( NOT DEFINED enum )
    message( FATAL_ERROR "${dsnErrorMsg} missing \"enum\" processing ${inputFile}." )
endif()

get_filename_component( outputPath "${inputFile}" PATH )

# the keywords filename without extension is important, it sets the classname into RESULT
get_filename_component( result "${inputFile}" NAME_WE )
string( TOUPPER "${result}" RESULT )

set( LEXERCLASS "${RESULT}_LEXER" )
set( PARSERCLASS "${RESULT}_PARSER" )

#message( "enum:'${enum}' result:'${result}' outputPath:'${outputPath}' inputFile:'${inputFile}'" )

if( NOT DEFINED outCppFile )
    set( outCppFile "${outputPath}/${result}_keywords.cpp" )
endif()

if( NOT DEFINED outHeaderFile )
    set( outHeaderFile "${outputPath}/${result}_lexer.h" )
endif()

# Create tag for generating header file.
set( headerTag "${LEXERCLASS}_H_" )

set( includeFileHeader
"
/* Do not modify this file it was automatically generated by the
 * TokenList2DsnLexer CMake script.
 */

#ifndef ${headerTag}
#define ${headerTag}

#include <dsnlexer.h>

/**
 * C++ does not put enum _values_ in separate namespaces unless the enum itself
 * is in a separate namespace.  All the token enums must be in separate namespaces
 * otherwise the C++ compiler will eventually complain if it sees more than one
 * DSNLEXER in the same compilation unit, say by mutliple header file inclusion.
 * Plus this also enables re-use of the same enum name T.  A typedef can always be used
 * to clarify which enum T is in play should that ever be a problem.  This is
 * unlikely since Parse() functions will usually only be exposed to one header
 * file like this one.  But if there is a problem, then use:
 *   typedef ${enum}::T T;
 * within that problem area.
 */
namespace ${enum}
{
    /// enum T contains all this lexer's tokens.
    enum T
    {
        // these first few are negative special ones for syntax, and are
        // inherited from DSNLEXER.
        T_NONE          = DSN_NONE,
        T_COMMENT       = DSN_COMMENT,
        T_STRING_QUOTE  = DSN_STRING_QUOTE,
        T_QUOTE_DEF     = DSN_QUOTE_DEF,
        T_DASH          = DSN_DASH,
        T_SYMBOL        = DSN_SYMBOL,
        T_NUMBER        = DSN_NUMBER,
        T_RIGHT         = DSN_RIGHT,        // right bracket: ')'
        T_LEFT          = DSN_LEFT,         // left bracket:  '('
        T_STRING        = DSN_STRING,       // a quoted string, stripped of the quotes
        T_EOF           = DSN_EOF,          // special case for end of file

"
)


set( sourceFileHeader
"
/* Do not modify this file it was automatically generated by the
 * TokenList2DsnLexer CMake script.
 *
 * Include this file in your lexer class to provide the keywords for
 * your DSN lexer.
 */

#include <${result}_lexer.h>

using namespace ${enum};

#define TOKDEF(x)    { #x, T_##x }

const KEYWORD ${LEXERCLASS}::keywords[] = {
"
)

file( STRINGS ${inputFile} lines NO_HEX_CONVERSION )

foreach( line ${lines} )
    math( EXPR lineCount "${lineCount} + 1" )

    # strip any comment from # to end of line
    string( REGEX REPLACE "#.*$" "" tmpToken "${line}" )
    string( STRIP "${tmpToken}" token )

    # Ignore empty lines.
    if( NOT token STREQUAL "" )           # if token is "off" simple if( token) does not work
        # Make sure token is valid.

        #message( "token=${token}" )

        string( REGEX MATCH "[a-z][_0-9a-z]*" validToken "${token}" )
        #message( "validToken=${validToken}" )

        if( validToken STREQUAL token )
            list( APPEND tokens "${validToken}" )
        else()
            message( FATAL_ERROR
                     "Invalid token string \"${tmpToken}\" at line ${lineCount} in file "
                     "<${inputFile}>." )
        endif()
    endif()
endforeach()

list( SORT tokens )

# Check for duplicates.
list( LENGTH tokens tokensBefore )
list( REMOVE_DUPLICATES tokens )
list( LENGTH tokens tokensAfter )

if( NOT ( tokensBefore EQUAL tokensAfter ) )
    message( FATAL_ERROR "Duplicate tokens found in file <${inputFile}>." )
endif()

file( WRITE "${outHeaderFile}" "${includeFileHeader}" )
file( WRITE "${outCppFile}" "${sourceFileHeader}" )

set( lineCount 1 )

foreach( token ${tokens} )
    if( lineCount EQUAL 1 )
        file( APPEND "${outHeaderFile}" "        T_${token} = 0" )
    else( lineCount EQUAL 1 )
        file( APPEND "${outHeaderFile}" "        T_${token}" )
    endif( lineCount EQUAL 1 )

    file(APPEND "${outCppFile}" "    TOKDEF( ${token} )" )

    if( lineCount EQUAL tokensAfter )
        file( APPEND "${outHeaderFile}" "\n" )
        file( APPEND "${outCppFile}" "\n" )
    else( lineCount EQUAL tokensAfter )
        file( APPEND "${outHeaderFile}" ",\n" )
        file( APPEND "${outCppFile}" ",\n" )
    endif( lineCount EQUAL tokensAfter )
    math( EXPR lineCount "${lineCount} + 1" )
endforeach()

file( APPEND "${outHeaderFile}"
"    };
}   // namespace ${enum}


/**
 * Class ${LEXERCLASS}
 * is an automatically generated class using the TokenList2DnsLexer.cmake
 * technology, based on keywords provided by file:
 *    ${inputFile}
 */
class ${LEXERCLASS} : public DSNLEXER
{
    /// Auto generated lexer keywords table and length:
    static const KEYWORD  keywords[];
    static const unsigned keyword_count;

public:
    /**
     * Constructor ( const std::string&, const wxString& )
     * @param aSExpression is (utf8) text possibly from the clipboard that you want to parse.
     * @param aSource is a description of the origin of @a aSExpression, such as a filename.
     *   If left empty, then _(\"clipboard\") is used.
     */
    ${LEXERCLASS}( const std::string& aSExpression, const wxString& aSource = wxEmptyString ) :
        DSNLEXER( keywords, keyword_count, aSExpression, aSource )
    {
    }

    /**
     * Constructor ( FILE* )
     * takes @a aFile already opened for reading and @a aFilename as parameters.
     * The opened file is assumed to be positioned at the beginning of the file
     * for purposes of accurate line number reporting in error messages.  The
     * FILE is closed by this instance when its destructor is called.
     * @param aFile is a FILE already opened for reading.
     * @param aFilename is the name of the opened file, needed for error reporting.
     */
    ${LEXERCLASS}( FILE* aFile, const wxString& aFilename ) :
        DSNLEXER( keywords, keyword_count, aFile, aFilename )
    {
    }

    /**
     * Constructor ( LINE_READER* )
     * intializes a lexer and prepares to read from @a aLineReader which
     * is assumed ready, and may be in use by other DSNLEXERs also.  No ownership
     * is taken of @a aLineReader. This enables it to be used by other lexers also.
     * The transition between grammars in such a case, must happen on a text
     * line boundary, not within the same line of text.
     *
     * @param aLineReader is any subclassed instance of LINE_READER, such as
     *  STRING_LINE_READER or FILE_LINE_READER.  No ownership is taken of aLineReader.
     */
    ${LEXERCLASS}( LINE_READER* aLineReader ) :
        DSNLEXER( keywords, keyword_count, aLineReader )
    {
    }

    /**
     * Function TokenName
     * returns the name of the token in ASCII form.
     */
    static const char* TokenName( ${enum}::T aTok );

    /**
     * Function NextTok
     * returns the next token found in the input file or T_EOF when reaching
     * the end of file.  Users should wrap this function to return an enum
     * to aid in grammar debugging while running under a debugger, but leave
     * this lower level function returning an int (so the enum does not collide
     * with another usage).
     * @return ${enum}::T - the type of token found next.
     * @throw IO_ERROR - only if the LINE_READER throws it.
     */
    ${enum}::T NextTok() throw( IO_ERROR )
    {
        return (${enum}::T) DSNLEXER::NextTok();
    }

    /**
     * Function NeedSYMBOL
     * calls NextTok() and then verifies that the token read in
     * satisfies bool IsSymbol().
     * If not, an IO_ERROR is thrown.
     * @return int - the actual token read in.
     * @throw IO_ERROR, if the next token does not satisfy IsSymbol()
     */
    ${enum}::T NeedSYMBOL() throw( IO_ERROR )
    {
        return (${enum}::T) DSNLEXER::NeedSYMBOL();
    }

    /**
     * Function NeedSYMBOLorNUMBER
     * calls NextTok() and then verifies that the token read in
     * satisfies bool IsSymbol() or tok==T_NUMBER.
     * If not, an IO_ERROR is thrown.
     * @return int - the actual token read in.
     * @throw IO_ERROR, if the next token does not satisfy the above test
     */
    ${enum}::T NeedSYMBOLorNUMBER() throw( IO_ERROR )
    {
        return (${enum}::T) DSNLEXER::NeedSYMBOLorNUMBER();
    }

    /**
     * Function CurTok
     * returns whatever NextTok() returned the last time it was called.
     */
    ${enum}::T CurTok()
    {
        return (${enum}::T) DSNLEXER::CurTok();
    }

    /**
     * Function PrevTok
     * returns whatever NextTok() returned the 2nd to last time it was called.
     */
    ${enum}::T PrevTok()
    {
        return (${enum}::T) DSNLEXER::PrevTok();
    }
};

// example usage

/**
 * Class ${LEXCLASS}_PARSER
 * holds data and functions pertinent to parsing a S-expression file .
 *
class ${PARSERCLASS} : public ${LEXERCLASS}
{

};
*/

#endif   // ${headerTag}
"
)

file( APPEND "${outCppFile}"
"};

const unsigned ${LEXERCLASS}::keyword_count = unsigned( sizeof( ${LEXERCLASS}::keywords )/sizeof( ${LEXERCLASS}::keywords[0] ) );


const char* ${LEXERCLASS}::TokenName( T aTok )
{
    const char* ret;

    if( aTok < 0 )
        ret = DSNLEXER::Syntax( aTok );
    else if( (unsigned) aTok < keyword_count )
        ret = keywords[aTok].name;
    else
        ret = \"token too big\";

    return ret;
}
"
)