-- VHDL lexical scanner. -- Copyright (C) 2002 - 2014 Tristan Gingold -- -- GHDL is free software; you can redistribute it and/or modify it under -- the terms of the GNU General Public License as published by the Free -- Software Foundation; either version 2, or (at your option) any later -- version. -- -- GHDL is distributed in the hope that it will be useful, but WITHOUT ANY -- WARRANTY; without even the implied warranty of MERCHANTABILITY or -- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- for more details. -- -- You should have received a copy of the GNU General Public License -- along with GHDL; see the file COPYING. If not, write to the Free -- Software Foundation, 59 Temple Place - Suite 330, Boston, MA -- 02111-1307, USA. with Ada.Characters.Latin_1; use Ada.Characters.Latin_1; with Ada.Characters.Handling; with Errorout; use Errorout; with Name_Table; with Files_Map; use Files_Map; with Std_Names; with Str_Table; with Flags; use Flags; package body Scanner is -- This classification is a simplification of the categories of LRM93 13.1 -- LRM93 13.1 -- The only characters allowed in the text of a VHDL description are the -- graphic characters and format effector. type Character_Kind_Type is ( -- Neither a format effector nor a graphic character. Invalid, Format_Effector, Upper_Case_Letter, Digit, Special_Character, Space_Character, Lower_Case_Letter, Other_Special_Character); -- LRM93 13.1 -- BASIC_GRAPHIC_CHARACTER ::= -- UPPER_CASE_LETTER | DIGIT | SPECIAL_CHARACTER | SPACE_CHARACTER --subtype Basic_Graphic_Character is -- Character_Kind_Type range Upper_Case_Letter .. Space_Character; -- LRM93 13.1 -- GRAPHIC_CHARACTER ::= -- BASIC_GRAPHIC_CHARACTER | LOWER_CASE_LETTER | OTHER_SPECIAL_CHARACTER -- Note: There is 191 graphic character. subtype Graphic_Character is Character_Kind_Type range Upper_Case_Letter .. Other_Special_Character; -- LRM93 13.1 -- The characters included in each of the categories of basic graphic -- characters are defined as follows: type Character_Array is array (Character) of Character_Kind_Type; Characters_Kind : constant Character_Array := (NUL .. BS => Invalid, -- Format effectors are the ISO (and ASCII) characters called horizontal -- tabulation, vertical tabulation, carriage return, line feed, and form -- feed. HT | LF | VT | FF | CR => Format_Effector, SO .. US => Invalid, -- 1. upper case letters 'A' .. 'Z' | UC_A_Grave .. UC_O_Diaeresis | UC_O_Oblique_Stroke .. UC_Icelandic_Thorn => Upper_Case_Letter, -- 2. digits '0' .. '9' => Digit, -- 3. special characters '"' | '#' | '&' | ''' | '(' | ')' | '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '[' | ']' | '_' | '|' | '*' => Special_Character, -- 4. the space characters ' ' | No_Break_Space => Space_Character, -- 5. lower case letters 'a' .. 'z' | LC_German_Sharp_S .. LC_O_Diaeresis | LC_O_Oblique_Stroke .. LC_Y_Diaeresis => Lower_Case_Letter, -- 6. other special characters '!' | '$' | '%' | '@' | '?' | '\' | '^' | '`' | '{' | '}' | '~' | Inverted_Exclamation .. Inverted_Question | Multiplication_Sign | Division_Sign => Other_Special_Character, -- '¡' -- INVERTED EXCLAMATION MARK -- '¢' -- CENT SIGN -- '£' -- POUND SIGN -- '¤' -- CURRENCY SIGN -- '¥' -- YEN SIGN -- '¦' -- BROKEN BAR -- '§' -- SECTION SIGN -- '¨' -- DIAERESIS -- '©' -- COPYRIGHT SIGN -- 'ª' -- FEMININE ORDINAL INDICATOR -- '«' -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -- '¬' -- NOT SIGN -- '­' -- SOFT HYPHEN -- '®' -- REGISTERED SIGN -- '¯' -- MACRON -- '°' -- DEGREE SIGN -- '±' -- PLUS-MINUS SIGN -- '²' -- SUPERSCRIPT TWO -- '³' -- SUPERSCRIPT THREE -- '´' -- ACUTE ACCENT -- 'µ' -- MICRO SIGN -- '¶' -- PILCROW SIGN -- '·' -- MIDDLE DOT -- '¸' -- CEDILLA -- '¹' -- SUPERSCRIPT ONE -- 'º' -- MASCULINE ORDINAL INDICATOR -- '»' -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -- '¼' -- VULGAR FRACTION ONE QUARTER -- '½' -- VULGAR FRACTION ONE HALF -- '¾' -- VULGAR FRACTION THREE QUARTERS -- '¿' -- INVERTED QUESTION MARK -- '×' -- MULTIPLICATION SIGN -- '÷' -- DIVISION SIGN DEL .. APC => Invalid); -- The context contains the whole internal state of the scanner, ie -- it can be used to push/pop a lexical analysis, to restart the -- scanner from a context marking a previous point. type Scan_Context is record Source: File_Buffer_Acc; Source_File: Source_File_Entry; Line_Number: Natural; Line_Pos: Source_Ptr; Pos: Source_Ptr; Token_Pos: Source_Ptr; File_Len: Source_Ptr; File_Name: Name_Id; Token: Token_Type; Prev_Token: Token_Type; Str_Id : String8_Id; Str_Len : Nat32; Identifier: Name_Id; Int64: Iir_Int64; Fp64: Iir_Fp64; end record; -- The current context. -- Default value is an invalid context. Current_Context: Scan_Context := (Source => null, Source_File => No_Source_File_Entry, Line_Number => 0, Line_Pos => 0, Pos => 0, Token_Pos => 0, File_Len => 0, File_Name => Null_Identifier, Token => Tok_Invalid, Prev_Token => Tok_Invalid, Identifier => Null_Identifier, Str_Id => Null_String8, Str_Len => 0, Int64 => 0, Fp64 => 0.0); Source: File_Buffer_Acc renames Current_Context.Source; Pos: Source_Ptr renames Current_Context.Pos; -- When CURRENT_TOKEN is an identifier, its name_id is stored into -- this global variable. -- Function current_text can be used to convert it into an iir. function Current_Identifier return Name_Id is begin return Current_Context.Identifier; end Current_Identifier; procedure Invalidate_Current_Identifier is begin Current_Context.Identifier := Null_Identifier; end Invalidate_Current_Identifier; procedure Invalidate_Current_Token is begin if Current_Token /= Tok_Invalid then Current_Context.Prev_Token := Current_Token; Current_Token := Tok_Invalid; end if; end Invalidate_Current_Token; function Current_String_Id return String8_Id is begin return Current_Context.Str_Id; end Current_String_Id; function Current_String_Length return Nat32 is begin return Current_Context.Str_Len; end Current_String_Length; function Current_Iir_Int64 return Iir_Int64 is begin return Current_Context.Int64; end Current_Iir_Int64; function Current_Iir_Fp64 return Iir_Fp64 is begin return Current_Context.Fp64; end Current_Iir_Fp64; function Get_Current_File return Name_Id is begin return Current_Context.File_Name; end Get_Current_File; function Get_Current_Source_File return Source_File_Entry is begin return Current_Context.Source_File; end Get_Current_Source_File; function Get_Current_Line return Natural is begin return Current_Context.Line_Number; end Get_Current_Line; function Get_Current_Column return Natural is Col : Natural; Name : Name_Id; begin Coord_To_Position (Current_Context.Source_File, Current_Context.Line_Pos, Integer (Current_Context.Pos - Current_Context.Line_Pos), Name, Col); return Col; end Get_Current_Column; function Get_Token_Column return Natural is Col : Natural; Name : Name_Id; begin Coord_To_Position (Current_Context.Source_File, Current_Context.Line_Pos, Integer (Current_Context.Token_Pos - Current_Context.Line_Pos), Name, Col); return Col; end Get_Token_Column; function Get_Token_Position return Source_Ptr is begin return Current_Context.Token_Pos; end Get_Token_Position; function Get_Position return Source_Ptr is begin return Current_Context.Pos; end Get_Position; procedure Set_File (Source_File : Source_File_Entry) is N_Source: File_Buffer_Acc; begin pragma Assert (Current_Context.Source = null); pragma Assert (Source_File /= No_Source_File_Entry); N_Source := Get_File_Source (Source_File); Current_Context := (Source => N_Source, Source_File => Source_File, Line_Number => 1, Line_Pos => 0, Pos => N_Source'First, Token_Pos => 0, -- should be invalid, File_Len => Get_File_Length (Source_File), File_Name => Get_File_Name (Source_File), Token => Tok_Invalid, Prev_Token => Tok_Invalid, Identifier => Null_Identifier, Str_Id => Null_String8, Str_Len => 0, Int64 => -1, Fp64 => 0.0); Current_Token := Tok_Invalid; end Set_File; function Detect_Encoding_Errors return Boolean is C : constant Character := Source (Pos); begin -- No need to check further if first character is plain ASCII-7 if C >= ' ' and C < Character'Val (127) then return False; end if; -- UTF-8 BOM is EF BB BF if Source (Pos + 0) = Character'Val (16#ef#) and then Source (Pos + 1) = Character'Val (16#bb#) and then Source (Pos + 2) = Character'Val (16#bf#) then Error_Msg_Scan ("source encoding must be latin-1 (UTF-8 BOM detected)"); return True; end if; -- UTF-16 BE BOM is FE FF if Source (Pos + 0) = Character'Val (16#fe#) and then Source (Pos + 1) = Character'Val (16#ff#) then Error_Msg_Scan ("source encoding must be latin-1 (UTF-16 BE BOM detected)"); return True; end if; -- UTF-16 LE BOM is FF FE if Source (Pos + 0) = Character'Val (16#ff#) and then Source (Pos + 1) = Character'Val (16#fe#) then Error_Msg_Scan ("source encoding must be latin-1 (UTF-16 LE BOM detected)"); return True; end if; -- Certainly weird, but scanner/parser will catch it. return False; end Detect_Encoding_Errors; procedure Set_Current_Position (Position: Source_Ptr) is Loc : Location_Type; Offset: Natural; File_Entry : Source_File_Entry; begin if Current_Context.Source = null then raise Internal_Error; end if; Current_Token := Tok_Invalid; Current_Context.Pos := Position; Loc := File_Pos_To_Location (Current_Context.Source_File, Current_Context.Pos); Location_To_Coord (Loc, File_Entry, Current_Context.Line_Pos, Current_Context.Line_Number, Offset); end Set_Current_Position; procedure Close_File is begin Current_Context.Source := null; end Close_File; -- Emit an error when a character above 128 was found. -- This must be called only in vhdl87. procedure Error_8bit is begin Error_Msg_Scan ("8 bits characters not allowed in vhdl87"); end Error_8bit; -- Emit an error when a separator is expected. procedure Error_Separator is begin Error_Msg_Scan ("a separator is required here"); end Error_Separator; -- scan a decimal literal or a based literal. -- -- LRM93 13.4.1 -- DECIMAL_LITERAL ::= INTEGER [ . INTEGER ] [ EXPONENT ] -- EXPONENT ::= E [ + ] INTEGER | E - INTEGER -- -- LRM93 13.4.2 -- BASED_LITERAL ::= BASE # BASED_INTEGER [ . BASED_INTEGER ] # EXPONENT -- BASE ::= INTEGER procedure Scan_Literal is separate; -- Scan a string literal. -- -- LRM93 13.6 / LRM08 15.7 -- A string literal is formed by a sequence of graphic characters -- (possibly none) enclosed between two quotation marks used as string -- brackets. -- STRING_LITERAL ::= " { GRAPHIC_CHARACTER } " -- -- IN: for a string, at the call of this procedure, the current character -- must be either '"' or '%'. procedure Scan_String is -- The quotation character (can be " or %). Mark: Character; -- Current character. C : Character; -- Current length. Length : Nat32; begin -- String delimiter. Mark := Source (Pos); pragma Assert (Mark = '"' or else Mark = '%'); Pos := Pos + 1; Length := 0; Current_Context.Str_Id := Str_Table.Create_String8; loop C := Source (Pos); if C = Mark then -- LRM93 13.6 -- If a quotation mark value is to be represented in the sequence -- of character values, then a pair of adjacent quoatation -- characters marks must be written at the corresponding place -- within the string literal. -- LRM93 13.10 -- Any pourcent sign within the sequence of characters must then -- be doubled, and each such doubled percent sign is interpreted -- as a single percent sign value. -- The same replacement is allowed for a bit string literal, -- provieded that both bit string brackets are replaced. Pos := Pos + 1; exit when Source (Pos) /= Mark; end if; case Characters_Kind (C) is when Format_Effector => Error_Msg_Scan ("format effector not allowed in a string"); exit; when Invalid => Error_Msg_Scan ("invalid character not allowed, even in a string"); when Graphic_Character => if Vhdl_Std = Vhdl_87 and then C > Character'Val (127) then Error_8bit; end if; end case; if C = '"' and Mark = '%' then -- LRM93 13.10 -- The quotation marks (") used as string brackets at both ends of -- a string literal can be replaced by percent signs (%), provided -- that the enclosed sequence of characters constains no quotation -- marks, and provided that both string brackets are replaced. Error_Msg_Scan ("'""' cannot be used in a string delimited with '%'"); end if; Length := Length + 1; Str_Table.Append_String8 (Character'Pos (C)); Pos := Pos + 1; end loop; Current_Token := Tok_String; Current_Context.Str_Len := Length; end Scan_String; -- Scan a bit string literal. -- -- LRM93 13.7 -- A bit string literal is formed by a sequence of extended digits -- (possibly none) enclosed between two quotations used as bit string -- brackets, preceded by a base specifier. -- BIT_STRING_LITERAL ::= BASE_SPECIFIER " [ BIT_VALUE ] " -- BIT_VALUE ::= EXTENDED_DIGIT { [ UNDERLINE ] EXTENDED_DIGIT } -- -- The current character must be a base specifier, followed by '"' or '%'. -- The base must be valid. procedure Scan_Bit_String (Base_Log : Nat32) is -- Position of character '0'. Pos_0 : constant Nat8 := Character'Pos ('0'); -- Used for the base. subtype Nat4 is Natural range 1 .. 4; Base : constant Nat32 := 2 ** Nat4 (Base_Log); -- The quotation character (can be " or %). Mark : constant Character := Source (Pos); -- Current character. C : Character; -- Current length. Length : Nat32; -- Digit value. V, D : Nat8; begin pragma Assert (Mark = '"' or else Mark = '%'); Pos := Pos + 1; Length := 0; Current_Context.Str_Id := Str_Table.Create_String8; loop << Again >> null; C := Source (Pos); Pos := Pos + 1; exit when C = Mark; -- LRM93 13.7 -- If the base specifier is 'B', the extended digits in the bit -- value are restricted to 0 and 1. -- If the base specifier is 'O', the extended digits int the bit -- value are restricted to legal digits in the octal number -- system, ie, the digits 0 through 7. -- If the base specifier is 'X', the extended digits are all digits -- together with the letters A through F. case C is when '0' .. '9' => V := Character'Pos (C) - Character'Pos ('0'); when 'A' .. 'F' => V := Character'Pos (C) - Character'Pos ('A') + 10; when 'a' .. 'f' => -- LRM93 13.7 -- A letter in a bit string literal (...) can be written either -- in lowercase or in upper case, with the same meaning. V := Character'Pos (C) - Character'Pos ('a') + 10; when '_' => if Source (Pos) = '_' then Error_Msg_Scan ("double underscore not allowed in a bit string"); end if; if Source (Pos - 2) = Mark then Error_Msg_Scan ("underscore not allowed at the start of a bit string"); elsif Source (Pos) = Mark then Error_Msg_Scan ("underscore not allowed at the end of a bit string"); end if; goto Again; when '"' => pragma Assert (Mark = '%'); Error_Msg_Scan ("'""' cannot close a bit string opened by '%'"); exit; when '%' => pragma Assert (Mark = '"'); Error_Msg_Scan ("'%' cannot close a bit string opened by '""'"); exit; when others => if Characters_Kind (C) in Graphic_Character then if Vhdl_Std >= Vhdl_08 then V := Nat8'Last; else Error_Msg_Scan ("invalid character in bit string"); -- Continue the bit string V := 0; end if; else Error_Msg_Scan ("bit string not terminated"); Pos := Pos - 1; exit; end if; end case; -- Expand bit value. if Vhdl_Std >= Vhdl_08 and V > Base then -- Expand as graphic character. for I in 1 .. Base_Log loop Str_Table.Append_String8_Char (C); end loop; else -- Expand as extended digits. case Base_Log is when 1 => if V > 1 then Error_Msg_Scan ("invalid character in a binary bit string"); V := 1; end if; Str_Table.Append_String8 (Pos_0 + V); when 3 => if V > 7 then Error_Msg_Scan ("invalid character in a octal bit string"); V := 7; end if; for I in 1 .. 3 loop D := V / 4; Str_Table.Append_String8 (Pos_0 + D); V := (V - 4 * D) * 2; end loop; when 4 => for I in 1 .. 4 loop D := V / 8; Str_Table.Append_String8 (Pos_0 + D); V := (V - 8 * D) * 2; end loop; when others => raise Internal_Error; end case; end if; Length := Length + Base_Log; end loop; -- Note: the length of the bit string may be 0. Current_Token := Tok_Bit_String; Current_Context.Str_Len := Length; end Scan_Bit_String; -- Scan a decimal bit string literal. For base specifier D the algorithm -- is rather different: all the graphic characters shall be digits, and we -- need to use a (not very efficient) arbitrary precision multiplication. procedure Scan_Dec_Bit_String is use Str_Table; Id : String8_Id; -- Position of character '0'. Pos_0 : constant Nat8 := Character'Pos ('0'); -- Current character. C : Character; -- Current length. Length : Nat32; -- Digit value. V, D : Nat8; type Carries_Type is array (0 .. 3) of Nat8; Carries : Carries_Type; No_Carries : constant Carries_Type := (others => Pos_0); -- Shift right carries. Note the Carries (0) is the LSB. procedure Shr_Carries is begin Carries := (Carries (1), Carries (2), Carries (3), Pos_0); end Shr_Carries; procedure Append_Carries is begin -- Expand the bit string. Note that position 1 of the string8 is -- the MSB. while Carries /= No_Carries loop Append_String8 (Pos_0); Length := Length + 1; for I in reverse 2 .. Length loop Set_Element_String8 (Id, I, Element_String8 (Id, I - 1)); end loop; Set_Element_String8 (Id, 1, Carries (0)); Shr_Carries; end loop; end Append_Carries; -- Add 1 to Carries. Overflow is not allowed and should be prevented by -- construction. procedure Add_One_To_Carries is begin for I in Carries'Range loop if Carries (I) = Pos_0 then Carries (I) := Pos_0 + 1; -- End of propagation. exit; else Carries (I) := Pos_0; -- Continue propagation. pragma Assert (I < Carries'Last); end if; end loop; end Add_One_To_Carries; begin pragma Assert (Source (Pos) = '"'); Pos := Pos + 1; Length := 0; Id := Create_String8; Current_Context.Str_Id := Id; loop << Again >> null; C := Source (Pos); Pos := Pos + 1; exit when C = '"'; if C in '0' .. '9' then V := Character'Pos (C) - Character'Pos ('0'); elsif C = '_' then if Source (Pos) = '_' then Error_Msg_Scan ("double underscore not allowed in a bit string"); end if; if Source (Pos - 2) = '"' then Error_Msg_Scan ("underscore not allowed at the start of a bit string"); elsif Source (Pos) = '"' then Error_Msg_Scan ("underscore not allowed at the end of a bit string"); end if; goto Again; else if Characters_Kind (C) in Graphic_Character then Error_Msg_Scan ("graphic character not allowed in decimal bit string"); -- Continue the bit string V := 0; else Error_Msg_Scan ("bit string not terminated"); Pos := Pos - 1; exit; end if; end if; -- Multiply by 10. Carries := (others => Pos_0); for I in reverse 1 .. Length loop -- Shift by 1 (*2). D := Element_String8 (Id, I); Set_Element_String8 (Id, I, Carries (0)); Shr_Carries; -- Add D and D * 4. if D /= Pos_0 then Add_One_To_Carries; -- Add_Four_To_Carries: for I in 2 .. 3 loop if Carries (I) = Pos_0 then Carries (I) := Pos_0 + 1; -- End of propagation. exit; else Carries (I) := Pos_0; -- Continue propagation. end if; end loop; end if; end loop; Append_Carries; -- Add V. for I in Carries'Range loop D := V / 2; Carries (I) := Pos_0 + (V - 2 * D); V := D; end loop; for I in reverse 1 .. Length loop D := Element_String8 (Id, I); if D /= Pos_0 then Add_One_To_Carries; end if; Set_Element_String8 (Id, I, Carries (0)); Shr_Carries; exit when Carries = No_Carries; end loop; Append_Carries; end loop; Current_Token := Tok_Bit_String; Current_Context.Str_Len := Length; end Scan_Dec_Bit_String; -- LRM93 13.3.1 -- Basic Identifiers -- A basic identifier consists only of letters, digits, and underlines. -- BASIC_IDENTIFIER ::= LETTER { [ UNDERLINE ] LETTER_OR_DIGIT } -- LETTER_OR_DIGIT ::= LETTER | DIGIT -- LETTER ::= UPPER_CASE_LETTER | LOWER_CASE_LETTER -- -- NB: At the call of this procedure, the current character must be a legal -- character for a basic identifier. procedure Scan_Identifier is use Name_Table; C : Character; Len : Natural; begin -- This is an identifier or a key word. Len := 0; loop -- Source (pos) is correct. -- LRM93 13.3.1 -- All characters if a basic identifier are signifiant, including -- any underline character inserted between a letter or digit and -- an adjacent letter or digit. -- Basic identifiers differing only in the use of the corresponding -- upper and lower case letters are considered as the same. -- This is achieved by converting all upper case letters into -- equivalent lower case letters. -- The opposite (converting in lower case letters) is not possible, -- because two characters have no upper-case equivalent. C := Source (Pos); case C is when 'A' .. 'Z' => C := Character'Val (Character'Pos (C) + Character'Pos ('a') - Character'Pos ('A')); when 'a' .. 'z' | '0' .. '9' => null; when '_' => if Source (Pos + 1) = '_' then Error_Msg_Scan ("two underscores can't be consecutive"); end if; when ' ' | ')' | '.' | ';' | ':' => exit; when others => -- Non common case. case Characters_Kind (C) is when Upper_Case_Letter | Lower_Case_Letter => if Vhdl_Std = Vhdl_87 then Error_8bit; end if; Len := Len + 1; C := Ada.Characters.Handling.To_Lower (C); when Digit => raise Internal_Error; when others => exit; end case; end case; -- Put character in name buffer. FIXME: compute the hash at the same -- time ? Len := Len + 1; Nam_Buffer (Len) := C; -- Next character. Pos := Pos + 1; end loop; if Source (Pos - 1) = '_' then if not Flag_Psl then -- Some PSL reserved words finish with '_'. This case is handled -- later. Error_Msg_Scan ("identifier cannot finish with '_'"); end if; Pos := Pos - 1; Len := Len - 1; C := '_'; end if; Nam_Length := Len; -- LRM93 13.2 -- At least one separator is required between an identifier or an -- abstract literal and an adjacent identifier or abstract literal. case Characters_Kind (C) is when Digit | Upper_Case_Letter | Lower_Case_Letter => raise Internal_Error; when Other_Special_Character | Special_Character => if (C = '"' or C = '%') and then Len <= 2 then if C = '%' and Vhdl_Std >= Vhdl_08 then Error_Msg_Scan ("'%' not allowed in vhdl 2008 " & "(was replacement character)"); -- Continue as a bit string. end if; -- Good candidate for bit string. -- LRM93 13.7 -- BASE_SPECIFIER ::= B | O | X -- -- A letter in a bit string literal (either an extended digit -- or the base specifier) can be written either in lower case -- or in upper case, with the same meaning. -- -- LRM08 15.8 Bit string literals -- BASE_SPECICIER ::= -- B | O | X | UB | UO | UX | SB | SO | SX | D -- -- An extended digit and the base specifier in a bit string -- literal can be written either in lowercase or in uppercase, -- with the same meaning. declare Base : Nat32; Cl : constant Character := Nam_Buffer (Len); Cf : constant Character := Nam_Buffer (1); begin if Cl = 'b' then Base := 1; elsif Cl = 'o' then Base := 3; elsif Cl = 'x' then Base := 4; elsif Vhdl_Std >= Vhdl_08 and Len = 1 and Cf = 'd' then Scan_Dec_Bit_String; return; else Base := 0; end if; if Base > 0 then if Len = 1 then Scan_Bit_String (Base); return; elsif Vhdl_Std >= Vhdl_08 and then (Cf = 's' or Cf = 'u') then Scan_Bit_String (Base); return; end if; end if; end; end if; if Vhdl_Std > Vhdl_87 and then C = '\' then -- Start of extended identifier. Cannot follow an identifier. Error_Separator; end if; when Invalid | Format_Effector | Space_Character => null; end case; -- Hash it. Current_Context.Identifier := Name_Table.Get_Identifier; if Current_Identifier in Std_Names.Name_Id_Keywords then -- LRM93 13.9 -- The identifiers listed below are called reserved words and are -- reserved for signifiances in the language. -- IN: this is also achieved in packages std_names and tokens. Current_Token := Token_Type'Val (Token_Type'Pos (Tok_First_Keyword) + Current_Identifier - Std_Names.Name_First_Keyword); case Current_Identifier is when Std_Names.Name_Id_AMS_Reserved_Words => if not AMS_Vhdl then if Flags.Warn_Reserved_Word then Warning_Msg_Scan ("using """ & Nam_Buffer (1 .. Nam_Length) & """ AMS-VHDL reserved word as an identifier"); end if; Current_Token := Tok_Identifier; end if; when Std_Names.Name_Id_Vhdl00_Reserved_Words => if Vhdl_Std < Vhdl_00 then if Flags.Warn_Reserved_Word then Warning_Msg_Scan ("using """ & Nam_Buffer (1 .. Nam_Length) & """ vhdl00 reserved word as an identifier"); end if; Current_Token := Tok_Identifier; end if; when Std_Names.Name_Id_Vhdl93_Reserved_Words => if Vhdl_Std = Vhdl_87 then if Flags.Warn_Reserved_Word then Warning_Msg_Scan ("using """ & Nam_Buffer (1 .. Nam_Length) & """ vhdl93 reserved word as a vhdl87 identifier"); Warning_Msg_Scan ("(use option --std=93 to compile as vhdl93)"); end if; Current_Token := Tok_Identifier; end if; when Std_Names.Name_Id_Vhdl87_Reserved_Words => null; when others => raise Program_Error; end case; elsif Flag_Psl then case Current_Identifier is when Std_Names.Name_Clock => Current_Token := Tok_Psl_Clock; when Std_Names.Name_Const => Current_Token := Tok_Psl_Const; when Std_Names.Name_Boolean => Current_Token := Tok_Psl_Boolean; when Std_Names.Name_Sequence => Current_Token := Tok_Psl_Sequence; when Std_Names.Name_Property => Current_Token := Tok_Psl_Property; when Std_Names.Name_Endpoint => Current_Token := Tok_Psl_Endpoint; when Std_Names.Name_Cover => Current_Token := Tok_Psl_Cover; when Std_Names.Name_Default => Current_Token := Tok_Psl_Default; when Std_Names.Name_Inf => Current_Token := Tok_Inf; when Std_Names.Name_Within => Current_Token := Tok_Within; when Std_Names.Name_Abort => Current_Token := Tok_Abort; when Std_Names.Name_Before => Current_Token := Tok_Before; when Std_Names.Name_Always => Current_Token := Tok_Always; when Std_Names.Name_Never => Current_Token := Tok_Never; when Std_Names.Name_Eventually => Current_Token := Tok_Eventually; when Std_Names.Name_Next_A => Current_Token := Tok_Next_A; when Std_Names.Name_Next_E => Current_Token := Tok_Next_E; when Std_Names.Name_Next_Event => Current_Token := Tok_Next_Event; when Std_Names.Name_Next_Event_A => Current_Token := Tok_Next_Event_A; when Std_Names.Name_Next_Event_E => Current_Token := Tok_Next_Event_E; when Std_Names.Name_Until => Current_Token := Tok_Until; when others => Current_Token := Tok_Identifier; if C = '_' then Error_Msg_Scan ("identifiers cannot finish with '_'"); end if; end case; else Current_Token := Tok_Identifier; end if; end Scan_Identifier; -- LRM93 13.3.2 -- EXTENDED_IDENTIFIER ::= \ GRAPHIC_CHARACTER { GRAPHIC_CHARACTER } \ -- -- Create an (extended) indentifier. -- Extended identifiers are stored as they appear (leading and tailing -- backslashes, doubling backslashes inside). procedure Scan_Extended_Identifier is use Name_Table; begin -- LRM93 13.3.2 -- Moreover, every extended identifiers is distinct from any basic -- identifier. -- This is satisfied by storing '\' in the name table. Nam_Length := 1; Nam_Buffer (1) := '\'; loop -- Next character. Pos := Pos + 1; if Source (Pos) = '\' then -- LRM93 13.3.2 -- If a backslash is to be used as one of the graphic characters -- of an extended literal, it must be doubled. -- LRM93 13.3.2 -- (a doubled backslash couting as one character) Nam_Length := Nam_Length + 1; Nam_Buffer (Nam_Length) := '\'; Pos := Pos + 1; exit when Source (Pos) /= '\'; end if; -- source (pos) is correct. case Characters_Kind (Source (Pos)) is when Format_Effector => Error_Msg_Scan ("format effector in extended identifier"); exit; when Graphic_Character => null; when Invalid => Error_Msg_Scan ("invalid character in extended identifier"); end case; Nam_Length := Nam_Length + 1; -- LRM93 13.3.2 -- Extended identifiers differing only in the use of corresponding -- upper and lower case letters are distinct. Nam_Buffer (Nam_Length) := Source (Pos); end loop; if Nam_Length <= 2 then Error_Msg_Scan ("empty extended identifier is not allowed"); end if; -- LRM93 13.2 -- At least one separator is required between an identifier or an -- abstract literal and an adjacent identifier or abstract literal. case Characters_Kind (Source (Pos)) is when Digit | Upper_Case_Letter | Lower_Case_Letter => Error_Separator; when Invalid | Format_Effector | Space_Character | Special_Character | Other_Special_Character => null; end case; -- Hash it. Current_Context.Identifier := Name_Table.Get_Identifier; Current_Token := Tok_Identifier; end Scan_Extended_Identifier; procedure Convert_Identifier is procedure Error_Bad is begin Error_Msg_Option ("bad character in identifier"); end Error_Bad; procedure Error_8bit is begin Error_Msg_Option ("8 bits characters not allowed in vhdl87"); end Error_8bit; use Name_Table; C : Character; begin if Nam_Length = 0 then Error_Msg_Option ("identifier required"); return; end if; if Nam_Buffer (1) = '\' then -- Extended identifier. if Vhdl_Std = Vhdl_87 then Error_Msg_Option ("extended identifiers not allowed in vhdl87"); return; end if; if Nam_Length < 3 then Error_Msg_Option ("extended identifier is too short"); return; end if; if Nam_Buffer (Nam_Length) /= '\' then Error_Msg_Option ("extended identifier must finish with a '\'"); return; end if; for I in 2 .. Nam_Length - 1 loop C := Nam_Buffer (I); case Characters_Kind (C) is when Format_Effector => Error_Msg_Option ("format effector in extended identifier"); return; when Graphic_Character => if C = '\' then if Nam_Buffer (I + 1) /= '\' or else I = Nam_Length - 1 then Error_Msg_Option ("anti-slash must be doubled " & "in extended identifier"); return; end if; end if; when Invalid => Error_Bad; end case; end loop; else -- Identifier for I in 1 .. Nam_Length loop C := Nam_Buffer (I); case Characters_Kind (C) is when Upper_Case_Letter => if Vhdl_Std = Vhdl_87 and C > 'Z' then Error_8bit; end if; Nam_Buffer (I) := Ada.Characters.Handling.To_Lower (C); when Lower_Case_Letter | Digit => if Vhdl_Std = Vhdl_87 and C > 'z' then Error_8bit; end if; when Special_Character => -- The current character is legal in an identifier. if C = '_' then if I = 1 then Error_Msg_Option ("identifier cannot start with an underscore"); return; end if; if Nam_Buffer (I - 1) = '_' then Error_Msg_Option ("two underscores can't be consecutive"); return; end if; if I = Nam_Length then Error_Msg_Option ("identifier cannot finish with an underscore"); return; end if; else Error_Bad; end if; when others => Error_Bad; end case; end loop; end if; end Convert_Identifier; -- Scan an identifier within a comment. Only lower case letters are -- allowed. function Scan_Comment_Identifier return Boolean is use Name_Table; Len : Natural; C : Character; begin -- Skip spaces. while Source (Pos) = ' ' or Source (Pos) = HT loop Pos := Pos + 1; end loop; -- The identifier shall start with a lower case letter. if Source (Pos) not in 'a' .. 'z' then return False; end if; -- Scan the identifier (in lower cases). Len := 0; loop C := Source (Pos); exit when C not in 'a' .. 'z' and C /= '_'; Len := Len + 1; Nam_Buffer (Len) := C; Pos := Pos + 1; end loop; -- Shall be followed by a space or a new line. case C is when ' ' | HT | LF | CR => null; when others => return False; end case; Nam_Length := Len; return True; end Scan_Comment_Identifier; -- Scan tokens within a comment. Return TRUE if Current_Token was set, -- return FALSE to discard the comment (ie treat it like a real comment). function Scan_Comment return Boolean is use Std_Names; Id : Name_Id; begin if not Scan_Comment_Identifier then return False; end if; -- Hash it. Id := Name_Table.Get_Identifier; case Id is when Name_Psl => -- Accept tokens after '-- psl'. if Flag_Psl_Comment then Flag_Psl := True; Flag_Scan_In_Comment := True; return True; end if; when others => null; end case; return False; end Scan_Comment; function Scan_Exclam_Mark return Boolean is begin if Source (Pos) = '!' then Pos := Pos + 1; return True; else return False; end if; end Scan_Exclam_Mark; function Scan_Underscore return Boolean is begin if Source (Pos) = '_' then Pos := Pos + 1; return True; else return False; end if; end Scan_Underscore; -- The Scan_Next_Line procedure must be called after each end-of-line to -- register to next line number. This is called by Scan_CR_Newline and -- Scan_LF_Newline. procedure Scan_Next_Line is begin Current_Context.Line_Number := Current_Context.Line_Number + 1; Current_Context.Line_Pos := Pos; File_Add_Line_Number (Current_Context.Source_File, Current_Context.Line_Number, Pos); end Scan_Next_Line; -- Scan a CR end-of-line. procedure Scan_CR_Newline is begin -- Accept CR or CR+LF as line separator. if Source (Pos + 1) = LF then Pos := Pos + 2; else Pos := Pos + 1; end if; Scan_Next_Line; end Scan_CR_Newline; -- Scan a LF end-of-line. procedure Scan_LF_Newline is begin -- Accept LF or LF+CR as line separator. if Source (Pos + 1) = CR then Pos := Pos + 2; else Pos := Pos + 1; end if; Scan_Next_Line; end Scan_LF_Newline; -- Get a new token. procedure Scan is begin if Current_Token /= Tok_Invalid then Current_Context.Prev_Token := Current_Token; end if; << Again >> null; -- Skip commonly used separators. while Source(Pos) = ' ' or Source(Pos) = HT loop Pos := Pos + 1; end loop; Current_Context.Token_Pos := Pos; Current_Context.Identifier := Null_Identifier; case Source (Pos) is when HT | ' ' => -- Must have already been skipped just above. raise Internal_Error; when NBSP => if Vhdl_Std = Vhdl_87 then Error_Msg_Scan ("NBSP character not allowed in vhdl87"); end if; Pos := Pos + 1; goto Again; when VT | FF => Pos := Pos + 1; goto Again; when LF => Scan_LF_Newline; if Flag_Newline then Current_Token := Tok_Newline; return; end if; goto Again; when CR => Scan_CR_Newline; if Flag_Newline then Current_Token := Tok_Newline; return; end if; goto Again; when '-' => if Source (Pos + 1) = '-' then -- This is a comment. -- LRM93 13.8 -- A comment starts with two adjacent hyphens and extends up -- to the end of the line. -- A comment can appear on any line line of a VHDL -- description. -- The presence or absence of comments has no influence on -- wether a description is legal or illegal. -- Futhermore, comments do not influence the execution of a -- simulation module; their sole purpose is the enlightenment -- of the human reader. -- GHDL note: As a consequence, an obfruscating comment -- is out of purpose, and a warning could be reported :-) Pos := Pos + 2; -- Scan inside a comment. So we just ignore the two dashes. if Flag_Scan_In_Comment then goto Again; end if; -- Handle keywords in comment (PSL). if Flag_Comment_Keyword and then Scan_Comment then goto Again; end if; -- LRM93 13.2 -- In any case, a sequence of one or more format -- effectors other than horizontal tabulation must -- cause at least one end of line. while Source (Pos) /= CR and Source (Pos) /= LF and Source (Pos) /= VT and Source (Pos) /= FF and Source (Pos) /= Files_Map.EOT loop if not Flags.Mb_Comment and then Characters_Kind (Source (Pos)) = Invalid then Error_Msg_Scan ("invalid character, even in a comment"); end if; Pos := Pos + 1; end loop; if Flag_Comment then Current_Token := Tok_Comment; return; end if; goto Again; elsif Flag_Psl and then Source (Pos + 1) = '>' then Current_Token := Tok_Minus_Greater; Pos := Pos + 2; return; else Current_Token := Tok_Minus; Pos := Pos + 1; return; end if; when '+' => Current_Token := Tok_Plus; Pos := Pos + 1; return; when '*' => if Source (Pos + 1) = '*' then Current_Token := Tok_Double_Star; Pos := Pos + 2; else Current_Token := Tok_Star; Pos := Pos + 1; end if; return; when '/' => if Source (Pos + 1) = '=' then Current_Token := Tok_Not_Equal; Pos := Pos + 2; elsif Source (Pos + 1) = '*' then -- LRM08 15.9 Comments -- A delimited comment start with a solidus (slash) character -- immediately followed by an asterisk character and extends up -- to the first subsequent occurrence of an asterisk character -- immediately followed by a solidus character. if Vhdl_Std < Vhdl_08 then Error_Msg_Scan ("block comment are not allowed before vhdl 2008"); end if; -- Skip '/*'. Pos := Pos + 2; loop case Source (Pos) is when '/' => -- LRM08 15.9 -- Moreover, an occurrence of a solidus character -- immediately followed by an asterisk character -- within a delimited comment is not interpreted as -- the start of a nested delimited comment. if Source (Pos + 1) = '*' then Warning_Msg_Scan ("'/*' found within a block comment"); end if; Pos := Pos + 1; when '*' => if Source (Pos + 1) = '/' then Pos := Pos + 2; exit; else Pos := Pos + 1; end if; when CR => Scan_CR_Newline; when LF => Scan_LF_Newline; when Files_Map.EOT => if Pos >= Current_Context.File_Len then -- Point at the start of the comment. Error_Msg_Scan ("block comment not terminated at end of file", File_Pos_To_Location (Current_Context.Source_File, Current_Context.Token_Pos)); exit; end if; Pos := Pos + 1; when others => Pos := Pos + 1; end case; end loop; if Flag_Comment then Current_Token := Tok_Comment; return; end if; goto Again; else Current_Token := Tok_Slash; Pos := Pos + 1; end if; return; when '(' => Current_Token := Tok_Left_Paren; Pos := Pos + 1; return; when ')' => Current_Token := Tok_Right_Paren; Pos := Pos + 1; return; when '|' => if Flag_Psl then if Source (Pos + 1) = '|' then Current_Token := Tok_Bar_Bar; Pos := Pos + 2; elsif Source (Pos + 1) = '-' and then Source (Pos + 2) = '>' then Current_Token := Tok_Bar_Arrow; Pos := Pos + 3; elsif Source (Pos + 1) = '=' and then Source (Pos + 2) = '>' then Current_Token := Tok_Bar_Double_Arrow; Pos := Pos + 3; else Current_Token := Tok_Bar; Pos := Pos + 1; end if; else Current_Token := Tok_Bar; Pos := Pos + 1; end if; return; when '!' => if Flag_Psl then Current_Token := Tok_Exclam_Mark; else -- LRM93 13.10 -- A vertical line (|) can be replaced by an exclamation -- mark (!) where used as a delimiter. Current_Token := Tok_Bar; end if; Pos := Pos + 1; return; when ':' => if Source (Pos + 1) = '=' then Current_Token := Tok_Assign; Pos := Pos + 2; else Current_Token := Tok_Colon; Pos := Pos + 1; end if; return; when ';' => Current_Token := Tok_Semi_Colon; Pos := Pos + 1; return; when ',' => Current_Token := Tok_Comma; Pos := Pos + 1; return; when '.' => if Source (Pos + 1) = '.' then -- Be Ada friendly... Error_Msg_Scan ("'..' is invalid in vhdl, replaced by 'to'"); Current_Token := Tok_To; Pos := Pos + 2; return; end if; Current_Token := Tok_Dot; Pos := Pos + 1; return; when '&' => if Flag_Psl and then Source (Pos + 1) = '&' then Current_Token := Tok_And_And; Pos := Pos + 2; else Current_Token := Tok_Ampersand; Pos := Pos + 1; end if; return; when '<' => if Source (Pos + 1) = '=' then Current_Token := Tok_Less_Equal; Pos := Pos + 2; elsif Source (Pos + 1) = '>' then Current_Token := Tok_Box; Pos := Pos + 2; else Current_Token := Tok_Less; Pos := Pos + 1; end if; return; when '>' => if Source (Pos + 1) = '=' then Current_Token := Tok_Greater_Equal; Pos := Pos + 2; else Current_Token := Tok_Greater; Pos := Pos + 1; end if; return; when '=' => if Source (Pos + 1) = '=' then if AMS_Vhdl then Current_Token := Tok_Equal_Equal; else Error_Msg_Scan ("'==' is not the vhdl equality, replaced by '='"); Current_Token := Tok_Equal; end if; Pos := Pos + 2; elsif Source (Pos + 1) = '>' then Current_Token := Tok_Double_Arrow; Pos := Pos + 2; else Current_Token := Tok_Equal; Pos := Pos + 1; end if; return; when ''' => -- Handle cases such as character'('a') -- FIXME: what about f ()'length ? or .all'length if Current_Context.Prev_Token /= Tok_Identifier and then Current_Context.Prev_Token /= Tok_Character and then Source (Pos + 2) = ''' then -- LRM93 13.5 -- A character literal is formed by enclosing one of the 191 -- graphic character (...) between two apostrophe characters. -- CHARACTER_LITERAL ::= ' GRAPHIC_CHARACTER ' if Characters_Kind (Source (Pos + 1)) not in Graphic_Character then Error_Msg_Scan ("a character literal can only be a graphic character"); elsif Vhdl_Std = Vhdl_87 and then Source (Pos + 1) > Character'Val (127) then Error_8bit; end if; Current_Token := Tok_Character; Current_Context.Identifier := Name_Table.Get_Identifier (Source (Pos + 1)); Pos := Pos + 3; return; else Current_Token := Tok_Tick; Pos := Pos + 1; end if; return; when '0' .. '9' => Scan_Literal; -- LRM93 13.2 -- At least one separator is required between an identifier or -- an abstract literal and an adjacent identifier or abstract -- literal. case Characters_Kind (Source (Pos)) is when Digit => raise Internal_Error; when Upper_Case_Letter | Lower_Case_Letter => -- Could call Error_Separator, but use a clearer message -- for this common case. -- Note: the term "unit name" is not correct here, since -- it can be any identifier or even a keyword; however it -- is probably the most common case (eg 10ns). if Vhdl_Std >= Vhdl_08 and then Current_Token = Tok_Integer then Current_Token := Tok_Integer_Letter; else Error_Msg_Scan ("space is required between number and unit name"); end if; when Other_Special_Character => if Vhdl_Std > Vhdl_87 and then Source (Pos) = '\' then -- Start of extended identifier. Error_Separator; end if; when Invalid | Format_Effector | Space_Character | Special_Character => null; end case; return; when '#' => Error_Msg_Scan ("'#' is used for based literals and " & "must be preceded by a base"); -- Cannot easily continue. raise Compilation_Error; when '"' => Scan_String; return; when '%' => if Vhdl_Std >= Vhdl_08 then Error_Msg_Scan ("'%' not allowed in vhdl 2008 (was replacement character)"); -- Continue as a string. end if; Scan_String; return; when '[' => if Flag_Psl then if Source (Pos + 1) = '*' then Current_Token := Tok_Brack_Star; Pos := Pos + 2; elsif Source (Pos + 1) = '+' and then Source (Pos + 2) = ']' then Current_Token := Tok_Brack_Plus_Brack; Pos := Pos + 3; elsif Source (Pos + 1) = '-' and then Source (Pos + 2) = '>' then Current_Token := Tok_Brack_Arrow; Pos := Pos + 3; elsif Source (Pos + 1) = '=' then Current_Token := Tok_Brack_Equal; Pos := Pos + 2; else Current_Token := Tok_Left_Bracket; Pos := Pos + 1; end if; else if Vhdl_Std = Vhdl_87 then Error_Msg_Scan ("'[' is an invalid character in vhdl87, replaced by '('"); Current_Token := Tok_Left_Paren; else Current_Token := Tok_Left_Bracket; end if; Pos := Pos + 1; end if; return; when ']' => if Vhdl_Std = Vhdl_87 and not Flag_Psl then Error_Msg_Scan ("']' is an invalid character in vhdl87, replaced by ')'"); Current_Token := Tok_Right_Paren; else Current_Token := Tok_Right_Bracket; end if; Pos := Pos + 1; return; when '{' => if Flag_Psl then Current_Token := Tok_Left_Curly; else Error_Msg_Scan ("'{' is an invalid character, replaced by '('"); Current_Token := Tok_Left_Paren; end if; Pos := Pos + 1; return; when '}' => if Flag_Psl then Current_Token := Tok_Right_Curly; else Error_Msg_Scan ("'}' is an invalid character, replaced by ')'"); Current_Token := Tok_Right_Paren; end if; Pos := Pos + 1; return; when '\' => if Vhdl_Std = Vhdl_87 then Error_Msg_Scan ("extended identifiers are not allowed in vhdl87"); end if; Scan_Extended_Identifier; return; when '^' => Error_Msg_Scan ("'^' is not a VHDL operator, use 'xor'"); Pos := Pos + 1; Current_Token := Tok_Xor; return; when '~' => Error_Msg_Scan ("'~' is not a VHDL operator, use 'not'"); Pos := Pos + 1; Current_Token := Tok_Not; return; when '?' => if Vhdl_Std < Vhdl_08 then Error_Msg_Scan ("'?' can only be used in strings or comments"); Pos := Pos + 1; goto Again; else if Source (Pos + 1) = '<' then if Source (Pos + 2) = '=' then Current_Token := Tok_Match_Less_Equal; Pos := Pos + 3; else Current_Token := Tok_Match_Less; Pos := Pos + 2; end if; elsif Source (Pos + 1) = '>' then if Source (Pos + 2) = '=' then Current_Token := Tok_Match_Greater_Equal; Pos := Pos + 3; else Current_Token := Tok_Match_Greater; Pos := Pos + 2; end if; elsif Source (Pos + 1) = '?' then Current_Token := Tok_Condition; Pos := Pos + 2; elsif Source (Pos + 1) = '=' then Current_Token := Tok_Match_Equal; Pos := Pos + 2; elsif Source (Pos + 1) = '/' and then Source (Pos + 2) = '=' then Current_Token := Tok_Match_Not_Equal; Pos := Pos + 3; else Error_Msg_Scan ("unknown matching operator"); Pos := Pos + 1; goto Again; end if; end if; return; when '$' | '`' | Inverted_Exclamation .. Inverted_Question | Multiplication_Sign | Division_Sign => Error_Msg_Scan ("character """ & Source (Pos) & """ can only be used in strings or comments"); Pos := Pos + 1; goto Again; when '@' => if Flag_Psl then Current_Token := Tok_Arobase; Pos := Pos + 1; return; else Error_Msg_Scan ("character """ & Source (Pos) & """ can only be used in strings or comments"); Pos := Pos + 1; goto Again; end if; when '_' => Error_Msg_Scan ("an identifier can't start with '_'"); Pos := Pos + 1; goto Again; when 'A' .. 'Z' | 'a' .. 'z' => Scan_Identifier; return; when UC_A_Grave .. UC_O_Diaeresis | UC_O_Oblique_Stroke .. UC_Icelandic_Thorn => if Vhdl_Std = Vhdl_87 then Error_Msg_Scan ("upper case letters above 128 are not allowed in vhdl87"); end if; Scan_Identifier; return; when LC_German_Sharp_S .. LC_O_Diaeresis | LC_O_Oblique_Stroke .. LC_Y_Diaeresis => if Vhdl_Std = Vhdl_87 then Error_Msg_Scan ("lower case letters above 128 are not allowed in vhdl87"); end if; Scan_Identifier; return; when NUL .. ETX | ENQ .. BS | SO .. US | DEL .. APC => Error_Msg_Scan ("control character that is not CR, LF, FF, HT or VT " & "is not allowed"); Pos := Pos + 1; goto Again; when Files_Map.EOT => if Pos >= Current_Context.File_Len then -- FIXME: should conditionnaly emit a warning if the file -- is not terminated by an end of line. Current_Token := Tok_Eof; else Error_Msg_Scan ("EOT is not allowed inside the file"); Pos := Pos + 1; goto Again; end if; return; end case; end Scan; function Get_Token_Location return Location_Type is begin return File_Pos_To_Location (Current_Context.Source_File, Current_Context.Token_Pos); end Get_Token_Location; end Scanner;