libadalang_24.0.0_a1358075/src/libadalang-pp_lexer.adb

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
--
--  Copyright (C) 2014-2022, AdaCore
--  SPDX-License-Identifier: Apache-2.0
--

with Ada.Characters.Handling; use Ada.Characters.Handling;
with Ada.Exceptions;          use Ada.Exceptions;
with Ada.IO_Exceptions;

with GNATCOLL.Mmap;

with Langkit_Support.Errors; use Langkit_Support.Errors;

package body Libadalang.PP_Lexer is

   subtype Symbol_Set is Character
   with Static_Predicate =>
     Symbol_Set in 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_';
   --  ASCII letters, digits and the underscore. Next uses this set to scan
   --  the characters that follow the dash of a switch token, up to an
   --  optional '='.

   subtype Chars_Sequence_Set is Character
   with Static_Predicate => Chars_Sequence_Set in Symbol_Set | '.';
   --  Every character in Symbol_Set, plus the dot. Next uses this set both to
   --  recognize Chars_Sequence tokens and to scan an unquoted value after the
   --  '=' of a switch token.

   ----------
   -- Read --
   ----------

   function Read (Filename : String) return String_Access is
      --  Return a newly allocated string that holds a copy of the whole
      --  content of the file called Filename. The caller is responsible for
      --  freeing the result.
      --
      --  Raise File_Read_Error, carrying the original exception message, if
      --  opening the file raises Ada.IO_Exceptions.Name_Error. Any other
      --  exception is propagated unchanged, after the file and the mapped
      --  region have been released.

      use GNATCOLL.Mmap;

      File   : Mapped_File;
      Region : Mapped_Region;
      Buffer : Str_Access;
      Result : String_Access;
   begin
      begin
         File := Open_Read (Filename);
      exception
         when Exc : Ada.IO_Exceptions.Name_Error =>
            raise File_Read_Error with Exception_Message (Exc);
      end;

      --  From this point on File is open: make sure it gets closed even if
      --  mapping its content fails.

      begin
         Region := Read (File);
      exception
         when others =>
            Close (File);
            raise;
      end;

      --  Copy the mapped bytes into a heap-allocated string. Should the
      --  allocation or the copy fail, release everything acquired so far
      --  before letting the exception propagate.

      begin
         Buffer := Data (Region);
         Result := new String (1 .. Last (Region));
         Result.all := Buffer.all (Result.all'Range);
      exception
         when others =>
            Free (Result);
            Free (Region);
            Close (File);
            raise;
      end;

      Free (Region);
      Close (File);
      return Result;
   end Read;

   -------------------------
   -- Is_Valid_Identifier --
   -------------------------

   function Is_Valid_Identifier (Text : String) return Boolean is
      --  Return whether Text is a valid identifier: non-empty, starting with
      --  an ASCII letter, made only of ASCII letters, digits and
      --  underscores, not ending with an underscore and without two
      --  consecutive underscores.

      Previous : Character := ASCII.NUL;
      --  Character seen during the previous loop iteration
   begin
      if Text'Length = 0 then
         return False;
      end if;

      --  Check the constraints that apply to both ends of the text

      if Text (Text'First) not in 'a' .. 'z' | 'A' .. 'Z'
         or else Text (Text'Last) = '_'
      then
         return False;
      end if;

      --  Then make sure that every character is allowed and that no two
      --  underscores are adjacent.

      for C of Text loop
         case C is
            when '_' =>
               if Previous = '_' then
                  return False;
               end if;

            when 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' =>
               null;

            when others =>
               return False;
         end case;

         Previous := C;
      end loop;

      return True;
   end Is_Valid_Identifier;

   ------------------
   -- Create_Lexer --
   ------------------

   function Create_Lexer (Diagnostic_Filename, Filename : String) return Lexer
   is
      --  Create a lexer that scans the content of the file called Filename.
      --  Diagnostic_Filename is the name stored in the lexer to prefix error
      --  messages. Exceptions raised by Read are propagated.
   begin
      return New_Lexer : Lexer do
         New_Lexer.Buffer := Read (Filename);

         --  Scanning starts at the first character of the buffer, which is
         --  at line 1, column 1.

         New_Lexer.Next_Char := New_Lexer.Buffer'First;
         New_Lexer.Next_Sloc := (Line => 1, Column => 1);

         New_Lexer.Filename := US.To_Unbounded_String (Diagnostic_Filename);
      end return;
   end Create_Lexer;

   ----------
   -- Next --
   ----------

   procedure Next (Self : in out Lexer; Token : out Token_Type) is
      --  Scan the next token in Self's buffer and describe it in Token.
      --
      --  Spaces, carriage returns and "--" comments are skipped. On return,
      --  Token.First and Token.Sloc designate the first character of the
      --  token and Token.Last designates its last character. When the end of
      --  the buffer is reached, Token.Kind is set to EOF (Token.Last is not
      --  meaningful in that case).
      --
      --  Raise Syntax_Error, with a message prefixed by the diagnostic file
      --  name and the current source location, when the input cannot be
      --  tokenized.

      type Peeked_Char is record
         EOF   : Boolean;
         Value : Character;
      end record;
      --  Result of looking at the next character to scan. Value is
      --  ASCII.NUL when EOF is True.

      function Peek return Peeked_Char;
      --  Return the next character to scan without consuming it

      procedure Next_Char;
      --  Consume the next character: include it in Token and update Self's
      --  scanning position and source location. Must not be called once the
      --  whole buffer has been consumed.

      procedure Scan_String_Literal;
      --  Consume the rest of a string literal, assuming that its opening
      --  quote was already consumed. Raise a Syntax_Error if the literal is
      --  not terminated before the end of the line or of the buffer.

      procedure Error (Message : String) with No_Return;
      --  Raise a Syntax_Error for Message at the current source location

      PC : Peeked_Char;

      ----------
      -- Peek --
      ----------

      function Peek return Peeked_Char is
      begin
         if Self.Next_Char > Self.Buffer.all'Last then
            return (EOF => True, Value => ASCII.NUL);
         else
            return (EOF => False, Value => Self.Buffer.all (Self.Next_Char));
         end if;
      end Peek;

      ---------------
      -- Next_Char --
      ---------------

      procedure Next_Char is
      begin
         pragma Assert (Self.Next_Char <= Self.Buffer.all'Last);
         Token.Last := Self.Next_Char;

         --  A line feed moves to the first column of the next line; any
         --  other character just moves to the next column.

         if Self.Buffer.all (Self.Next_Char) = ASCII.LF then
            Self.Next_Sloc.Line := Self.Next_Sloc.Line + 1;
            Self.Next_Sloc.Column := 1;
         else
            Self.Next_Sloc.Column := Self.Next_Sloc.Column + 1;
         end if;
         Self.Next_Char := Self.Next_Char + 1;
      end Next_Char;

      -------------------------
      -- Scan_String_Literal --
      -------------------------

      procedure Scan_String_Literal is
      begin
         loop
            PC := Peek;

            --  Check for the end of the buffer *before* consuming anything:
            --  Next_Char must not be called when there is no character left,
            --  otherwise a literal that runs up to the end of the buffer
            --  would trigger an assertion/index failure instead of the
            --  expected syntax error.

            if PC.EOF then
               Error ("unterminated string literal");
            end if;

            --  The line feed is consumed before the error is raised, so the
            --  reported location is the start of the following line.

            Next_Char;
            if PC.Value = ASCII.LF then
               Error ("unterminated string literal");

            elsif PC.Value = '"' then

               --  Two consecutive quotes stand for a quote character inside
               --  the literal; a single quote terminates the literal.

               PC := Peek;
               if not PC.EOF and then PC.Value = '"' then
                  Next_Char;
               else
                  return;
               end if;
            end if;
         end loop;
      end Scan_String_Literal;

      -----------
      -- Error --
      -----------

      procedure Error (Message : String) is
      begin
         raise Syntax_Error with
            US.To_String (Self.Filename)
            & ":" & Image (Self.Next_Sloc)
            & ": " & Message;
      end Error;
   begin
      --  Each iteration attempts to scan one token. Branches that produce a
      --  token return; branches that skip text (blanks, comments) fall
      --  through to the next iteration, which restarts the token bounds.

      loop
         Token.First := Self.Next_Char;
         Token.Sloc := Self.Next_Sloc;

         PC := Peek;
         if PC.EOF then
            Token.Kind := EOF;
            return;
         end if;
         Next_Char;

         case PC.Value is
            when ASCII.LF =>
               Token.Kind := EOL;
               return;

            when ' ' | ASCII.CR =>
               null;

            when '-' =>
               PC := Peek;
               if PC.EOF then
                  Error ("stray dash");

               elsif PC.Value = '-' then

                  --  This is a comment: skip everything up to the end of the
                  --  line, leaving the line feed itself (if any) for the
                  --  next iteration.

                  while not PC.EOF and then PC.Value /= ASCII.LF loop
                     Next_Char;
                     PC := Peek;
                  end loop;

               --  Past this point, we can only be parsing a gnatprep switch

               elsif PC.Value not in 'a' .. 'z' | 'A' .. 'Z' then
                  Error ("stray dash");
               else
                  Token.Kind := Switch;

                  --  First take all alphanumeric characters possible (switch
                  --  name plus potentially a symbol name).

                  loop
                     PC := Peek;
                     if not PC.EOF and then PC.Value = '=' then
                        Next_Char;
                        exit;

                     elsif PC.EOF or else PC.Value not in Symbol_Set then
                        return;
                     end if;

                     Next_Char;
                  end loop;

                  --  We just got a "=" after the switch name: we expect either
                  --  nothing, the equivalent of a chars sequence, or a string
                  --  literal.

                  PC := Peek;
                  if PC.EOF or else PC.Value = ASCII.LF then
                     null;

                  elsif PC.Value in Chars_Sequence_Set then
                     while not PC.EOF and then PC.Value in Chars_Sequence_Set
                     loop
                        Next_Char;
                        PC := Peek;
                     end loop;

                  elsif PC.Value = '"' then
                     Next_Char;
                     Scan_String_Literal;
                  end if;

                  return;
               end if;

            when ':' =>
               PC := Peek;
               if PC.EOF then
                  Error ("stray colon");

               elsif PC.Value = '=' then
                  Token.Kind := Assign;
                  Next_Char;
                  return;

               else
                  Error ("invalid token");
               end if;

            when '*' =>
               Token.Kind := Star;
               return;

            when '"' =>
               Token.Kind := String_Literal;
               Scan_String_Literal;
               return;

            when Chars_Sequence_Set =>

               --  Extend the token as long as the next character belongs to
               --  the chars sequence set.

               Token.Kind := Chars_Sequence;
               loop
                  PC := Peek;
                  if PC.EOF or else PC.Value not in Chars_Sequence_Set then
                     return;
                  end if;
                  Next_Char;
               end loop;

            when others =>
               Error ("invalid token");
         end case;
      end loop;
   end Next;

   -------------------------
   -- Is_Valid_Identifier --
   -------------------------

   function Is_Valid_Identifier
     (Self : Lexer; Token : Token_Type) return Boolean
   is
      --  Return whether the text that Token covers in Self's buffer is a
      --  valid identifier.

      Text : String renames Self.Buffer.all (Token.First .. Token.Last);
   begin
      return Is_Valid_Identifier (Text);
   end Is_Valid_Identifier;

   --------------
   -- Get_Text --
   --------------

   function Get_Text
     (Self : Lexer; Token : Token_Type) return US.Unbounded_String
   is
      --  Return the text that Token covers in Self's buffer, unmodified

      Text : String renames Self.Buffer.all (Token.First .. Token.Last);
   begin
      return US.To_Unbounded_String (Text);
   end Get_Text;

   ------------------------
   -- Get_Text_Lowercase --
   ------------------------

   function Get_Text_Lowercase
     (Self : Lexer; Token : Token_Type) return US.Unbounded_String
   is
      --  Return the text that Token covers in Self's buffer, converted to
      --  lower case.

      Text : String renames Self.Buffer.all (Token.First .. Token.Last);
   begin
      return US.To_Unbounded_String (To_Lower (Text));
   end Get_Text_Lowercase;

   --------------------
   -- Denoted_String --
   --------------------

   function Denoted_String
     (Self : Lexer; Token : Token_Type) return US.Unbounded_String
   is
      --  Return the string value that the string literal Token denotes: the
      --  text between the enclosing quotes, with each doubled quote replaced
      --  by a single one.

      Literal : String renames Self.Buffer.all (Token.First .. Token.Last);
      Content : US.Unbounded_String;
      Pos     : Positive := Literal'First + 1;
      --  Index of the next character to process, starting right after the
      --  opening quote.
   begin
      pragma Assert (Token.Kind = String_Literal);
      pragma Assert (Literal'Length >= 2);
      pragma Assert
        (Literal (Literal'First) = '"'
         and then Literal (Literal'Last) = '"');

      loop
         --  Stop once the closing quote is reached

         exit when Pos >= Literal'Last;

         US.Append (Content, Literal (Pos));

         --  A quote inside the literal is always doubled: keep the first one
         --  (appended above) and skip the second.

         if Literal (Pos) = '"' then
            Pos := Pos + 2;
         else
            Pos := Pos + 1;
         end if;
      end loop;

      return Content;
   end Denoted_String;

   --------------
   -- Finalize --
   --------------

   overriding procedure Finalize (Self : in out Lexer) is
   begin
      --  Release the source buffer, which Create_Lexer allocated through
      --  Read.

      Free (Self.Buffer);
   end Finalize;

end Libadalang.PP_Lexer;