-- SPDX-License-Identifier: Apache-2.0 -- -- Copyright (c) 2016 onox -- -- Licensed under the Apache License, Version 2.0 (the "License"); -- you may not use this file except in compliance with the License. -- You may obtain a copy of the License at -- -- http://www.apache.org/licenses/LICENSE-2.0 -- -- Unless required by applicable law or agreed to in writing, software -- distributed under the License is distributed on an "AS IS" BASIS, -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- See the License for the specific language governing permissions and -- limitations under the License. with Ada.Characters.Latin_1; with Ada.IO_Exceptions; with Ada.Strings.Bounded; package body JSON.Tokenizers is procedure Read_String (Stream : in out Streams.Stream; Next_Token : out Token) is C : Character; Index, Length : Streams.AS.Stream_Element_Offset := 0; Escaped : Boolean := False; use type Streams.AS.Stream_Element_Offset; use Ada.Characters.Latin_1; begin loop C := Stream.Read_Character (Index); -- An unescaped '"' character denotes the end of the string exit when not Escaped and C = '"'; Length := Length + 1; if Escaped then case C is when '"' | '\' | '/' | 'b' | 'f' | 'n' | 'r' | 't' => null; when 'u' => -- TODO Support escaped unicode raise Program_Error with "Escaped unicode not supported yet"; when others => raise Tokenizer_Error with "Unexpected escaped character in string"; end case; elsif C /= '\' then -- Check C is not a control character if C in NUL .. US then raise Tokenizer_Error with "Unexpected control character in string"; end if; end if; Escaped := not Escaped and C = '\'; end loop; Next_Token := Token' (Kind => String_Token, String_Offset => Index - Length, String_Length => Length); end Read_String; procedure Test_Leading_Zeroes (First : Character; Value : String) is Leading_Zero_Message : constant String := "Leading zeroes in number are not allowed"; Minus_Digit_Message : constant String := "Expected at least one digit after - sign"; begin if First = '-' then if Value'Length >= 3 and then Value (Value'First .. Value'First + 1) = "-0" then raise Tokenizer_Error with Leading_Zero_Message; elsif Value'Length = 1 then raise Tokenizer_Error with Minus_Digit_Message; end if; elsif First = '0' and Value'Length >= 2 then raise Tokenizer_Error with Leading_Zero_Message; end if; end Test_Leading_Zeroes; procedure Read_Number (Stream : in out Streams.Stream; First : Character; Next_Token : out Token) is package SB is new Ada.Strings.Bounded.Generic_Bounded_Length (Max => Types.Maximum_String_Length_Numbers); Value : SB.Bounded_String; C : Character; Is_Float, Checked_Leading_Zeroes : Boolean := False; Error_Dot_Message : constant String := "Number must contain at least one digit after decimal point"; Error_Exp_Message : constant String := "Expected optional +/- sign after e/E and then at least one digit"; Error_Plus_Message : constant String := "Prefixing number with '+' character is not allowed"; Error_One_Digit_Message : constant String := "Expected at least one digit after +/- sign in number"; Error_Length_Message : constant String := "Number is longer than" & Types.Maximum_String_Length_Numbers'Image & " characters"; procedure Create_Token_From_Number is Number : constant String := SB.To_String (Value); begin if Is_Float then Next_Token := Token'(Kind => Float_Token, Float_Value => Types.Float_Type'Value (Number)); else Next_Token := Token'(Kind => Integer_Token, Integer_Value => Types.Integer_Type'Value (Number)); end if; end Create_Token_From_Number; begin if First = '+' then raise Tokenizer_Error with Error_Plus_Message; end if; SB.Append (Value, First); -- Accept sequence of digits, including leading zeroes loop C := Stream.Read_Character; exit when C not in '0' .. '9'; SB.Append (Value, C); end loop; -- Test whether value contains leading zeroes Test_Leading_Zeroes (First, SB.To_String (Value)); Checked_Leading_Zeroes := True; -- Tokenize fraction part if C = '.' then Is_Float := True; -- Append the dot SB.Append (Value, C); -- Require at least one digit after decimal point begin C := Stream.Read_Character; if C not in '0' .. '9' then raise Tokenizer_Error with Error_Dot_Message; end if; SB.Append (Value, C); exception when Ada.IO_Exceptions.End_Error => raise Tokenizer_Error with Error_Dot_Message; end; -- Accept sequence of digits loop C := Stream.Read_Character; exit when C not in '0' .. '9'; SB.Append (Value, C); end loop; end if; -- Tokenize exponent part if C in 'e' | 'E' then -- Append the 'e' or 'E' character SB.Append (Value, C); begin C := Stream.Read_Character; -- Append optional '+' or '-' character if C in '+' | '-' then -- If exponent is negative, number will be a float if C = '-' then Is_Float := True; end if; SB.Append (Value, C); -- Require at least one digit after +/- sign C := Stream.Read_Character; if C not in '0' .. '9' then raise Tokenizer_Error with Error_One_Digit_Message; end if; SB.Append (Value, C); elsif C in '0' .. '9' then SB.Append (Value, C); else raise Tokenizer_Error with Error_Exp_Message; end if; exception when Ada.IO_Exceptions.End_Error => raise Tokenizer_Error with Error_Exp_Message; end; -- Accept sequence of digits loop C := Stream.Read_Character; exit when C not in '0' .. '9'; SB.Append (Value, C); end loop; end if; Create_Token_From_Number; Stream.Write_Character (C); exception -- End_Error is raised if the number if followed by an EOF when Ada.IO_Exceptions.End_Error => -- Test whether value contains leading zeroes if not Checked_Leading_Zeroes then Test_Leading_Zeroes (First, SB.To_String (Value)); end if; Create_Token_From_Number; when Ada.Strings.Length_Error => raise Tokenizer_Error with Error_Length_Message; end Read_Number; procedure Read_Literal (Stream : in out Streams.Stream; First : Character; Next_Token : out Token) is package SB is new Ada.Strings.Bounded.Generic_Bounded_Length (Max => 5); Value : SB.Bounded_String; C : Character; Unexpected_Literal_Message : constant String := "Expected literal 'true', 'false', or 'null'"; procedure Create_Token_From_Literal is Literal : constant String := SB.To_String (Value); begin if Literal = "true" then Next_Token := Token'(Kind => Boolean_Token, Boolean_Value => True); elsif Literal = "false" then Next_Token := Token'(Kind => Boolean_Token, Boolean_Value => False); elsif Literal = "null" then Next_Token := Token'(Kind => Null_Token); else raise Tokenizer_Error with Unexpected_Literal_Message; end if; end Create_Token_From_Literal; begin SB.Append (Value, First); loop C := Stream.Read_Character; exit when C not in 'a' .. 'z' or else SB.Length (Value) = SB.Max_Length; SB.Append (Value, C); end loop; Create_Token_From_Literal; Stream.Write_Character (C); exception -- End_Error is raised if the literal if followed by an EOF when Ada.IO_Exceptions.End_Error => Create_Token_From_Literal; when Ada.Strings.Length_Error => raise Tokenizer_Error with Unexpected_Literal_Message; end Read_Literal; procedure Read_Token (Stream : in out Streams.Stream; Next_Token : out Token; Expect_EOF : Boolean := False) is C : Character; use Ada.Characters.Latin_1; begin loop -- Read the first next character and decide which token it could be part of C := Stream.Read_Character; -- Skip whitespace exit when C not in Space | HT | LF | CR; end loop; if Expect_EOF then raise Tokenizer_Error with "Expected to read EOF"; end if; case C is when '[' => Next_Token := Token'(Kind => Begin_Array_Token); when '{' => Next_Token := Token'(Kind => Begin_Object_Token); when ']' => Next_Token := Token'(Kind => End_Array_Token); when '}' => Next_Token := Token'(Kind => End_Object_Token); when ':' => Next_Token := Token'(Kind => Name_Separator_Token); when ',' => Next_Token := Token'(Kind => Value_Separator_Token); when '"' => Read_String (Stream, Next_Token); when '0' .. '9' | '+' | '-' => Read_Number (Stream, C, Next_Token); when 'a' .. 'z' => Read_Literal (Stream, C, Next_Token); when others => raise Tokenizer_Error with "Unexpected character"; end case; exception when Ada.IO_Exceptions.End_Error => if Expect_EOF then Next_Token := Token'(Kind => EOF_Token); else raise Tokenizer_Error with "Unexpectedly read EOF"; end if; end Read_Token; end JSON.Tokenizers;