-- -- Copyright (C) 2021-2023, AdaCore -- -- SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -- with Ada.Strings.UTF_Encoding.Wide_Wide_Strings; package body UCD.Data_File_Loaders is use Ada.Strings.UTF_Encoding.Wide_Wide_Strings; procedure Scan_Next_Line (Self : in out File_Loader'Class); function To_Code_Point (Item : Wide_Wide_String) return UCD.Code_Point; function Parse_Sequence_Of_Code_Unit (Buffer : Wide_Wide_String) return UCD.Code_Point_Vectors.Vector; ----------- -- Close -- ----------- procedure Close (Self : in out File_Loader) is begin if Is_Open (Self.File) then Close (Self.File); end if; end Close; ----------------- -- End_Of_File -- ----------------- function End_Of_File (Self : File_Loader) return Boolean is begin return not Is_Open (Self.File) or Self.Buffer'First > Self.Line_Last; end End_Of_File; -------------- -- Finalize -- -------------- overriding procedure Finalize (Self : in out File_Loader) is begin Self.Close; end Finalize; -------------------------- -- Get_Code_Point_Range -- -------------------------- procedure Get_Code_Point_Range (Self : in out File_Loader; First_Code : out UCD.Code_Point; Last_Code : out UCD.Code_Point) is function Is_Start_Of_Unicode_Data_Range return Boolean; -- Returns True when current line opens range in UnicodeData.txt format, -- thus first field ends with ", First>" string. ------------------------------------ -- Is_Start_Of_Unicode_Data_Range -- ------------------------------------ function Is_Start_Of_Unicode_Data_Range return Boolean is Buffer : constant Wide_Wide_String := Self.Get_Field (1); Suffix : constant Wide_Wide_String := ", First>"; begin return Buffer'Length > Suffix'Length and then Buffer (Buffer'First) = '<' and then Buffer (Buffer'Last - Suffix'Length + 1 .. Buffer'Last) = Suffix; end Is_Start_Of_Unicode_Data_Range; begin if Is_Start_Of_Unicode_Data_Range then First_Code := To_Code_Point (Self.Get_Field (0)); Self.Skip_Line; Last_Code := To_Code_Point (Self.Get_Field (0)); else declare Buffer : constant Wide_Wide_String := Self.Get_Field (0); Current : Positive; First : Positive; Last : Natural; begin First := Buffer'First; Last := Buffer'Last; Current := First; while Current <= Buffer'Last loop if Buffer (Current) not in '0' .. '9' | 'A' .. 'F' then Last := Current - 1; exit; end if; Current := Current + 1; end loop; First_Code := To_Code_Point (Buffer (First .. Last)); Last_Code := First_Code; if Last = Buffer'Last then return; end if; if Last + 3 < Buffer'Last and then Buffer (Last + 1) /= '.' and then Buffer (Last + 2) /= '.' then raise Program_Error; end if; First := Last + 3; Last := Buffer'Last; Current := First; while Current <= Buffer'Last loop if Buffer (Current) not in '0' .. '9' | 'A' .. 'F' then Last := Current - 1; exit; end if; Current := Current + 1; end loop; Last_Code := To_Code_Point (Buffer (First .. Last)); if Last /= Buffer'Last then raise Program_Error; end if; end; end if; end Get_Code_Point_Range; --------------- -- Get_Field -- --------------- function Get_Field (Self : File_Loader; Index : Field_Index) return Wide_Wide_String is begin return Self.Buffer (Self.Fields (Index).First .. Self.Fields (Index).Last); end Get_Field; --------------- -- Get_Field -- --------------- function Get_Field (Self : File_Loader; Index : Field_Index) return UCD.Code_Point_Vectors.Vector is begin return Parse_Sequence_Of_Code_Unit (Self.Get_Field (Index)); end Get_Field; --------------- -- Get_Field -- --------------- procedure Get_Field (Self : File_Loader; Index : Field_Index; Tag : out Unbounded_Wide_Wide_String; Data : out UCD.Code_Point_Vectors.Vector) is Buffer : constant Wide_Wide_String := Self.Get_Field (Index); First : Positive; Current : Positive; begin if Buffer'First > Buffer'Last then Tag := Null_Unbounded_Wide_Wide_String; Data := UCD.Code_Point_Vectors.Empty_Vector; return; end if; First := Buffer'First; if Buffer (First) = '<' then Current := First + 1; while Buffer (Current) /= '>' loop Current := Current + 1; end loop; Tag := To_Unbounded_Wide_Wide_String (Buffer (First .. Current)); First := Current + 1; -- Skip spaces for J in First .. Buffer'Last loop First := J; exit when Buffer (J) /= ' '; end loop; else Tag := Null_Unbounded_Wide_Wide_String; end if; Data := Parse_Sequence_Of_Code_Unit (Buffer (First .. Buffer'Last)); end Get_Field; --------------- -- Has_Field -- --------------- function Has_Field (Self : File_Loader; Index : Field_Index) return Boolean is begin return Self.Fields (Index).First /= Self.Buffer'First or else Self.Fields (Index).First <= Self.Fields (Index).Last; end Has_Field; ---------- -- Open -- ---------- procedure Open (Self : in out File_Loader; UCD_Root : Wide_Wide_String; File_Name : Wide_Wide_String) is begin Put_Line ("Loading " & File_Name & "..."); Open (Self.File, In_File, Encode (UCD_Root & '/' & File_Name), "wcem=8"); Self.Scan_Next_Line; end Open; --------------------------------- -- Parse_Sequence_Of_Code_Unit -- --------------------------------- function Parse_Sequence_Of_Code_Unit (Buffer : Wide_Wide_String) return UCD.Code_Point_Vectors.Vector is First : Positive; Last : Positive; Current : Positive; begin return Result : UCD.Code_Point_Vectors.Vector do First := Buffer'First; while First <= Buffer'Last loop Last := Buffer'Last; Current := First; while Current <= Buffer'Last loop if Buffer (Current) not in '0' .. '9' | 'A' .. 'F' then Last := Current - 1; exit; end if; Current := Current + 1; end loop; Result.Append (To_Code_Point (Buffer (First .. Last))); First := Last + 1; -- Skip spaces for J in First .. Buffer'Last loop First := J; exit when Buffer (J) /= ' '; end loop; end loop; end return; end Parse_Sequence_Of_Code_Unit; -------------------- -- Scan_Next_Line -- -------------------- procedure Scan_Next_Line (Self : in out File_Loader'Class) is Current_Field : Field_Index := Field_Index'First; Field_First : Positive; Field_Last : Natural; Current : Positive; Has_Separator : Boolean; begin Self.Fields := [others => (1, 0)]; loop Self.Line_Last := 0; exit when End_Of_File (Self.File); Get_Line (Self.File, Self.Buffer, Self.Line_Last); -- Remove comments for J in Self.Buffer'First .. Self.Line_Last loop if Self.Buffer (J) = '#' then Self.Line_Last := J - 1; exit; end if; end loop; if Self.Buffer'First <= Self.Line_Last then Current := Self.Buffer'First; loop Field_First := Current; Field_Last := Self.Line_Last; Has_Separator := False; for J in Field_First .. Self.Line_Last loop if Self.Buffer (J) = ';' then Field_Last := J - 1; Current := J + 1; Has_Separator := True; exit; end if; end loop; -- Remove leading spaces for J in Field_First .. Field_Last loop Field_First := J; exit when Self.Buffer (J) /= ' '; end loop; -- Remove trailing spaces if Field_First = Field_Last and then Self.Buffer (Field_Last) = ' ' then Field_First := Field_First + 1; else for J in reverse Field_First .. Field_Last loop Field_Last := J; exit when Self.Buffer (J) /= ' '; end loop; end if; Self.Fields (Current_Field) := (Field_First, Field_Last); Current_Field := Current_Field + 1; exit when not Has_Separator; end loop; exit; end if; end loop; end Scan_Next_Line; --------------- -- Skip_Line -- --------------- procedure Skip_Line (Self : in out File_Loader) is begin Self.Scan_Next_Line; end Skip_Line; ------------------- -- To_Code_Point -- ------------------- function To_Code_Point (Item : Wide_Wide_String) return UCD.Code_Point is begin return Result : UCD.Code_Point := 0 do for J in Item'Range loop Result := Result * 16; case Item (J) is when '0' .. '9' => Result := Result + Wide_Wide_Character'Pos (Item (J)) - Wide_Wide_Character'Pos ('0'); when 'A' .. 'F' => Result := Result + Wide_Wide_Character'Pos (Item (J)) - Wide_Wide_Character'Pos ('A') + 10; when others => raise Constraint_Error; end case; end loop; end return; end To_Code_Point; end UCD.Data_File_Loaders;