xmlada_24.0.0_ae5a015b/unicode/importer/convert.adb

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
------------------------------------------------------------------------------
--                     XML/Ada - An XML suite for Ada95                     --
--                                                                          --
--                     Copyright (C) 2016, Nicolas Boulenguez               --
--                     Copyright (C) 2016-2022, AdaCore                     --
--                                                                          --
-- This library is free software;  you can redistribute it and/or modify it --
-- under terms of the  GNU General Public License  as published by the Free --
-- Software  Foundation;  either version 3,  or (at your  option) any later --
-- version. This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY;  without even the implied warranty of MERCHAN- --
-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE.                            --
--                                                                          --
-- As a special exception under Section 7 of GPL version 3, you are granted --
-- additional permissions described in the GCC Runtime Library Exception,   --
-- version 3.1, as published by the Free Software Foundation.               --
--                                                                          --
-- You should have received a copy of the GNU General Public License and    --
-- a copy of the GCC Runtime Library Exception along with this program;     --
-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
-- <http://www.gnu.org/licenses/>.                                          --
--                                                                          --
------------------------------------------------------------------------------

with Ada.Command_Line;
with Ada.Containers.Indefinite_Vectors;
with Ada.Containers.Vectors;
with Ada.Strings.Bounded;
with Ada.Strings.Fixed;
with Ada.Strings.Maps.Constants;
with Ada.Text_IO;

with Translators.Alias;
with Translators.Block;

procedure Convert is

   use Ada.Text_IO;
   use type Translators.A_Translation;
   package ASB is new Ada.Strings.Bounded.Generic_Bounded_Length (256);

   --  Must be compiled with gnata and called with three paths.
   pragma Assert (Ada.Command_Line.Argument_Count = 3);
   Path_To_Blocks_Txt       : String renames Ada.Command_Line.Argument (1);
   Path_To_Name_Aliases_Txt : String renames Ada.Command_Line.Argument (2);
   Path_To_Unicode_Data_Txt : String renames Ada.Command_Line.Argument (3);

   Path_To_License : constant String := "license.txt";
   Output_Dir : constant String := "generated/";

   type A_Code is range 0 .. 16#10FFFF# + 1;

   function Value (Hexadecimal_Digits : String) return A_Code;
   --  The given string must only contain hexadecimal_digits.

   function Image (Code : A_Code) return String;
   --  16#4_hexadecimal_digits# if Code <= 16#FFFF#,
   --  else add the required digits count.

   package Code_IO is new Integer_IO (A_Code);
   --  Default base is set to 16.

   package Translation_Vectors is new Ada.Containers.Indefinite_Vectors
     (Positive, Translators.A_Translation);

   type A_Point is record
      Code  : A_Code;
      Names : Translation_Vectors.Vector;
   end record;

   package Point_Vectors is new Ada.Containers.Vectors (Positive, A_Point);

   procedure Parse_Block_Line (Line : String);
   procedure Process_Block (Start_Code : A_Code;
                            End_Code   : A_Code;
                            Block_Name : String)
     with Pre => Start_Code <= End_Code;
   procedure Output_Ada_Package (Block_Name : String;
                                 Points     : Point_Vectors.Vector);
   procedure Put_Maybe_Split (File             : File_Type;
                              Before_Semicolon : String;
                              After_Semicolon  : String);
   procedure Put_Unused_Exception (Replaced    : String;
                                   Replacement : String);

   type A_Name_File is record
      File : File_Type;
      Code : A_Code;
      Name : ASB.Bounded_String;
   end record;

   procedure Next (Name_File : in out A_Name_File);

   ----------------------------------------------------------------------

   function Image (Code : A_Code) return String is
      Tmp : String (1 .. 3 + 32 / 4 + 1);
      I   : Integer := Tmp'Last - 1;
   begin
      Code_IO.Put (Tmp, Code, Base => 16);
      while Tmp (I) /= '#' loop
         I := I - 1;
      end loop;
      return Tmp (I - 2 .. I) & (Tmp'Last .. I + 4 => '0')
        & Tmp (I + 1 .. Tmp'Last);
   end Image;

   procedure Next (Name_File : in out A_Name_File) is
   begin
      while not End_Of_File (Name_File.File) loop
         declare
            Line : constant String := Get_Line (Name_File.File);
         begin
            if Line'Length /= 0 and then Line (Line'First) /= '#' then
               declare
                  I : constant Natural := Ada.Strings.Fixed.Index (Line, ";");
                  J : constant Natural := Ada.Strings.Fixed.Index (Line, ";",
                                                                   I + 1);
                  pragma Assert (I in 2 .. J - 2);
               begin
                  Name_File.Code := Value (Line (Line'First .. I - 1));
                  ASB.Set_Bounded_String (Name_File.Name,
                                          Line (I + 1 .. J - 1));
                  exit;
               end;
            end if;
         end;
      end loop;
   end Next;

   procedure Parse_Block_Line (Line : String) is
      First_Dot : Integer;
      Semicolon : Integer;
      I         : Integer := Line'First;
   begin
      if I <= Line'Last and then Line (I) /= '#' then
         while Line (I) in '0' .. '9' | 'A' .. 'F' loop
            I := I + 1;
         end loop;
         First_Dot := I;
         pragma Assert (Line'First + 4 <= First_Dot
                          and Line (First_Dot .. First_Dot + 1) = "..");
         I := I + 2;
         while Line (I) in  '0' .. '9' | 'A' .. 'F' loop
            I := I + 1;
         end loop;
         Semicolon := I;
         pragma Assert (First_Dot + 5 < Semicolon
                          and Line (Semicolon .. Semicolon + 1) = "; ");
         Process_Block
           (Start_Code => Value (Line (Line'First .. First_Dot - 1)),
            End_Code   => Value (Line (First_Dot + 2 .. Semicolon - 1)),
            Block_Name => Line (Semicolon + 2 .. Line'Last));
      end if;
   end Parse_Block_Line;

   procedure Put_Maybe_Split (File             : File_Type;
                              Before_Semicolon : String;
                              After_Semicolon  : String) is
      S : constant String := "   " & Before_Semicolon
        & (Before_Semicolon'Length + 1 .. 39 => ' ') & " :";
   begin
      if S'Length + 1 + After_Semicolon'Length <= 79 then
         Put_Line (File, S & ' ' & After_Semicolon);
      else
         Put_Line (File, S);
         Put_Line (File, "      " & After_Semicolon);
      end if;
   end Put_Maybe_Split;

   procedure Put_Unused_Exception (Replaced    : String;
                                   Replacement : String) is
   begin
      Put_Line
        ("Unused exception: " & Replaced & " -> " & Replacement);
   end Put_Unused_Exception;

   function Value (Hexadecimal_Digits : String) return A_Code is
   begin
      return A_Code'Value ("16#" & Hexadecimal_Digits & '#');
   end Value;

   ----------------------------------------------------------------------

   --  Now it is convenient to share some variable among the last procedures.

   Name_Aliases     : A_Name_File;
   Unicode_Data     : A_Name_File;
   Alias_Translator : Translators.Alias.An_Alias_Translator;
   Block_Translator : Translators.Block.A_Block_Translator;
   Unicode_Version  : ASB.Bounded_String;

   procedure Output_Ada_Package (Block_Name : String;
                                 Points     : Point_Vectors.Vector) is
      Pkg  : constant String := Block_Translator.Translated
        (Block_Translator.New_Translation (Block_Name));
      File :  File_Type;
      License : File_Type;
   begin
      --  On VMS, Filename lengths are limited to 39.39 characters.
      pragma Assert (14 + Pkg'Length <= 39, "file name too long: " & Pkg);
      Create (File, Out_File,
              Output_Dir & "unicode-names-" & Ada.Strings.Fixed.Translate
                (Pkg, Ada.Strings.Maps.Constants.Lower_Case_Map) & ".ads");
      Put_Line (File,
                "--  This file is built automatically from data found on the");
      Put_Line (File, "--  unicode web site (http://www.unicode.org)");
      Put (File, "--  in version ");
      Put (File, ASB.To_String (Unicode_Version));
      Put (File, " and thus is a subject to unicode license:");
      New_Line (File);

      Open (License, In_File, Path_To_License);
      while not End_Of_File (License) loop
         Put_Line (File, Get_Line (License));
      end loop;
      Close (License);

      Put (File, "package Unicode.Names.");
      Put (File, Pkg);
      Put (File, " is");
      New_Line (File);
      Put_Line (File, "   pragma Preelaborate;");
      Put_Line (File, "   pragma Style_Checks (Off);");
      New_Line (File);
      for Point of Points loop
         if not Point.Names.Is_Empty then
            if Translators.Is_Exception (Point.Names.Element (1)) then
               Put (File, "   --  Real Unicode name is ");
               Put (File, Translators.Original (Point.Names.Element (1)));
               New_Line (File);
            end if;
            Put_Maybe_Split
              (File, Alias_Translator.Translated (Point.Names.Element (1)),
               "constant Unicode_Char := " & Image (Point.Code) & ";");
            for A in 2 .. Integer (Point.Names.Length) loop
               if Translators.Is_Exception (Point.Names.Element (A)) then
                  Put (File, "   --  Real Unicode name is ");
                  Put (File, Translators.Original (Point.Names.Element (A)));
                  New_Line (File);
               end if;
               Put_Maybe_Split
                 (File,
                  Alias_Translator.Translated (Point.Names.Element (A)),
                  "Unicode_Char renames "
                    & Alias_Translator.Translated (Point.Names.Element (1))
                    & ';');
            end loop;
         end if;
      end loop;
      Put (File, "end Unicode.Names.");
      Put (File, Pkg);
      Put (File, ";");
      New_Line (File);
      Close (File);
   end Output_Ada_Package;

   procedure Process_Block (Start_Code : A_Code;
                            End_Code   : A_Code;
                            Block_Name : String) is
      Points : Point_Vectors.Vector;
   begin
      while (not End_Of_File (Unicode_Data.File))
        and then Unicode_Data.Code < Start_Code loop
         Put ("Code without block: ");
         Code_IO.Put (Unicode_Data.Code);
         New_Line;
         Next (Unicode_Data);
      end loop;
      while (not End_Of_File (Unicode_Data.File))
        and then Unicode_Data.Code <= End_Code loop
         declare
            Point : A_Point;
            Name  : constant Translators.A_Translation
              := Alias_Translator.New_Translation
              (ASB.To_String (Unicode_Data.Name));
         begin
            Point.Code := Unicode_Data.Code;
            if Alias_Translator.Translated (Name) /= "" then
               Point.Names.Append (Name);
            end if;
            while (not End_Of_File (Name_Aliases.File))
              and then Name_Aliases.Code = Point.Code loop
               Point.Names.Append (Alias_Translator.New_Translation
                                     (ASB.To_String (Name_Aliases.Name)));
               Next (Name_Aliases);
            end loop;
            if Point.Names.Is_Empty then
               Put ("Unnamed code: ");
               Code_IO.Put (Point.Code);
               New_Line;
            else
               Points.Append (Point);
            end if;
         end;
         Next (Unicode_Data);
      end loop;
      if Points.Is_Empty then
         Put ("Empty block: ");
         Put (Block_Name);
         New_Line;
      else
         Output_Ada_Package (Block_Name, Points);
      end if;
   end Process_Block;

   Blocks : File_Type;
begin
   Code_IO.Default_Base := 16;

   Alias_Translator.Set_Exceptions;
   Block_Translator.Set_Exceptions;

   Open (Blocks, In_File, Path_To_Blocks_Txt);
   declare
      Line : constant String := Get_Line (Blocks);
      pragma Assert (Line (Line'First .. Line'First + 8) = "# Blocks-"
           and then Line (Line'Last - 3 .. Line'Last) = ".txt",
           "Unable to parse unicode version in " & Name (Blocks));
   begin
      ASB.Set_Bounded_String (Unicode_Version,
                              Line (Line'First + 9 .. Line'Last - 4));
   end;

   Open (Name_Aliases.File, In_File, Path_To_Name_Aliases_Txt);
   Next (Name_Aliases);

   Open (Unicode_Data.File, In_File, Path_To_Unicode_Data_Txt);
   Next (Unicode_Data);

   while not End_Of_File (Blocks) loop
      Parse_Block_Line (Get_Line (Blocks));
   end loop;

   Close (Blocks);

   if not End_Of_File (Unicode_Data.File) then
      Put (Path_To_Unicode_Data_Txt);
      Put (" only parsed until code ");
      Code_IO.Put (Unicode_Data.Code);
      New_Line;
   end if;
   Close (Unicode_Data.File);

   if not End_Of_File (Name_Aliases.File) then
      Put (Path_To_Name_Aliases_Txt);
      Put (" only parsed until code ");
      Code_IO.Put (Name_Aliases.Code);
      New_Line;
   end if;
   Close (Name_Aliases.File);

   Alias_Translator.Iterate_On_Unused_Exceptions (Put_Unused_Exception'Access);

   Block_Translator.Iterate_On_Unused_Exceptions (Put_Unused_Exception'Access);
end Convert;