vss_22.0.0_b38582ef/tools/ucd/ucd-characters.adb

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
------------------------------------------------------------------------------
--                        M A G I C   R U N T I M E                         --
--                                                                          --
--                       Copyright (C) 2021, AdaCore                        --
--                                                                          --
-- This library is free software;  you can redistribute it and/or modify it --
-- under terms of the  GNU General Public License  as published by the Free --
-- Software  Foundation;  either version 3,  or (at your  option) any later --
-- version. This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY;  without even the implied warranty of MERCHAN- --
-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE.                            --
--                                                                          --
-- As a special exception under Section 7 of GPL version 3, you are granted --
-- additional permissions described in the GCC Runtime Library Exception,   --
-- version 3.1, as published by the Free Software Foundation.               --
--                                                                          --
-- You should have received a copy of the GNU General Public License and    --
-- a copy of the GCC Runtime Library Exception along with this program;     --
-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
-- <http://www.gnu.org/licenses/>.                                          --
--                                                                          --
------------------------------------------------------------------------------

with Ada.Containers.Hashed_Maps;
with Ada.Strings.Wide_Wide_Unbounded; use Ada.Strings.Wide_Wide_Unbounded;
with Ada.Strings.Wide_Wide_Unbounded.Wide_Wide_Hash;

with Interfaces;

with Ada.Wide_Wide_Text_IO; use Ada.Wide_Wide_Text_IO;

package body UCD.Characters is

   --  Per-character storage. Every property is assigned a slot in one of the
   --  three fixed-size arrays below; the slot number comes from the
   --  corresponding *_Property_To_Index map.

   type Boolean_Array is array (1 .. 96) of Boolean with Pack;
   --  Values of binary properties, one bit per property.

   type Unsigned_16_Array is
     array (1 .. 24) of Interfaces.Unsigned_16 with Pack;
   --  Values of enumeration properties. Each element is the index of the
   --  value in the All_Values vector of the property (see
   --  Internal_Enumeration_Value); elements are reset to 0 when the database
   --  is allocated.

   type String_Array is
     array (1 .. 16) of Properties.Property_Value_Access;
   --  Values of "string" properties; null until a value is set.

   type Character_Record is record
      Boolean     : Boolean_Array;
      Enumeration : Unsigned_16_Array;
      String      : String_Array;
   end record;
   --  All property values stored for a single code point.

   type Character_Array is array (UCD.Code_Point) of aliased Character_Record;
   --  One record per code point.

   type Character_Array_Access is access all Character_Array;

   Database : Character_Array_Access;
   --  In-memory database; allocated by Initialize_Character_Database.

   function Hash
     (Item : Properties.Property_Access) return Ada.Containers.Hash_Type;
   --  Hash function for the property maps; computed from the first name of
   --  the property.

   package Property_Integer_Maps is
     new Ada.Containers.Hashed_Maps
       (Properties.Property_Access,
        Positive,
        Hash,
        Properties."=");
   --  Mapping from a property to the slot it occupies in one of the arrays
   --  of Character_Record.

   --  For each kind of property: the properties in order of registration
   --  and the slot assigned to each of them (its index in the vector at the
   --  moment of registration).

   Boolean_Properties            : Properties.Property_Vectors.Vector;
   Boolean_Property_To_Index     : Property_Integer_Maps.Map;
   Enumeration_Properties        : Properties.Property_Vectors.Vector;
   Enumeration_Property_To_Index : Property_Integer_Maps.Map;
   String_Properties             : Properties.Property_Vectors.Vector;
   String_Property_To_Index      : Property_Integer_Maps.Map;

   function Internal_Enumeration_Value
     (Property  : not null Properties.Property_Access;
      Value     : not null Properties.Property_Value_Access)
      return Interfaces.Unsigned_16;
   --  Return the index of Value in Property.All_Values, which is the form
   --  stored in Character_Record.Enumeration. Raises Program_Error when
   --  Value is not found among the values of Property.

   procedure Register_String_Property
     (Property : not null Properties.Property_Access);
   --  Register property of "string" type. Does nothing when the property
   --  has been registered already.

   ---------
   -- Get --
   ---------

   function Get
     (Character : Code_Point;
      Property  : not null UCD.Properties.Property_Access)
      return UCD.Properties.Property_Value_Access
   is
      --  Return the value of Property stored for Character. The kind of the
      --  property selects the storage that is consulted; Program_Error is
      --  raised when the property is neither binary, enumeration nor string.

   begin
      if Property.Is_Binary then
         --  Stored as a single bit, reported as the "Y" or "N" value of
         --  the property.

         declare
            Slot : constant Positive :=
              Boolean_Property_To_Index.Element (Property);

         begin
            if Database (Character).Boolean (Slot) then
               return
                 Property.Name_To_Value.Element
                   (To_Unbounded_Wide_Wide_String ("Y"));

            else
               return
                 Property.Name_To_Value.Element
                   (To_Unbounded_Wide_Wide_String ("N"));
            end if;
         end;

      elsif Property.Is_Enumeration then
         --  Stored as an index into the vector of values of the property.

         declare
            Slot     : constant Positive :=
              Enumeration_Property_To_Index.Element (Property);
            Internal : constant Interfaces.Unsigned_16 :=
              Database (Character).Enumeration (Slot);

         begin
            return Property.All_Values (Positive (Internal));
         end;

      elsif Property.Is_String then
         --  Stored as is.

         declare
            Slot : constant Positive :=
              String_Property_To_Index.Element (Property);

         begin
            return Database (Character).String (Slot);
         end;

      else
         raise Program_Error;
      end if;
   end Get;

   ----------
   -- Hash --
   ----------

   function Hash
     (Item : Properties.Property_Access) return Ada.Containers.Hash_Type
   is
      Primary_Name : constant Unbounded_Wide_Wide_String :=
        Item.Names.First_Element;
      --  The first name of the property is the only input of the hash.

   begin
      return Wide_Wide_Hash (Primary_Name);
   end Hash;

   -----------------------------------
   -- Initialize_Character_Database --
   -----------------------------------

   procedure Initialize_Character_Database is
      --  Classify every property that has enumerated values as binary or
      --  enumeration and assign it a storage slot, report how many slots of
      --  each kind are in use, allocate the database with all values reset,
      --  and apply default values of the General_Category,
      --  Canonical_Combining_Class and Decomposition_Type properties.

      use type Ada.Containers.Count_Type;

      procedure Set_Enumeration_Default
        (Property : not null Properties.Property_Access;
         Value    : not null Properties.Property_Value_Access);
      --  Store Value as the value of the enumeration Property for every
      --  code point of the database and mark Value as used.

      -----------------------------
      -- Set_Enumeration_Default --
      -----------------------------

      procedure Set_Enumeration_Default
        (Property : not null Properties.Property_Access;
         Value    : not null Properties.Property_Value_Access)
      is
         Index          : constant Positive :=
           Enumeration_Property_To_Index (Property);
         Internal_Value : constant Interfaces.Unsigned_16 :=
           Internal_Enumeration_Value (Property, Value);

      begin
         for C in Code_Point loop
            Database (C).Enumeration (Index) := Internal_Value;
         end loop;

         Value.Is_Used := True;
      end Set_Enumeration_Default;

   begin
      --  Detect binary properties

      for P of Properties.All_Properties loop

         if P.All_Values.Is_Empty then
            --  Ignore non-enumerated properties.

            null;

         elsif P.All_Values.Length = 2
           and then P.Name_To_Value.Contains
                      (To_Unbounded_Wide_Wide_String ("N"))
           and then P.Name_To_Value.Contains
                      (To_Unbounded_Wide_Wide_String ("Y"))
         then
            --  Exactly two values, named "N" and "Y": binary property.

            P.Is_Binary := True;

            Boolean_Properties.Append (P);
            Boolean_Property_To_Index.Insert
              (P, Boolean_Properties.Last_Index);

         else
            --  Any other non-empty set of values: enumeration property.

            P.Is_Enumeration := True;

            Enumeration_Properties.Append (P);
            Enumeration_Property_To_Index.Insert
              (P, Enumeration_Properties.Last_Index);
         end if;
      end loop;

      --  Report number of used slots and capacity of each kind of storage.

      Put_Line
        ("  - boolean properties     :"
         & Ada.Containers.Count_Type'Wide_Wide_Image
           (Boolean_Properties.Length)
         & " (of"
         & Integer'Wide_Wide_Image (Boolean_Array'Length)
         & ')');
      Put_Line
        ("  - enumeration properties :"
         & Ada.Containers.Count_Type'Wide_Wide_Image
           (Enumeration_Properties.Length)
         & " (of"
         & Integer'Wide_Wide_Image (Unsigned_16_Array'Length)
         & ')');
      Put_Line
        ("  - string properties      : dynamic (of"
         & Integer'Wide_Wide_Image (String_Array'Length)
         & ')');

      --  Allocate database and reset all information.

      Put_Line ("Initializing in-memory database");
      Database :=
        new Character_Array'
          (others =>
             (Boolean     => (others => False),
              Enumeration => (others => 0),
              String      => (others => null)));

      --  Initialize special cases.

      --  Unicode 13.0 exception: the Extended_Pictographic property is Y by
      --  default for unassigned code points in a few ranges of code points.
      --
      --  To construct this property UnicodeData.txt must be loaded first,
      --  thus it is initialized when the corresponding data is loaded from
      --  the emoji/emoji-data.txt file.

      --  Default value for General_Category is 'Cn' ("Unassigned")

      declare
         GC_Property : constant not null Properties.Property_Access :=
           Properties.Resolve ("gc");

      begin
         Set_Enumeration_Default
           (GC_Property, Properties.Resolve (GC_Property, "Cn"));
      end;

      --  Default value for Canonical_Combining_Class is 'NR' ("Not_Reordered")

      declare
         CCC_Property : constant not null Properties.Property_Access :=
           Properties.Resolve ("ccc");

      begin
         Set_Enumeration_Default
           (CCC_Property, Properties.Resolve (CCC_Property, "NR"));
      end;

      --  Default value for Decomposition_Type is 'None'

      declare
         DT_Property : constant not null Properties.Property_Access :=
           Properties.Resolve ("dt");

      begin
         Set_Enumeration_Default
           (DT_Property, Properties.Resolve (DT_Property, "None"));
      end;
   end Initialize_Character_Database;

   --------------------------------
   -- Internal_Enumeration_Value --
   --------------------------------

   function Internal_Enumeration_Value
     (Property  : not null Properties.Property_Access;
      Value     : not null Properties.Property_Value_Access)
      return Interfaces.Unsigned_16
   is
      use type UCD.Properties.Property_Value_Access;

      First : constant Integer := Property.All_Values.First_Index;
      Last  : constant Integer := Property.All_Values.Last_Index;
      --  Bounds of the vector of values of the property

   begin
      --  Look for the given value object itself (access values are
      --  compared); its position in the vector is the internal value.

      for Position in First .. Last loop
         if Value = Property.All_Values.Element (Position) then
            return Interfaces.Unsigned_16 (Position);
         end if;
      end loop;

      --  Value is not one of the values of the property.

      raise Program_Error;
   end Internal_Enumeration_Value;

   ------------------------------
   -- Register_String_Property --
   ------------------------------

   procedure Register_String_Property
     (Property : not null Properties.Property_Access) is
   begin
      --  Nothing to do when the property has a slot already.

      if String_Property_To_Index.Contains (Property) then
         return;
      end if;

      --  Append the property and use its position in the vector as the
      --  slot number.

      String_Properties.Append (Property);
      String_Property_To_Index.Insert
        (Property, String_Properties.Last_Index);
   end Register_String_Property;

   ---------
   -- Set --
   ---------

   procedure Set
     (Character : Code_Point;
      Property  : not null UCD.Properties.Property_Access;
      Value     : not null UCD.Properties.Property_Value_Access)
   is
      --  Store Value as the value of Property for Character. The kind of
      --  the property selects the storage that is updated; Program_Error is
      --  raised when the property is neither binary, enumeration nor string.

   begin
      if Property.Is_Binary then
         --  Only the fact that the first name of the value is "Y" is
         --  recorded.

         declare
            Slot : constant Positive :=
              Boolean_Property_To_Index.Element (Property);

         begin
            Database (Character).Boolean (Slot) :=
              Value.Names.First_Element = "Y";
         end;

      elsif Property.Is_Enumeration then
         declare
            Slot : constant Positive :=
              Enumeration_Property_To_Index.Element (Property);

         begin
            Database (Character).Enumeration (Slot) :=
              Internal_Enumeration_Value (Property, Value);
         end;

         --  Set flag of use of value.

         Value.Is_Used := True;

      elsif Property.Is_String then
         --  String properties get their slot on first use.

         Register_String_Property (Property);

         declare
            Slot : constant Positive :=
              String_Property_To_Index.Element (Property);

         begin
            Database (Character).String (Slot) := Value;
         end;

      else
         raise Program_Error;
      end if;
   end Set;

end UCD.Characters;