utilada_lzma_2.1.0_56b45091/regtests/util-strings-tests.adb

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
-----------------------------------------------------------------------
--  strings.tests -- Unit tests for strings
--  Copyright (C) 2009, 2010, 2011, 2012, 2015, 2018 Stephane Carrez
--  Written by Stephane Carrez (Stephane.Carrez@gmail.com)
--
--  Licensed under the Apache License, Version 2.0 (the "License");
--  you may not use this file except in compliance with the License.
--  You may obtain a copy of the License at
--
--      http://www.apache.org/licenses/LICENSE-2.0
--
--  Unless required by applicable law or agreed to in writing, software
--  distributed under the License is distributed on an "AS IS" BASIS,
--  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
--  See the License for the specific language governing permissions and
--  limitations under the License.
-----------------------------------------------------------------------
with Ada.Strings;
with Ada.Strings.Unbounded;
with Ada.Strings.Fixed;
with Ada.Strings.Fixed.Hash;
with Ada.Strings.Unbounded.Hash;
with Ada.Containers;
with Util.Test_Caller;
with Util.Strings.Transforms;
with Util.Strings.Maps;
with Util.Strings.Vectors;
with Util.Perfect_Hash;
with Util.Strings.Tokenizers;
with Ada.Streams;
with Util.Measures;
package body Util.Strings.Tests is

   use Ada.Strings.Unbounded;
   use Util.Tests;
   use Util.Strings.Transforms;

   package Caller is new Util.Test_Caller (Test, "Strings");

   procedure Add_Tests (Suite : in Util.Tests.Access_Test_Suite) is
   begin
      Caller.Add_Test (Suite, "Test Util.Strings.Transforms.Escape_Javascript",
                       Test_Escape_Javascript'Access);
      Caller.Add_Test (Suite, "Test Util.Strings.Transforms.Escape_Xml",
                       Test_Escape_Xml'Access);
      Caller.Add_Test (Suite, "Test Util.Strings.Transforms.Unescape_Xml",
                       Test_Unescape_Xml'Access);
      Caller.Add_Test (Suite, "Test Util.Strings.Transforms.Capitalize",
                       Test_Capitalize'Access);
      Caller.Add_Test (Suite, "Test Util.Strings.Transforms.To_Upper_Case",
                       Test_To_Upper_Case'Access);
      Caller.Add_Test (Suite, "Test Util.Strings.Transforms.To_Lower_Case",
                       Test_To_Lower_Case'Access);
      Caller.Add_Test (Suite, "Test Util.Strings.Transforms.To_Hex",
                       Test_To_Hex'Access);
      Caller.Add_Test (Suite, "Test Measure",
                       Test_Measure_Copy'Access);
      Caller.Add_Test (Suite, "Test Util.Strings.Index",
                       Test_Index'Access);
      Caller.Add_Test (Suite, "Test Util.Strings.Rindex",
                       Test_Rindex'Access);
      Caller.Add_Test (Suite, "Test Util.Strings.Benchmark",
                       Test_Measure_Hash'Access);
      Caller.Add_Test (Suite, "Test Util.Strings.String_Ref",
                       Test_String_Ref'Access);
      Caller.Add_Test (Suite, "Test Util.Strings.Starts_With",
                       Test_Starts_With'Access);
      Caller.Add_Test (Suite, "Test Util.Strings.Ends_With",
                       Test_Ends_With'Access);
      Caller.Add_Test (Suite, "Test perfect hash",
                       Test_Perfect_Hash'Access);
      Caller.Add_Test (Suite, "Test Util.Strings.Tokenizers.Iterate_Token",
                       Test_Iterate_Token'Access);
      Caller.Add_Test (Suite, "Test Util.Strings.Vectors perf",
                       Test_Perf_Vector'Access);

   end Add_Tests;

   procedure Test_Escape_Javascript (T : in out Test) is
      Result : Unbounded_String;
   begin
      Escape_Javascript (Content => ASCII.LF & " ""a string"" a 'single quote'",
                         Into    => Result);
      Assert_Equals (T, "\n \""a string\"" a \'single quote\'", Result);

      Result := To_Unbounded_String ("");
      Escape_Javascript (Content => ASCII.ESC & "[m " & Character'Val (255),
                         Into    => Result);
      Assert_Equals (T, "\u001B[m " & Character'Val (255), Result);
   end Test_Escape_Javascript;

   procedure Test_Escape_Xml (T : in out Test) is
      Result : Unbounded_String;
   begin
      Escape_Xml (Content => ASCII.LF & " < ""a string"" a 'single quote' >& ",
                  Into    => Result);
      Assert_Equals (T, ASCII.LF & " &lt; ""a string"" a &apos;single quote&apos; &gt;&amp; ",
                     Result);

      Result := To_Unbounded_String ("");
      Escape_Xml (Content => ASCII.ESC & "[m " & Character'Val (255),
                  Into    => Result);
      Assert_Equals (T, ASCII.ESC & "[m &#255;", Result);
   end Test_Escape_Xml;

   procedure Test_Unescape_Xml (T : in out Test) is
      Result : Unbounded_String;
   begin
      Unescape_Xml (Content    => "&lt;&gt;&amp;&quot; &apos; &#x41;",
                    Translator => Util.Strings.Transforms.TR.Translate_Xml_Entity'Access,
                    Into       => Result);
      Util.Tests.Assert_Equals (T, "<>&"" ' A", Result, "Invalid unescape");

      Set_Unbounded_String (Result, "");
      Unescape_Xml (Content    => "Test &#65;&#111;&#126; end",
                    Translator => Util.Strings.Transforms.TR.Translate_Xml_Entity'Access,
                    Into       => Result);
      Util.Tests.Assert_Equals (T, "Test Ao~ end", Result, "Invalid decimal unescape");

      Set_Unbounded_String (Result, "");
      Unescape_Xml (Content    => "Test &#x65;&#xc0;&#x111;&#x126;&#xcb; end",
                    Translator => Util.Strings.Transforms.TR.Translate_Xml_Entity'Access,
                    Into       => Result);
      Util.Tests.Assert_Equals (T, "Test eÀđĦË end", Result, "Invalid Decimal Unescape");

      Unescape_Xml (Content    => "&;&#qsf;&qsd;&#12121212121212121212;; &#41",
                    Translator => Util.Strings.Transforms.TR.Translate_Xml_Entity'Access,
                    Into       => Result);
   end Test_Unescape_Xml;

   procedure Test_Capitalize (T : in out Test) is
      Result : Unbounded_String;
   begin
      Assert_Equals (T, "Capitalize_A_String", Capitalize ("capITalIZe_a_strING"));

      Capitalize ("CapAS_String", Result);
      Assert_Equals (T, "Capas_String", Result);
   end Test_Capitalize;

   procedure Test_To_Upper_Case (T : in out Test) is
   begin
      Assert_Equals (T, "UPPERCASE_0123_STR", To_Upper_Case ("upperCase_0123_str"));
   end Test_To_Upper_Case;

   procedure Test_To_Lower_Case (T : in out Test) is
   begin
      Assert_Equals (T, "lowercase_0123_str", To_Lower_Case ("LowERCase_0123_STR"));
   end Test_To_Lower_Case;

   procedure Test_To_Hex (T : in out Test) is
      Result : Unbounded_String;
   begin
      To_Hex (Result, Character'Val (23));
      Assert_Equals (T, "\u0017", Result);

      To_Hex (Result, Character'Val (31));
      Assert_Equals (T, "\u0017\u001F", Result);

      To_Hex (Result, Character'Val (255));
      Assert_Equals (T, "\u0017\u001F\u00FF", Result);
   end Test_To_Hex;

   procedure Test_Measure_Copy (T : in out Test) is
      pragma Unreferenced (T);

      Buf : constant Ada.Streams.Stream_Element_Array (1 .. 10_024) := (others => 23);
      pragma Suppress (All_Checks, Buf);
   begin
      declare
         T : Util.Measures.Stamp;
         R : Ada.Strings.Unbounded.Unbounded_String;
      begin
         for I in Buf'Range loop
            Append (R, Character'Val (Buf (I)));
         end loop;
         Util.Measures.Report (T, "Stream transform using Append (1024 bytes)");
      end;
      declare
         T : Util.Measures.Stamp;
         R : Ada.Strings.Unbounded.Unbounded_String;
         S : String (1 .. 10_024);
         pragma Suppress (All_Checks, S);
      begin
         for I in Buf'Range loop
            S (Natural (I)) := Character'Val (Buf (I));
         end loop;
         Append (R, S);
         Util.Measures.Report (T, "Stream transform using temporary string (1024 bytes)");
      end;
--        declare
--           T : Util.Measures.Stamp;
--           R : Ada.Strings.Unbounded.Unbounded_String;
--           P : constant Ptr := new String (1 .. Buf'Length);
--
--           pragma Suppress (All_Checks, P);
--        begin
--           for I in P'Range loop
--              P (I) := Character'Val (Buf (Ada.Streams.Stream_Element_Offset (I)));
--           end loop;
--           Ada.Strings.Unbounded.Aux.Set_String (R, P.all'Access);
--           Util.Measures.Report (T, "Stream transform using Aux string (1024 bytes)");
--        end;
      declare
         T : Util.Measures.Stamp;
         H : Ada.Containers.Hash_Type;

         pragma Unreferenced (H);
      begin
         for I in 1 .. 1_000 loop
            H := Ada.Strings.Fixed.Hash ("http://code.google.com/p/ada-awa/jsf:wiki");
         end loop;
         Util.Measures.Report (T, "Ada.Strings.Fixed.Hash");
      end;
      declare
         T : Util.Measures.Stamp;
         H : Ada.Containers.Hash_Type;

         pragma Unreferenced (H);
      begin
         for I in 1 .. 1_000 loop
            H := Ada.Strings.Fixed.Hash ("http://code.google.com/p/ada-awa/jsf:wiki");
         end loop;
         Util.Measures.Report (T, "Ada.Strings.Fixed.Hash");
      end;
   end Test_Measure_Copy;

   --  ------------------------------
   --  Test the Index operation
   --  ------------------------------
   procedure Test_Index (T : in out Test) is
      Str : constant String := "0123456789abcdefghijklmnopq";
   begin
      declare
         St  : Util.Measures.Stamp;
         Pos : Integer;
      begin
         for I in 1 .. 10 loop
            Pos := Index (Str, 'q');
         end loop;
         Util.Measures.Report (St, "Util.Strings.Index");
         Assert_Equals (T, 27, Pos, "Invalid index position");
      end;
      declare
         St  : Util.Measures.Stamp;
         Pos : Integer;
      begin
         for I in 1 .. 10 loop
            Pos := Ada.Strings.Fixed.Index (Str, "q");
         end loop;
         Util.Measures.Report (St, "Ada.Strings.Fixed.Index");
         Assert_Equals (T, 27, Pos, "Invalid index position");
      end;
   end Test_Index;

   --  ------------------------------
   --  Test the Rindex operation
   --  ------------------------------
   procedure Test_Rindex (T : in out Test) is
      Str : constant String := "0123456789abcdefghijklmnopq";
   begin
      declare
         St  : Util.Measures.Stamp;
         Pos : Natural;
      begin
         for I in 1 .. 10 loop
            Pos := Rindex (Str, '0');
         end loop;
         Util.Measures.Report (St, "Util.Strings.Rindex");
         Assert_Equals (T, 1, Pos, "Invalid rindex position");
      end;
      declare
         St  : Util.Measures.Stamp;
         Pos : Natural;
      begin
         for I in 1 .. 10 loop
            Pos := Ada.Strings.Fixed.Index (Str, "0", Ada.Strings.Backward);
         end loop;
         Util.Measures.Report (St, "Ada.Strings.Fixed.Rindex");
         Assert_Equals (T, 1, Pos, "Invalid rindex position");
      end;
   end Test_Rindex;

   procedure Test_Starts_With (T : in out Test) is
   begin
      T.Assert (Starts_With ("abcde", "abc"), "Starts_With should return True");
      T.Assert (Starts_With ("abcd", "abcd"), "Starts_With should return True");
      T.Assert (not Starts_With ("ab", "abc"), "Starts_With should return False");
      T.Assert (not Starts_With ("abd", "abc"), "Starts_With should return False");
      T.Assert (not Starts_With ("abde", "abc"), "Starts_With should return False");
   end Test_Starts_With;

   procedure Test_Ends_With (T : in out Test) is
   begin
      T.Assert (Ends_With ("abcde", "de"), "Ends_With should return True");
      T.Assert (Ends_With ("abcd", "abcd"), "Ends_With should return True");
      T.Assert (not Ends_With ("ab", "abc"), "Ends_With should return False");
      T.Assert (not Ends_With ("abd", "abc"), "Ends_With should return False");
      T.Assert (not Ends_With ("abde", "cde"), "Ends_With should return False");
   end Test_Ends_With;

   package String_Map is new Ada.Containers.Hashed_Maps
     (Key_Type        => Unbounded_String,
      Element_Type    => Unbounded_String,
      Hash            => Hash,
      Equivalent_Keys => "=");

   package String_Ref_Map is new Ada.Containers.Hashed_Maps
     (Key_Type        => String_Ref,
      Element_Type    => String_Ref,
      Hash            => Hash,
      Equivalent_Keys => Equivalent_Keys);

   --  ------------------------------
   --  Do some benchmark on String -> X hash mapped.
   --  ------------------------------
   procedure Test_Measure_Hash (T : in out Test) is
      KEY     : aliased constant String := "testing";
      Str_Map : Util.Strings.Maps.Map;
      Ptr_Map : Util.Strings.String_Access_Map.Map;
      Ref_Map : String_Ref_Map.Map;
      Unb_Map : String_Map.Map;
      Name    : String_Access := new String '(KEY);
      Ref     : constant String_Ref := To_String_Ref (KEY);
   begin
      Str_Map.Insert (Name.all, Name.all);
      Ptr_Map.Insert (Name.all'Access, Name.all'Access);
      Unb_Map.Insert (To_Unbounded_String (KEY), To_Unbounded_String (KEY));
      Ref_Map.Insert (Ref, Ref);

      declare
         T : Util.Measures.Stamp;
         H : Ada.Containers.Hash_Type;

         pragma Unreferenced (H);
      begin
         for I in 1 .. 1_000 loop
            H := Ada.Strings.Fixed.Hash ("http://code.google.com/p/ada-awa/jsf:wiki");
         end loop;
         Util.Measures.Report (T, "Ada.Strings.Fixed.Hash (1000 calls)");
      end;

      --  Performance of Hashed_Map Name_Access -> Name_Access
      --  (the fastest hash)
      declare
         St  : Util.Measures.Stamp;
         Pos : constant Strings.String_Access_Map.Cursor := Ptr_Map.Find (KEY'Unchecked_Access);
         Val : constant Name_Access := Util.Strings.String_Access_Map.Element (Pos);
      begin
         Util.Measures.Report (St, "Util.Strings.String_Access_Maps.Find+Element");
         Assert_Equals (T, "testing", Val.all, "Invalid value returned");
      end;

      --  Performance of Hashed_Map String_Ref -> String_Ref
      --  (almost same performance as Hashed_Map Name_Access -> Name_Access)
      declare
         St  : Util.Measures.Stamp;
         Pos : constant String_Ref_Map.Cursor := Ref_Map.Find (Ref);
         Val : constant String_Ref := String_Ref_Map.Element (Pos);
      begin
         Util.Measures.Report (St, "Util.Strings.String_Ref_Maps.Find+Element");
         Assert_Equals (T, "testing", String '(To_String (Val)), "Invalid value returned");
      end;

      --  Performance of Hashed_Map Unbounded_String -> Unbounded_String
      --  (little overhead due to String copy made by Unbounded_String)
      declare
         St  : Util.Measures.Stamp;
         Pos : constant String_Map.Cursor := Unb_Map.Find (To_Unbounded_String (KEY));
         Val : constant Unbounded_String := String_Map.Element (Pos);
      begin
         Util.Measures.Report (St, "Hashed_Maps<Unbounded,Unbounded..Find+Element");
         Assert_Equals (T, "testing", Val, "Invalid value returned");
      end;

      --  Performance for Indefinite_Hashed_Map String -> String
      --  (the slowest hash, string copy to get the result, pointer to key and element
      --  in the hash map implementation)
      declare
         St  : Util.Measures.Stamp;
         Pos : constant Util.Strings.Maps.Cursor := Str_Map.Find (KEY);
         Val : constant String := Util.Strings.Maps.Element (Pos);
      begin
         Util.Measures.Report (St, "Util.Strings.Maps.Find+Element");
         Assert_Equals (T, "testing", Val, "Invalid value returned");
      end;

      Free (Name);
   end Test_Measure_Hash;

   --  ------------------------------
   --  Test String_Ref creation
   --  ------------------------------
   procedure Test_String_Ref (T : in out Test) is
      R1 : String_Ref := To_String_Ref ("testing a string");
   begin
      for I in 1 .. 1_000 loop
         declare
            S  : constant String (1 .. I) := (others => 'x');
            R2 : constant String_Ref := To_String_Ref (S);
         begin
            Assert_Equals (T, S, To_String (R2), "Invalid String_Ref");
            T.Assert (R2 = S, "Invalid comparison");
            Assert_Equals (T, I, Length (R2), "Invalid length");
            R1 := R2;
            T.Assert (R1 = R2, "Invalid String_Ref copy");
         end;
      end loop;
   end Test_String_Ref;

   --  ------------------------------
   --  Test perfect hash (samples/gperfhash)
   --  ------------------------------
   procedure Test_Perfect_Hash (T : in out Test) is
   begin
      for I in Util.Perfect_Hash.Keywords'Range loop
         declare
            K : constant String := Util.Perfect_Hash.Keywords (I).all;
         begin
            Assert_Equals (T, I, Util.Perfect_Hash.Hash (K),
                           "Invalid hash");
            Assert_Equals (T, I, Util.Perfect_Hash.Hash (To_Lower_Case (K)),
                           "Invalid hash");
            Assert (T, Util.Perfect_Hash.Is_Keyword (K), "Keyword " & K & " is not a keyword");
            Assert (T, Util.Perfect_Hash.Is_Keyword (To_Lower_Case (K)),
                    "Keyword " & K & " is not a keyword");
         end;
      end loop;
   end Test_Perfect_Hash;

   --  ------------------------------
   --  Benchmark comparison between the use of Iterate vs Query_Element.
   --  ------------------------------
   procedure Test_Perf_Vector (T : in out Test) is
      pragma Unreferenced (T);
      procedure Iterate_Item (Item : in String);
      procedure Iterate (Pos : in Util.Strings.Vectors.Cursor);

      List  : Util.Strings.Vectors.Vector;
      Count : Natural := 0;
      Total : Natural := 0;

      procedure Iterate_Item (Item : in String) is
      begin
         Count := Count + 1;
         Total := Total + Item'Length;
      end Iterate_Item;

      procedure Iterate (Pos : in Util.Strings.Vectors.Cursor) is
         S : constant String := Util.Strings.Vectors.Element (Pos);
      begin
         Count := Count + 1;
         Total := Total + S'Length;
      end Iterate;

   begin
      for I in 1 .. 100 loop
         List.Append ("yet another fixed string with some reasonable content");
      end loop;

      --  First form of iterate by using the container Iterate procedure.
      --  Due to the Cursor usage, this forces a copy of the string to the secondary stack.
      declare
         St  : Util.Measures.Stamp;
      begin
         List.Iterate (Iterate'Access);
         Util.Measures.Report (St, "Util.Strings.Vectors.Iterate (100)");
      end;

      --  Second form by using the cursor and the Query_Element procedure.
      --  We avoid a copy of the string to the secondary stack.
      declare
         St   : Util.Measures.Stamp;
         Iter : Util.Strings.Vectors.Cursor := List.First;
      begin
         while Util.Strings.Vectors.Has_Element (Iter) loop
            Util.Strings.Vectors.Query_Element (Iter, Iterate_Item'Access);
            Util.Strings.Vectors.Next (Iter);
         end loop;
         Util.Measures.Report (St, "Util.Strings.Vectors.Query_Element+Cursor (100)");
      end;

      --  Third form by using a manual index iteration.
      --  This is the fastest form of iteration with the current GNAT implementation.
      declare
         St   : Util.Measures.Stamp;
         Last : constant Ada.Containers.Count_Type := List.Length;
      begin
         for I in 1 .. Last loop
            List.Query_Element (Positive (I), Iterate_Item'Access);
         end loop;
         Util.Measures.Report (St, "Util.Strings.Vectors.Query_Element+Index (100)");
      end;

   end Test_Perf_Vector;

   --  ------------------------------
   --  Test the token iteration.
   --  ------------------------------
   procedure Test_Iterate_Token (T : in out Test) is
      procedure Process_Token (Token : in String;
                               Done  : out Boolean);

      Called : Natural := 0;

      procedure Process_Token (Token : in String;
                               Done  : out Boolean) is
      begin
         T.Assert (Token = "one" or Token = "two" or Token = "three"
                   or Token = "four five" or Token = "six seven",
                   "Invalid token: [" & Token & "]");

         Called := Called + 1;
         Done   := False;
      end Process_Token;

   begin
      Util.Strings.Tokenizers.Iterate_Tokens (Content => "one two three",
                                              Pattern => " ",
                                              Process => Process_Token'Access);
      Util.Tests.Assert_Equals (T, 3, Called, "Iterate_Tokens calls Process incorrectly");

      Util.Strings.Tokenizers.Iterate_Tokens (Content => "one two three",
                                              Pattern => " ",
                                              Process => Process_Token'Access,
                                              Going   => Ada.Strings.Backward);
      Util.Tests.Assert_Equals (T, 6, Called, "Iterate_Tokens calls Process incorrectly");

      Util.Strings.Tokenizers.Iterate_Tokens (Content => "four five blob six seven",
                                              Pattern => " blob ",
                                              Process => Process_Token'Access);
      Util.Tests.Assert_Equals (T, 8, Called, "Iterate_Tokens calls Process incorrectly");
   end Test_Iterate_Token;

end Util.Strings.Tests;