uxstrings_0.6.0_c9bcce5c/src/uxstrings2.ads

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
with Ada.Strings;                use Ada.Strings;
with Ada.Strings.Wide_Wide_Maps; use Ada.Strings.Wide_Wide_Maps;
with Ada.Strings.UTF_Encoding;
with Ada.Characters.Handling;
with Ada.Characters.Latin_1;
with Ada.Characters.Wide_Latin_1;
private with Ada.Finalization;
private with Ada.Streams;

package UXStrings is

   type Encoding_Scheme is (ASCII_7, Latin_1, UTF_8, UTF_16BE, UTF_16LE);
   -- Supported encoding schemes
   subtype UTF_16_Encoding_Scheme is Encoding_Scheme range UTF_16BE .. UTF_16LE;
   -- Supported UTF-16 encoding schemes

   subtype ASCII_Character is Ada.Characters.Handling.ISO_646;
   subtype ASCII_Character_Array is String with
       Dynamic_Predicate => (for all Item of ASCII_Character_Array => Item in ASCII_Character);
   -- Characters in ISO/IEC 646

   subtype Latin_1_Character is Character;
   subtype Latin_1_Character_Array is String;
   -- Characters in ISO/IEC 8859-1

   subtype BMP_Character is Wide_Character;
   subtype BMP_Character_Array is Wide_String;
   -- Characters in Unicode Basic Multilingual Plane
   -- (Could be also named UCS_2_Character (Universal Coded Character Set)?)

   subtype Unicode_Character is Wide_Wide_Character;
   subtype Unicode_Character_Array is Wide_Wide_String;
   -- Characters in Unicode planes
   -- (Could be also named UCS_4_Character?)

   subtype UTF_8_Character_Array is Ada.Strings.UTF_Encoding.UTF_String;
   subtype UTF_16_Character_Array is Ada.Strings.UTF_Encoding.UTF_String;
   -- Array of 8 bits values representing UTF encodings (UTF-8, UTF-16BE, or UTF-16LE)

   type UXString is tagged private with
     Constant_Indexing => Element,
     Iterable          => (First => First, Next => Next, Has_Element => Has_Element, Element => Element),
     String_Literal    => From_Unicode;
   -- Container type of Unicode characters with dynamic size usually named string

   Null_UXString : constant UXString;
   -- Represent the null string

   function Length (Source : UXString) return Natural;
   -- Return the number of (Unicode) characters

   function First (Source : UXString) return Positive;
   -- Return the position of the first character of Source (actually 1)
   function Next (Source : UXString; Index : Positive) return Positive;
   -- Return the position of the next character of Source after Index position (actually Index + 1)
   procedure Next (Source : UXString; Index : in out Positive);
   -- Update Index to the position of the next character of Source after Index position (actually Index + 1)
   function Has_Element (Source : UXString; Index : Positive) return Boolean;
   -- Return True if a character of Source is present at Index position (actually Index <= Length (Source))
   function Element (Source : UXString; Index : Positive) return Unicode_Character;
   -- Return the Unicode character of Source at Index position
   function Last (Source : UXString) return Natural;
   -- Return the position of the last character of Source (actually Length (Source))

   function Character_Set_Version return UXString;
   -- Returns an implementation-defined identifier that identifies the version of the character set standard
   -- that is used for categorizing characters by the implementation.

   Q_L : Latin_1_Character renames Ada.Characters.Latin_1.Question;

   function Is_ASCII (Source : UXString; Index : Positive) return Boolean;
   -- Return True if the character of Source at Index position is in ASCII set
   function Is_ASCII (Source : UXString) return Boolean;
   -- Return True if all the characters of Source are in ASCII set
   function Get_ASCII
     (Source : UXString; Index : Positive; Substitute : in ASCII_Character := Q_L) return ASCII_Character;
   -- Return the ASCII character of Source at Index position,
   -- if the character is not in ASCII set then Substitute is returned
   function To_ASCII (Source : UXString; Substitute : in ASCII_Character := Q_L) return ASCII_Character_Array;
   -- Return an array of ASCII characters from Source,
   -- if a character is not in ASCII set then Substitute is returned
   function From_ASCII (Item : ASCII_Character) return UXString;
   -- Return an UXString from the ASCII character Item
   function From_ASCII (Source : ASCII_Character_Array) return UXString;
   -- Return an UXString from the array of ASCII characters Source

   function Is_ISO_646 (Item : UXString) return Boolean renames Is_ASCII;
   function To_ISO_646
     (Item : UXString; Substitute : Ada.Characters.Handling.ISO_646 := Q_L) return ASCII_Character_Array renames
     To_ASCII;

   Inv_Q_L : Latin_1_Character renames Ada.Characters.Latin_1.Inverted_Question;

   function Is_Latin_1 (Source : UXString; Index : Positive) return Boolean;
   -- Return True if the character of Source at Index position is in Latin 1 set
   function Is_Latin_1 (Source : UXString) return Boolean;
   -- Return True if all the characters of Source are in Latin 1 set
   function Get_Latin_1
     (Source : UXString; Index : Positive; Substitute : in Latin_1_Character := Inv_Q_L) return Latin_1_Character;
   -- Return the Latin 1 character from Source at Index position,
   -- if the character is not in latin 1 set then Substitute is returned
   function To_Latin_1 (Source : UXString; Substitute : in Latin_1_Character := Inv_Q_L) return Latin_1_Character_Array;
   -- Return an array of Latin 1 characters from Source,
   -- if a character is not in latin 1 set then Substitute is returned
   function From_Latin_1 (Item : Latin_1_Character) return UXString;
   -- Return an UXString from the Latin 1 character parameter Item
   function From_Latin_1 (Source : Latin_1_Character_Array) return UXString;
   -- Return an UXString from the array of Latin 1 characters parameter Source

   Inv_Q_B : BMP_Character renames Ada.Characters.Wide_Latin_1.Inverted_Question;

   function Is_BMP (Source : UXString; Index : Positive) return Boolean;
   -- Return True if the character of Source at Index position is in BMP set
   function Is_BMP (Source : UXString) return Boolean;
   -- Return True if all the characters of Source are in BMP set
   function Get_BMP
     (Source : UXString; Index : Positive; Substitute : in BMP_Character := Inv_Q_B) return BMP_Character;
   -- Return the BMP character from Source at Index position,
   -- if the character is not in BMP set then Substitute is returned
   function To_BMP (Source : UXString; Substitute : in BMP_Character := Inv_Q_B) return BMP_Character_Array;
   -- Return an array of BMP characters from Source,
   -- if a character is not in BMP set then Substitute is returned
   function From_BMP (Item : BMP_Character) return UXString;
   -- Return an UXString from the BMP character parameter Item
   function From_BMP (Source : BMP_Character_Array) return UXString;
   -- Return an UXString from the array of BMP characters parameter Source

   function Is_Unicode (Source : UXString; Index : Positive) return Boolean;
   -- Return True if the character of Source at Index position is in Unicode set (actually True)
   function Is_Unicode (Source : UXString) return Boolean;
   -- Return True if all the characters of Source are in Unicode set (actually True)
   function Get_Unicode (Source : UXString; Index : Positive) return Unicode_Character;
   -- Return the Unicode character from Source at Index position
   function To_Unicode (Source : UXString) return Unicode_Character_Array;
   -- Return an array of Unicode characters from Source
   function From_Unicode (Item : Unicode_Character) return UXString;
   -- Return an UXString from the Unicode character parameter Item
   function From_Unicode (Source : Unicode_Character_Array) return UXString;
   -- Return an UXString from the array of Unicode characters parameter Source

   function To_UTF_8 (Source : UXString; Output_BOM : Boolean := False) return UTF_8_Character_Array;
   -- Return an array of UTF-8 characters from Source, prepend UTF-8 BOM if Output_BOM is set to True
   function From_UTF_8 (Source : UTF_8_Character_Array) return UXString;
   -- Return an UXString from the array of UTF-8 characters parameter Source,
   -- leading BOM characters are suppressed if any

   function To_UTF_16
     (Source : UXString; Output_Scheme : UTF_16_Encoding_Scheme; Output_BOM : Boolean := False)
      return UTF_16_Character_Array;
   -- Return an array of UTF-16 characters from Source according to the encoding scheme specified by Output_Scheme,
   -- prepend UTF-16 BOM if Output_BOM is set to True
   function From_UTF_16 (Source : UTF_16_Character_Array; Input_Scheme : UTF_16_Encoding_Scheme) return UXString;
   -- Return an UXString from the array of UTF-16 characters parameter Source
   -- according to the encoding scheme specified by Input_Scheme,
   -- leading BOM characters are suppressed if any

   procedure Set (Target : out UXString; Source : Unicode_Character_Array);
   -- Set Target to an UXString from the array of Unicode characters parameter Source

   procedure Append (Source : in out UXString; New_Item : UXString);
   -- Update Source to the concatenation of Source and New_Item
   procedure Append (Source : in out UXString; New_Item : Unicode_Character);
   -- Update Source to the concatenation of Source and New_Item

   procedure Prepend (Source : in out UXString; New_Item : UXString);
   -- Update Source to the concatenation of New_Item and Source
   procedure Prepend (Source : in out UXString; New_Item : Unicode_Character);
   -- Update Source to the concatenation of New_Item and Source

   function "&" (Left : UXString; Right : UXString) return UXString;
   -- Return the concatenation of Left and Right
   function "&" (Left : UXString; Right : Unicode_Character) return UXString;
   -- Return the concatenation of Left and Right
   function "&" (Left : Unicode_Character; Right : UXString) return UXString;
   -- Return the concatenation of Left and Right

   procedure Replace_ASCII (Source : in out UXString; Index : Positive; By : ASCII_Character);
   -- Update Source such as the character at Index position is set to the ASCII character parameter By
   procedure Replace_Latin_1 (Source : in out UXString; Index : Positive; By : Latin_1_Character);
   -- Update Source such as the character at Index position is set to the Latin 1 character parameter By
   procedure Replace_BMP (Source : in out UXString; Index : Positive; By : BMP_Character);
   -- Update Source such as the character at Index position is set to the BMP character parameter By
   procedure Replace_Unicode (Source : in out UXString; Index : Positive; By : Unicode_Character);
   -- Update Source such as the character at Index position is set to the Unicode character parameter By

   function Slice (Source : UXString; Low : Positive; High : Natural) return UXString;
   -- Return the slice at positions Low through High from Source
   procedure Slice (Source : UXString; Target : out UXString; Low : Positive; High : Natural);
   -- Set Target to the slice at positions Low through High from Source

   function "=" (Left : UXString; Right : UXString) return Boolean;
   -- Return True if Left equals Right
   function "<" (Left : UXString; Right : UXString) return Boolean;
   -- Return True if Left is less than Right
   function "<=" (Left : UXString; Right : UXString) return Boolean;
   -- Return True if Left is less or equal than Right
   function ">" (Left : UXString; Right : UXString) return Boolean;
   -- Return True if Left is greater than Right
   function ">=" (Left : UXString; Right : UXString) return Boolean;
   -- Return True if Left greater or equal than Right

   ------------------------
   -- Search Subprograms --
   ------------------------

   function Index
     (Source  : UXString; Pattern : UXString; Going : Direction := Forward;
      Mapping : Wide_Wide_Character_Mapping := Identity) return Natural;
   -- Return the position of the first character where Pattern matches Source
   -- with respect of Going direction and Mapping
   function Index
     (Source  : UXString; Pattern : UXString; Going : Direction := Forward;
      Mapping : Wide_Wide_Character_Mapping_Function) return Natural;
   -- Return the position of the first character where Pattern matches Source
   -- with respect of Going direction and Mapping
   function Index
     (Source : UXString; Set : Wide_Wide_Character_Set; Test : Membership := Inside; Going : Direction := Forward)
      return Natural;
   -- Return the position of the first character inside or outside Set matches Source
   -- with respect of Going direction and Test membership
   function Index
     (Source  : UXString; Pattern : UXString; From : Positive; Going : Direction := Forward;
      Mapping : Wide_Wide_Character_Mapping := Identity) return Natural;
   -- Return the position of the first character where Pattern matches Source starting at From position
   -- with respect of Going direction and Mapping
   function Index
     (Source  : UXString; Pattern : UXString; From : Positive; Going : Direction := Forward;
      Mapping : Wide_Wide_Character_Mapping_Function) return Natural;
   -- Return the position of the first character where Pattern matches Source starting at From position
   -- with respect of Going direction and Mapping
   function Index
     (Source : UXString; Set : Wide_Wide_Character_Set; From : Positive; Test : Membership := Inside;
      Going  : Direction := Forward) return Natural;
   -- Return the position of the first character inside or outside Set matches Source starting at From position
   -- with respect of Test membership

   function Index_Non_Blank (Source : UXString; Going : Direction := Forward) return Natural;
   -- Return the position of the first non space character of Source with respect of Going direction
   function Index_Non_Blank (Source : UXString; From : Positive; Going : Direction := Forward) return Natural;
   -- Return the position of the first non space character of Source starting at From position
   -- with respect of Going direction

   function Count
     (Source : UXString; Pattern : UXString; Mapping : Wide_Wide_Character_Mapping := Identity) return Natural;
   -- Return the number of non overlapping occurrences of Pattern matching Source with respect of Mapping
   function Count
     (Source : UXString; Pattern : UXString; Mapping : Wide_Wide_Character_Mapping_Function) return Natural;
   -- Return the number of non overlapping occurrences of Pattern matching Source with respect of Mapping
   function Count (Source : UXString; Set : Wide_Wide_Character_Set) return Natural;
   -- Return the number of occurrences of characters in parameter Set matching Source

   procedure Find_Token
     (Source :     UXString; Set : Wide_Wide_Character_Set; From : Positive; Test : Membership; First : out Positive;
      Last   : out Natural);
   -- Set First to position of the first character inside or outside Set matches Source starting at From position
   -- Set Last to position of the last character inside or outside Set matches Source with respect of Test membership
   procedure Find_Token
     (Source : UXString; Set : Wide_Wide_Character_Set; Test : Membership; First : out Positive; Last : out Natural);
   -- Set First to position of the first character inside or outside Set matches Source
   -- Set Last to position of the last character inside or outside Set matches Source with respect of Test membership

   ------------------------------------
   -- String Translation Subprograms --
   ------------------------------------

   function Translate (Source : UXString; Mapping : Wide_Wide_Character_Mapping) return UXString;
   -- Return Source updated with respect of Mapping
   procedure Translate (Source : in out UXString; Mapping : Wide_Wide_Character_Mapping);
   -- Update Source with respect of Mapping
   function Translate (Source : UXString; Mapping : Wide_Wide_Character_Mapping_Function) return UXString;
   -- Return Source updated with respect of Mapping
   procedure Translate (Source : in out UXString; Mapping : Wide_Wide_Character_Mapping_Function);
   -- Update Source with respect of Mapping

   ---------------------------------------
   -- String Transformation Subprograms --
   ---------------------------------------

   function Replace_Slice (Source : UXString; Low : Positive; High : Natural; By : UXString) return UXString;
   -- Return Source whom characters with positions from Low to High are replaced with parameter By
   procedure Replace_Slice (Source : in out UXString; Low : Positive; High : Natural; By : UXString);
   -- Update Source whom characters with positions from Low to High are replaced with parameter By
   function Insert (Source : UXString; Before : Positive; New_Item : UXString) return UXString;
   -- Return Source with New_Item inserted at position ahead of parameter Before
   procedure Insert (Source : in out UXString; Before : Positive; New_Item : UXString);
   -- Update Source with New_Item inserted at position ahead of parameter Before
   function Overwrite (Source : UXString; Position : Positive; New_Item : UXString) return UXString;
   -- Return Source whom characters starting at Position are replaced with parameter New_Item
   procedure Overwrite (Source : in out UXString; Position : Positive; New_Item : UXString);
   -- Update Source whom characters starting at Position are replaced with parameter New_Item
   function Delete (Source : UXString; From : Positive; Through : Natural) return UXString;
   -- Return Source whom characters with positions from Low to High are removed
   procedure Delete (Source : in out UXString; From : Positive; Through : Natural);
   -- Update Source whom characters with positions from Low to High are removed
   function Trim (Source : UXString; Side : Trim_End) return UXString;
   -- Return Source with Space characters removed from left, right or both with respect of Side
   procedure Trim (Source : in out UXString; Side : Trim_End);
   -- Update Source with Space characters removed from left, right or both with respect of Side
   function Trim (Source : UXString; Left : Wide_Wide_Character_Set; Right : Wide_Wide_Character_Set) return UXString;
   -- Return Source with leading characters in Left and trailing characters in Right removed
   procedure Trim (Source : in out UXString; Left : Wide_Wide_Character_Set; Right : Wide_Wide_Character_Set);
   -- Update Source with leading characters in Left and trailing characters in Right removed
   function Head (Source : UXString; Count : Natural; Pad : Unicode_Character := Wide_Wide_Space) return UXString;
   -- Return the first characters from Source up to Count concatenated with Pad characters if needed
   procedure Head (Source : in out UXString; Count : Natural; Pad : Unicode_Character := Wide_Wide_Space);
   -- Update Source to the first characters from Source up to Count concatenated with Pad characters if needed
   function Tail (Source : UXString; Count : Natural; Pad : Unicode_Character := Wide_Wide_Space) return UXString;
   -- Return the last characters from Source up to Count concatenated with Pad characters if needed
   procedure Tail (Source : in out UXString; Count : Natural; Pad : Unicode_Character := Wide_Wide_Space);
   -- Update Source to the last characters from Source up to Count concatenated with Pad characters if needed

   function "*" (Left : Natural; Right : UXString) return UXString;
   -- Return Right string duplicated Left times
   function "*" (Left : Natural; Right : Unicode_Character) return UXString;
   -- Return Right character duplicated Left times

   ----------------------------------------------
   -- String Additional Comparison Subprograms --
   ----------------------------------------------

   function Equal_Case_Insensitive (Left, Right : UXString) return Boolean;
   -- Returns True if the strings consist of the same sequence of characters after applying locale-independent
   -- simple case folding, as defined by documents referenced in the note in Clause 1 of ISO/IEC 10646:2011.
   -- Otherwise, returns False.
   function Less_Case_Insensitive (Left, Right : UXString) return Boolean;
   -- Performs a lexicographic comparison of strings Left and Right, converted to lower case.

   -----------------------------------
   -- String Conversion Subprograms --
   -----------------------------------

   function To_Lower (Item : UXString) return UXString;
   -- Returns the corresponding lower-case value for Item if Is_Upper(Item), and returns Item otherwise.
   function To_Upper (Item : UXString) return UXString;
   -- Returns the corresponding upper-case value for Item if Is_Lower(Item) and Item has an upper-case form,
   -- and returns Item otherwise.
   function To_Basic (Item : UXString) return UXString;
   -- Returns the letter corresponding to Item but with no diacritical mark,
   -- if Item is a letter but not a basic letter; returns Item otherwise.

private

   type UTF_8_Characters_Access is access UTF_8_Character_Array;
   type UXString is new Ada.Finalization.Controlled with record
      Chars      : UTF_8_Characters_Access := new UTF_8_Character_Array (2 .. 1);
      Full_ASCII : Boolean                 := False;
      Finalized  : Boolean                 := False;
   end record;

   procedure Adjust (Object : in out UXString);
   procedure Finalize (Object : in out UXString);

   procedure Bounded_Move (Source : in out UXString; Target : out UXString; Max : Natural; Last : out Natural);

   procedure UXString_Read (Stream : not null access Ada.Streams.Root_Stream_Type'Class; Item : out UXString);
   for UXString'Read use UXString_Read;

   procedure UXString_Write (Stream : not null access Ada.Streams.Root_Stream_Type'Class; Item : UXString);
   for UXString'Write use UXString_Write;

   Null_UXString : constant UXString :=
     (Ada.Finalization.Controlled with Chars => new UTF_8_Character_Array (2 .. 1), Full_ASCII => True,
      Finalized                              => False);

end UXStrings;