-- -*- coding: utf-8 -*- -- -- ZanyBlue, an Ada library and framework for finite element analysis. -- -- Copyright (c) 2012, 2016, Michael Rohan -- All rights reserved. -- -- Redistribution and use in source and binary forms, with or without -- modification, are permitted provided that the following conditions -- are met: -- -- * Redistributions of source code must retain the above copyright -- notice, this list of conditions and the following disclaimer. -- -- * Redistributions in binary form must reproduce the above copyright -- notice, this list of conditions and the following disclaimer in the -- documentation and/or other materials provided with the distribution. -- -- * Neither the name of ZanyBlue nor the names of its contributors may -- be used to endorse or promote products derived from this software -- without specific prior written permission. -- -- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -- TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -- -- -- Language 2-letter codes and names defined by Library of Congress: -- -- http://www.loc.gov/standards/iso639-2 -- -- Territory codes defined by ISO 3166 -- -- http://www.iso.org/iso/list-en1-semic-3.txt -- with Ada.Characters.Handling; with Ada.Characters.Conversions; package body ZanyBlue.Text.Utils is function Escape_Character (C : Unicode_Character) return String; -- Return the String value for a Wide Character, either the character -- itself if within Character range or the Unicode_Escape string for -- the character. function Requires_Unicode_Escape (C : Unicode_Character) return Boolean; -- Determine if a Wide character requires Unicode escaping for strings. function Unicode_Escape (C : Unicode_Character) return String; -- Return Unicode escape sequence for a wide character, e.g., "\u009e". ----------------------- -- ASCII_Capitalize -- ----------------------- procedure ASCII_Capitalize (S : in out String) is begin for I in S loop S.Replace_Unicode (I, ASCII_Lowercase (S (I))); end loop; S.Replace_Unicode ((S.First), ASCII_Uppercase (S (S.First))); end ASCII_Capitalize; --------------------- -- ASCII_Lowercase -- --------------------- procedure ASCII_Lowercase (S : in out String) is begin for I in S loop S.Replace_Unicode (I, ASCII_Lowercase (S (I))); end loop; end ASCII_Lowercase; --------------------- -- ASCII_Lowercase -- --------------------- function ASCII_Lowercase (C : Unicode_Character) return Unicode_Character is use Ada.Characters.Handling; use Ada.Characters.Conversions; N_C : constant Character := To_Character (C); begin if Is_Letter (N_C) and then Is_Upper (N_C) then return To_Wide_Wide_Character (To_Lower (N_C)); else return C; end if; end ASCII_Lowercase; --------------------- -- ASCII_Uppercase -- --------------------- procedure ASCII_Uppercase (S : in out String) is begin for I in S loop S.Replace_Unicode (I, ASCII_Uppercase (S (I))); end loop; end ASCII_Uppercase; --------------------- -- ASCII_Uppercase -- --------------------- function ASCII_Uppercase (C : Unicode_Character) return Unicode_Character is use Ada.Characters.Handling; use Ada.Characters.Conversions; N_C : constant Character := To_Character (C); begin if Is_Letter (N_C) and then Is_Lower (N_C) then return To_Wide_Wide_Character (To_Upper (N_C)); else return C; end if; end ASCII_Uppercase; ---------------------- -- Escape_Character -- ---------------------- function Escape_Character (C : Unicode_Character) return String is begin if Requires_Unicode_Escape (C) then return Unicode_Escape (C); else return From_Unicode (C); end if; end Escape_Character; ------------------- -- Escape_String -- ------------------- function Escape_String (Source : String) return String is Result : String; begin for I in Source loop Result := Result & Escape_Character (Source (I)); end loop; return Result; end Escape_String; ---------------------- -- Non_Blank_Prefix -- ---------------------- function Non_Blank_Prefix (S : String) return String is begin for I in S loop if S (I) = ' ' then return S.Slice (S.First, I - 1); end if; end loop; return S; end Non_Blank_Prefix; ----------------------------- -- Requires_Unicode_Escape -- ----------------------------- function Requires_Unicode_Escape (C : Unicode_Character) return Boolean is Pos : constant Natural := Unicode_Character'Pos (C); Ch : Character; begin if Pos >= 127 then return True; end if; Ch := Character'Val (Pos); return Ch = '"' or else not Ada.Characters.Handling.Is_Graphic (Ch); end Requires_Unicode_Escape; ----------------- -- Starts_With -- ----------------- function Starts_With (S : String; Start : Positive; Prefix : String) return Boolean is begin return Head (S.Slice (Start, S.Last), Prefix.Length) = Prefix; end Starts_With; --------------------- -- Unescape_String -- --------------------- function Unescape_String (Source : String) return String is use Ada.Characters.Conversions; Wide_BS : constant Unicode_Character := To_Wide_Wide_Character (ASCII.BS); Wide_HT : constant Unicode_Character := To_Wide_Wide_Character (ASCII.HT); Wide_LF : constant Unicode_Character := To_Wide_Wide_Character (ASCII.LF); Wide_FF : constant Unicode_Character := To_Wide_Wide_Character (ASCII.FF); Wide_CR : constant Unicode_Character := To_Wide_Wide_Character (ASCII.CR); Buffer : String; WCh : Unicode_Character; Ch : Unicode_Character; I : Positive := Source.First; Done : Boolean; procedure Get (Ch : out Unicode_Character; Done : out Boolean); procedure Get (X : out Natural; Done : out Boolean); procedure Get (Ch : out Unicode_Character; Done : out Boolean) is begin Ch := 'x'; Done := I > Source.Last; if not Done then Ch := Source (I); I := I + 1; end if; end Get; procedure Get (X : out Natural; Done : out Boolean) is Offset : Natural; Base : Natural; Ch : Unicode_Character; begin Get (Ch, Done); case Ch is when '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => Offset := Character'Pos ('0'); Base := 0; when 'a' | 'b' | 'c' | 'd' | 'e' | 'f' => Offset := Character'Pos ('a'); Base := 10; when 'A' | 'B' | 'C' | 'D' | 'E' | 'F' => Offset := Character'Pos ('A'); Base := 10; when others => raise Unicode_Format_Error with To_Latin_1 (Source) & ':' & To_Character (Ch); end case; X := Base + Unicode_Character'Pos (Ch) - Offset; end Get; Digit : Natural; Value : Natural; begin Character_Loop : loop Get (Ch, Done); exit Character_Loop when Done; if Ch = '\' then Get (Ch, Done); exit Character_Loop when Done; case Ch is when 'b' => -- \b, backspace WCh := Wide_BS; when 'f' => -- \f, form feed WCh := Wide_FF; when 'n' => -- \n, newline WCh := Wide_LF; when 'r' => -- \r, carriage return WCh := Wide_CR; when 't' => -- \t, horizontal tab WCh := Wide_HT; when 'u' => -- Unicode escape Value := 0; Unicode_Character_Loop : for I in 1 .. 4 loop Get (Digit, Done); Value := Value * 16 + Digit; end loop Unicode_Character_Loop; WCh := Unicode_Character'Val (Value); when others => WCh := Ch; end case; else WCh := Ch; end if; Append (Buffer, WCh); end loop Character_Loop; return Buffer; end Unescape_String; -------------------- -- Unicode_Escape -- -------------------- function Unicode_Escape (C : Unicode_Character) return String is Hex_Map : constant String := "0123456789abcdef"; Result : String; Pos : Natural := Unicode_Character'Pos (C); begin for I in 1 .. 4 loop Result.Replace_Unicode ((5 - I), Hex_Map ((Pos rem 16) + 1)); Pos := Pos / 16; end loop; return "\u" & Result; end Unicode_Escape; end ZanyBlue.Text.Utils;