with Ada.Characters.Conversions;
with Ada.Command_Line; use Ada.Command_Line;
with Ada.Integer_Text_IO;
with Ada.Streams; use Ada.Streams;
with Ada.Strings.UTF_Encoding.Wide_Wide_Strings;
with Ada.Strings.UTF_Encoding.Strings;
with Ada.Text_IO;
with Ada.Wide_Wide_Text_IO;
with GNAT.IO;

--  Small experiment showing how the same text looks when it is held in a
--  String (one byte per element) versus a Wide_Wide_String, and how the
--  different standard output packages (GNAT.IO, Ada.Text_IO,
--  Ada.Wide_Wide_Text_IO) render it. A few built-in samples are printed
--  first, followed by every command-line argument.
procedure Utf8test is

   package Conv  renames Ada.Characters.Conversions;
   package Enc8  renames Ada.Strings.UTF_Encoding.Strings;
   package Enc32 renames Ada.Strings.UTF_Encoding.Wide_Wide_Strings;
   package GIO   renames GNAT.IO;
   package TIO   renames Ada.Text_IO;
   package WIO   renames Ada.Wide_Wide_Text_IO;

   --  Return the two-digit uppercase hexadecimal image of B ("00" .. "FF").
   --  Assumes Stream_Element is 8 bits wide (as in GNAT), so the based
   --  literal produced below never needs more than six characters.
   function Byte_Image (B : Stream_Element) return String is
      use Ada.Integer_Text_IO;
      Img : String (1 .. 6);
   begin
      --  Put right-justifies the based literal in Img:
      --    two digits -> "16#AB#"  (digits at 4 .. 5)
      --    one digit  -> " 16#A#"  ('#' at 4, digit at 5)
      Put (Img, Integer (B), Base => 16);

      if Img (4) = '#' then
         --  Single-digit value: pad with a leading zero instead of
         --  returning the '#' delimiter as part of the image.
         return '0' & Img (5);
      end if;

      return Img (4 .. 5);
   end Byte_Image;

   --  How the bytes held in a String passed to Print_Bytes are to be read.
   type Charsets is (Latin1, Utf8);

   --  Dump S as hex bytes, then print it through each output package so
   --  the renderings can be compared on the terminal.
   --
   --  May propagate Encoding_Error from Enc32.Decode when Charset = Utf8
   --  and S is not a valid UTF-8 sequence.
   procedure Print_Bytes (S       : String;           -- Either latin1 or utf8
                          WWS     : Wide_Wide_String; -- Always proper utf32
                          Label   : String;
                          Charset : Charsets)         -- Say what S is
   is
      --  Byte view of S: overlaid on the same storage, no copy is made.
      Bytes : Stream_Element_Array (1 .. S'Length);
      for Bytes'Address use S'Address;

      use Ada.Text_IO;
   begin
      --  Raw storage, one hex pair per byte, ':' separated.
      Put (Label & " is" & S'Length'Image & " bytes: ");
      for B of Bytes loop
         Put (Byte_Image (B) & ":");
      end loop;
      New_Line;

      GIO.Put_Line (Label & " GIO image is: " & S);
      if Charset = Latin1 then
         TIO.Put_Line (Label & " TIO image is: " & S);
         --  It breaks the terminal sometimes for utf8 sequences
      end if;
      WIO.Put_Line
        (Conv.To_Wide_Wide_String (Label) & " WWIO image is: " & WWS);

      --  Convert S itself to Wide_Wide_String according to its declared
      --  charset and print the result.
      case Charset is
         when Latin1 =>
            WIO.Put_Line
              (Conv.To_Wide_Wide_String
                 (Label & " latin1->utf32 image is: " & S));
         when Utf8 =>
            WIO.Put_Line
              (Enc32.Decode (Label & " utf8->utf32 image is: " & S));
      end case;

      New_Line;
   end Print_Bytes;

   Ascii : constant String := "aeiou";
   Lat1  : constant String := "áéíóú";
   --  This is not unicode but Latin1, even if the source file is in utf8!
   --  NOTE(review): what actually ends up in these literals depends on the
   --  compiler's source-encoding setting -- confirm against the byte dump.

   Utf8_From_Latin1 : constant String :=
     Enc8.Encode (String'("áéíóú"));
   Utf8_From_Utf32  : constant String :=
     Enc32.Encode (Wide_Wide_String'("€"));

begin
   Print_Bytes (Ascii, Conv.To_Wide_Wide_String (Ascii), "ascii", Latin1);
   Print_Bytes (Lat1, Conv.To_Wide_Wide_String (Lat1), "latin1", Latin1);
   Print_Bytes (Utf8_From_Latin1, Enc32.Decode (Utf8_From_Latin1),
                "lat1->utf8", Utf8);
   Print_Bytes (Utf8_From_Utf32, Enc32.Decode (Utf8_From_Utf32),
                "utf32->utf8", Utf8);

   for I in 1 .. Argument_Count loop
      begin
         Print_Bytes (Argument (I),
                      Enc32.Decode (Argument (I)),
                      --  Presuming Ada.Command_Line strings are the raw
                      --  input bytes, interpreted here as UTF-8
                      "Arg" & I'Image,
                      Utf8);
      exception
         when Ada.Strings.UTF_Encoding.Encoding_Error =>
            --  An argument that is not valid UTF-8 must not abort the
            --  run; report it and carry on with the remaining ones.
            TIO.Put_Line
              (TIO.Standard_Error,
               "Arg" & I'Image & " is not valid UTF-8, skipped");
      end;
   end loop;
end Utf8test;