zipada_58.0.0_08ab9be1/test/lz77_stats.adb

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
--  Gather some statistics about LZ77 DLE codes from UnZip.Decompress
--  (some_trace mode), and output them in matrix form.
--
--  May help answer questions like:
--   - which distances and lengths are used, and how often?
--   - which distance and length *pairs* are used, and how often?

with Ada.Integer_Text_IO;               use Ada.Integer_Text_IO;
with Ada.Text_IO;                       use Ada.Text_IO;

procedure LZ77_Stats is

  --  Reads the text file "dump.lz77" and counts, for every "DLE" line,
  --  how often each distance, each length and each (distance, length)
  --  pair occurs. The counters are then written as a matrix into the
  --  file "LZ77_Stats.csv", using ';' as the cell separator.
  --
  --  Expected shape of each input line:
  --    Lit <integer>             (read, but not counted)
  --    DLE <integer> <integer>   (first value: distance, second: length)
  --  Anything following those values on the line is skipped.
  --
  --  Raises wrong_LZ77_tag when a line starts with any other 3-character
  --  tag. A distance or length outside the ranges below fails the array
  --  index check (Constraint_Error).

  subtype Distance_range is Integer range 1 .. 32768;
  subtype Length_range is Integer range 3 .. 258;

  type Distance_counters is array (Distance_range) of Natural;
  type Length_counters is array (Length_range) of Natural;
  type DL_matrix is array (Distance_range, Length_range) of Natural;
  type p_DL_matrix is access DL_matrix;

  wrong_LZ77_tag : exception;

  csv_name : constant String := "LZ77_Stats.csv";
  sep      : constant Character := ';';

  per_distance : Distance_counters := (others => 0);
  per_length   : Length_counters := (others => 0);
  --  The pair matrix holds 32768 x 256 counters; it is allocated with "new".
  per_pair     : constant p_DL_matrix := new DL_matrix'(others => (others => 0));
  dle_count    : Natural := 0;

  --  A run of `count` separator characters.
  function Separators (count : Positive) return String is
    run : constant String (1 .. count) := (others => sep);
  begin
    return run;
  end Separators;

  --  Parse the dump file and fill all counters.
  procedure Read_Dump is
    dump : File_Type;
    kind : String (1 .. 3);
    first_value, second_value : Integer;
  begin
    Open (dump, In_File, "dump.lz77");  --  File from UnZip.Decompress, some_trace mode
    loop
      exit when End_Of_File (dump);
      Get (dump, kind);
      if kind = "DLE" then
        Get (dump, first_value);
        Get (dump, second_value);
        per_distance (first_value) := per_distance (first_value) + 1;
        per_length (second_value) := per_length (second_value) + 1;
        per_pair (first_value, second_value) :=
          per_pair (first_value, second_value) + 1;
        dle_count := dle_count + 1;
      elsif kind = "Lit" then
        --  The value is consumed but plays no role in the statistics.
        Get (dump, first_value);
      else
        raise wrong_LZ77_tag;
      end if;
      Skip_Line (dump);
    end loop;
    Close (dump);
  end Read_Dump;

  --  Emit the counters as a separator-delimited matrix:
  --  one column per length, one row per distance.
  procedure Write_Stats is
    csv : File_Type;

    --  One numeric cell (default integer width) followed by a separator.
    procedure Put_Cell (value : Integer) is
    begin
      Put (csv, value);
      Put (csv, sep);
    end Put_Cell;

  begin
    Create (csv, Out_File, csv_name);
    --
    --  Row of column titles: every possible length.
    --
    Put (csv, Separators (3) & "Length" & Separators (2));
    for len in Length_range loop
      Put_Cell (len);
    end loop;
    New_Line (csv, 2);
    --
    --  Row of totals: number of DLE codes, then the count per length.
    --
    Put (csv, Separators (2));
    Put (csv, dle_count);
    Put (csv, sep & "stats" & Separators (2));
    for len in Length_range loop
      Put_Cell (per_length (len));
    end loop;
    New_Line (csv);
    Put_Line (csv, "Distance" & Separators (2) & "stats");
    New_Line (csv);
    --
    --  One row per distance: the distance, its count, then the pair counts.
    --
    for dist in Distance_range loop
      Put (csv, dist);
      Put (csv, Separators (2));
      Put (csv, per_distance (dist));
      Put (csv, Separators (3));
      for len in Length_range loop
        Put_Cell (per_pair (dist, len));
      end loop;
      New_Line (csv);
    end loop;
    Close (csv);
  end Write_Stats;

begin
  Read_Dump;
  Write_Stats;
end LZ77_Stats;