zipada_58.0.0_08ab9be1/zip_lib/lzma.ads

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
--  LZMA library
----------------
--  Library for encoding and decoding data streams in the LZMA compression
--  format invented by Igor Pavlov.
--
--  Pure Ada 95+ code, 100% portable: OS-, CPU- and compiler- independent.

--  Legal licensing note:

--  Copyright (c) 2016 .. 2019 Gautier de Montmollin
--  SWITZERLAND

--  Permission is hereby granted, free of charge, to any person obtaining a copy
--  of this software and associated documentation files (the "Software"), to deal
--  in the Software without restriction, including without limitation the rights
--  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
--  copies of the Software, and to permit persons to whom the Software is
--  furnished to do so, subject to the following conditions:

--  The above copyright notice and this permission notice shall be included in
--  all copies or substantial portions of the Software.

--  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
--  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
--  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
--  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
--  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
--  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
--  THE SOFTWARE.

--  NB: this is the MIT License, as found 21-Aug-2016 on the site
--  http://www.opensource.org/licenses/mit-license.php

with Ada.Direct_IO;  --  Only used for the type Data_Bytes_Count below.
with Interfaces;
with System;

package LZMA is

  --  The compression and decompression procedures are located
  --  in child packages LZMA.Encoding and LZMA.Decoding respectively.

  --  Bits of last byte being used as context.
  --    With the value 8, LZMA uses a complete Markov chain for predicting
  --    a literal from the previous one, like PKZip's Reduce format.
  subtype Literal_context_bits_range  is Integer range 0 .. 8;

  --  Position mod 2**bits is used, but for literal context only.
  subtype Literal_position_bits_range is Integer range 0 .. 4;

  --  Position mod 2**bits is used in various places.
  subtype Position_bits_range         is Integer range 0 .. 4;

  Default_dictionary_size : constant := 2 ** 15;  --  32 KiB, like Deflate.

  subtype Byte is Interfaces.Unsigned_8;

  --  Ada.Direct_IO is only there for the Data_Bytes_Count type.
  --  In case you want to avoid reference to Ada.Direct_IO,
  --  you can customize the definition of Data_Bytes_Count, provided
  --  it has enough capacity for counting bytes in the streams involved.
  package BIO is new Ada.Direct_IO (Byte);
  subtype Data_Bytes_Count is BIO.Count;

private

  use Interfaces;

  --  These integer types are defined in the LZMA specification
  --  (DRAFT version, 2015-06-14, by Igor Pavlov)

  type Unsigned is mod 2 ** System.Word_Size;
  subtype UInt64 is Unsigned_64;
  subtype UInt32 is Unsigned_32;
  subtype UInt16 is Unsigned_16;

  ----------------------------
  --  Finite state machine  --
  ----------------------------

  States_count : constant := 12;  --  LZMA specification name: "kNumStates"
  subtype State_range is Unsigned range 0 .. States_count - 1;
  type Transition is array (State_range) of State_range;

  ------------------------------------ From ...  0  1  2  3  4  5  6   7   8   9  10  11
  Update_State_Literal  : constant Transition := (0, 0, 0, 0, 1, 2, 3,  4,  5,  6,  4,  5);
  Update_State_Match    : constant Transition := (7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10);
  Update_State_Rep      : constant Transition := (8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11);
  Update_State_ShortRep : constant Transition := (9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11);

  --  Context for improving compression of aligned data,
  --  modulo 2**n = 2, 4, 8 or 16 (max) bytes, or disabled: n = 0.
  Max_pos_bits : constant := 4;  --  LZMA specification name: "kNumPosBitsMax"
  Max_pos_states_count : constant := 2**Max_pos_bits;
  subtype Pos_state_range is Unsigned range 0 .. Max_pos_states_count - 1;

  ----------------------------------------
  --  Probability model for bit coding  --
  ----------------------------------------

  Probability_model_bits  : constant := 11;  --  LZMA specification name: "kNumBitModelTotalBits"
  Probability_model_count : constant := 2 ** Probability_model_bits;

  Probability_change_bits : constant := 5;   --  LZMA specification name: "kNumMoveBits"

  --  All probabilities are initialized with p=0.5. LZMA specification name: "PROB_INIT_VAL"
  Initial_probability : constant := Probability_model_count / 2;

  --  Type for storing probabilities, must have at least Probability_model_bits bits.
  --  LZMA specification recommends UInt16. LzmaEnc.c uses UInt16 or optionally UInt32.
  type CProb is new UInt16;

  --  Integer (signed) used as index because there is a -1 (unused) index in Pos_coder_range.
  type CProb_array is array (Integer range <>) of CProb;

  Align_bits       : constant := 4;  --  LZMA specification name: "kNumAlignBits"
  Align_table_size : constant := 2 ** Align_bits;
  Align_mask       : constant := Align_table_size - 1;

  subtype Bits_3_range is Integer range 0 .. 2**3 - 1;
  subtype Bits_6_range is Integer range 0 .. 2**6 - 1;
  subtype Bits_8_range is Integer range 0 .. 2**8 - 1;
  subtype Bits_NAB_range is Integer range 0 .. 2**Align_bits - 1;

  subtype Probs_3_bits is CProb_array (Bits_3_range);
  subtype Probs_6_bits is CProb_array (Bits_6_range);
  subtype Probs_8_bits is CProb_array (Bits_8_range);
  subtype Probs_NAB_bits is CProb_array (Bits_NAB_range);

  --------------------------------------------------
  --  Probabilities for the binary decision tree  --
  --------------------------------------------------

  type Probs_state is array (State_range) of CProb;
  type Probs_state_and_pos_state is array (State_range, Pos_state_range) of CProb;

  type Probs_for_switches is record
    --  This is the context for the switch between a Literal and a LZ Distance-Length code
    match     : Probs_state_and_pos_state := (others => (others => Initial_probability));
    --  These are contexts for various repetition modes
    rep       : Probs_state := (others => Initial_probability);
    rep_g0    : Probs_state := (others => Initial_probability);
    rep_g1    : Probs_state := (others => Initial_probability);
    rep_g2    : Probs_state := (others => Initial_probability);
    rep0_long : Probs_state_and_pos_state := (others => (others => Initial_probability));
  end record;

  ------------------------------------
  --  Probabilities for LZ lengths  --
  ------------------------------------

  type Low_mid_coder_probs is array (Pos_state_range) of Probs_3_bits;

  --  Probabilities used for encoding LZ lengths. LZMA specification name: "CLenDecoder"
  type Probs_for_LZ_Lengths is record
    choice_1   : CProb               := Initial_probability;  --  0: low coder; 1: mid or high
    choice_2   : CProb               := Initial_probability;  --  0: mid; 1: high
    low_coder  : Low_mid_coder_probs := (others => (others => Initial_probability));
    mid_coder  : Low_mid_coder_probs := (others => (others => Initial_probability));
    high_coder : Probs_8_bits        := (others => Initial_probability);
  end record;

  --------------------------------------
  --  Probabilities for LZ distances  --
  --------------------------------------

  Len_to_pos_states  : constant := 4;
  subtype Slot_coder_range is Unsigned range 0 .. Len_to_pos_states - 1;
  type Slot_coder_probs is array (Slot_coder_range) of Probs_6_bits;
  Dist_slot_bits : constant := 6;  --  "kNumPosSlotBits"

  Start_dist_model_index : constant :=  4;  --  "kStartPosModelIndex"
  End_dist_model_index   : constant := 14;  --  LZMA specification name: "kEndPosModelIndex"
  Num_full_distances  : constant := 2 ** (End_dist_model_index / 2);  --  "kNumFullDistances"

  --  Pos_coder_range: index -1 is never used as such but appears
  --  when calling Bit_Tree_Reverse_Encode (as in the original C version, RcTree_ReverseEncode).
  subtype Pos_coder_range is Integer range -1 .. Num_full_distances - End_dist_model_index;
  subtype Pos_coder_probs is CProb_array (Pos_coder_range);

  type Probs_for_LZ_Distances is record
    slot_coder  : Slot_coder_probs := (others => (others => Initial_probability));
    align_coder : Probs_NAB_bits   := (others => Initial_probability);
    pos_coder   : Pos_coder_probs  := (others => Initial_probability);
  end record;

  --------------------------------------
  --  All probabilities used by LZMA  --
  --------------------------------------

  type All_probabilities (last_lit_prob_index : Integer) is record
    --  Literals:
    lit     : CProb_array (0 .. last_lit_prob_index) := (others => Initial_probability);
    --  Distances:
    dist    : Probs_for_LZ_Distances;
    --  Lengths:
    len     : Probs_for_LZ_Lengths;
    rep_len : Probs_for_LZ_Lengths;
    --  Decision tree switches:
    switch  : Probs_for_switches;
  end record;

  -------------
  --  Misc.  --
  -------------

  --  Minimum dictionary (= plain text buffer of n previous bytes)
  --  size is 4096. LZMA specification name: "LZMA_DIC_MIN"
  Min_dictionary_size : constant := 2 ** 12;

  --  Log2-style encoding of LZ lengths
  Len_low_bits     : constant := 3;
  Len_low_symbols  : constant := 2 ** Len_low_bits;
  Len_mid_bits     : constant := 3;
  Len_mid_symbols  : constant := 2 ** Len_mid_bits;
  Len_high_bits    : constant := 8;
  Len_high_symbols : constant := 2 ** Len_high_bits;
  Len_symbols      : constant := Len_low_symbols + Len_mid_symbols + Len_high_symbols;

  Min_match_length : constant := 2;  --  "LZMA_MATCH_LEN_MIN"
  Max_match_length : constant := Min_match_length + Len_symbols - 1;  --  "LZMA_MATCH_LEN_MAX"

  subtype Match_length_range is Integer range Min_match_length .. Max_match_length;

  --  Fake distance, used as an end-of-stream marker.
  end_of_stream_magic_distance : constant := 16#FFFF_FFFF#;

  --------------------------------------------------
  --  Binary values of various decision switches  --
  --------------------------------------------------

  --  LZ literal vs. DL code
  Literal_choice : constant := 0;
  DL_code_choice : constant := 1;

  --  Within DL code: "Simple match" vs. "Rep match"
  Simple_match_choice : constant := 0;
  Rep_match_choice    : constant := 1;

  --  Within "Rep match": "Distance is rep0" vs. "Distance is not rep0"
  The_distance_is_rep0_choice     : constant := 0;
  The_distance_is_not_rep0_choice : constant := 1;
  --  Within "Distance is rep0":
  The_length_is_1_choice     : constant := 0;
  The_length_is_not_1_choice : constant := 1;
  --  Within "Distance is not rep0": "Distance is rep1" vs. "Distance is not rep1"
  The_distance_is_rep1_choice     : constant := 0;
  The_distance_is_not_rep1_choice : constant := 1;
  --  Within "Distance is not rep1": "Distance is rep2" vs. "Distance is not rep2"
  The_distance_is_rep2_choice     : constant := 0;
  The_distance_is_not_rep2_choice : constant := 1;

  ----------------------
  --  Range encoding  --
  ----------------------

  --  Normalization threshold. When the range width is below that value,
  --  a shift is needed.
  width_threshold : constant := 2**24;  --  LZMA specification name: "kTopValue"

  --  The following article (the only reference in the LZMA specification)
  --  explains how range encoding works:
  --
  --     G. N. N. Martin, Range encoding: an algorithm for removing redundancy
  --     from a digitized message, Video & Data Recording Conference,
  --     Southampton, UK, July 24-27, 1979.

end LZMA;