Natools

Check-in [4b73acb2a8]
Login
Overview
Comment:smaz_generic: new package attempting generic Smaz-like compression
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 4b73acb2a8e38f012c38be461007808f036efe81
User & Date: nat on 2016-11-13 21:42:05
Other Links: manifest | tags
Context
2016-11-14
20:19
smaz_implementations-base_256: component to instantiate standard Smaz check-in: b13d7f70e3 user: nat tags: trunk
2016-11-13
21:42
smaz_generic: new package attempting generic Smaz-like compression check-in: 4b73acb2a8 user: nat tags: trunk
2016-11-12
20:15
tools/smaz: use variable length verbatim option on built dictionaries check-in: 33d57890f4 user: nat tags: trunk
Changes

Added src/natools-smaz_generic.adb version [328ba1ead5].





































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
------------------------------------------------------------------------------
-- Copyright (c) 2016, Natacha Porté                                        --
--                                                                          --
-- Permission to use, copy, modify, and distribute this software for any    --
-- purpose with or without fee is hereby granted, provided that the above   --
-- copyright notice and this permission notice appear in all copies.        --
--                                                                          --
-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES --
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF         --
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR  --
-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   --
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN    --
-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF  --
-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.           --
------------------------------------------------------------------------------

package body Natools.Smaz_Generic is

   use type Ada.Streams.Stream_Element_Offset;


   procedure Find_Entry
     (Dict : in Dictionary;
      Template : in String;
      Code : out Dictionary_Code;
      Length : out Natural);
      --  Try to find the longest entry in Dict that is a prefix of Template,
      --  setting Length to 0 when no such entry exists.

   function Verbatim_Size
     (Dict : in Dictionary;
      Length : in Positive)
     return Ada.Streams.Stream_Element_Count
   is (Verbatim_Size (Length, Dict.Last_Code, Dict.Variable_Length_Verbatim));
      --  Wrapper around the formal Verbatim_Size


   ------------------------------
   -- Local Helper Subprograms --
   ------------------------------

   procedure Find_Entry
     (Dict : in Dictionary;
      Template : in String;
      Code : out Dictionary_Code;
      Length : out Natural)
   is
      N : Natural;
      Is_Valid : Boolean;
   begin
      Length := 0;

      for Last in reverse Template'Range loop
         Is_Valid := False;
         N := Dict.Hash (Template (Template'First .. Last));

         To_Code :
         begin
            Code := Dictionary_Code'Val (N);
            if Is_Valid_Code (Dict, Code) then
               Is_Valid := True;
            end if;
         exception
            when Constraint_Error => null;
         end To_Code;

         if Is_Valid
           and then Dict_Entry (Dict, Code) = Template (Template'First .. Last)
         then
            Length := 1 + Last - Template'First;
            return;
         end if;
      end loop;
   end Find_Entry;



   ----------------------
   -- Public Interface --
   ----------------------

   function Compressed_Upper_Bound
     (Dict : in Dictionary;
      Input : in String)
     return Ada.Streams.Stream_Element_Count is
   begin
      return Verbatim_Size
        (Input'Length, Dict.Last_Code, Dict.Variable_Length_Verbatim);
   end Compressed_Upper_Bound;


   procedure Compress
     (Dict : in Dictionary;
      Input : in String;
      Output_Buffer : out Ada.Streams.Stream_Element_Array;
      Output_Last : out Ada.Streams.Stream_Element_Offset)
   is
      procedure Find_Current_Entry;

      Input_Index : Positive := Input'First;
      Length : Natural;
      Code : Dictionary_Code;
      Output_Index : Ada.Streams.Stream_Element_Offset;

      procedure Find_Current_Entry is
      begin
         Find_Entry
           (Dict,
            Input (Input_Index
                   .. Natural'Min (Input_Index + Dict.Max_Word_Length - 1,
                                   Input'Last)),
            Code,
            Length);
      end Find_Current_Entry;

      Previous_Verbatim_Beginning : Natural := 0;
      Previous_Verbatim_Index : Ada.Streams.Stream_Element_Offset := 0;
   begin
      Output_Index := Output_Buffer'First;
      Find_Current_Entry;

      Main_Loop :
      while Input_Index in Input'Range loop
         Data_In_Dict :
         while Length > 0 loop
            Write_Code (Output_Buffer, Output_Index, Code);
            Input_Index := Input_Index + Length;
            exit Main_Loop when Input_Index not in Input'Range;
            Find_Current_Entry;
         end loop Data_In_Dict;

         Verbatim_Block :
         declare
            Beginning : Positive := Input_Index;
            Verbatim_Length : Natural;
         begin
            Verbatim_Scan :
            while Length = 0 and Input_Index in Input'Range loop
               Input_Index := Input_Index + 1;
               Find_Current_Entry;
            end loop Verbatim_Scan;

            Verbatim_Length := Input_Index - Beginning;

            if Previous_Verbatim_Beginning > 0
              and then Output_Index + Verbatim_Size (Dict, Verbatim_Length)
                 >= Previous_Verbatim_Index + Verbatim_Size
                    (Dict, Input_Index - Previous_Verbatim_Beginning)
            then
               Beginning := Previous_Verbatim_Beginning;
               Output_Index := Previous_Verbatim_Index;
               Verbatim_Length := Input_Index - Beginning;
            else
               Previous_Verbatim_Beginning := Beginning;
               Previous_Verbatim_Index := Output_Index;
            end if;

            Write_Verbatim
              (Output_Buffer, Output_Index,
               Input (Beginning .. Input_Index - 1),
               Dict.Last_Code, Dict.Variable_Length_Verbatim);
         end Verbatim_Block;
      end loop Main_Loop;

      Output_Last := Output_Index - 1;
   end Compress;


   function Compress (Dict : in Dictionary; Input : in String)
     return Ada.Streams.Stream_Element_Array
   is
      Result : Ada.Streams.Stream_Element_Array
        (1 .. Compressed_Upper_Bound (Dict, Input));
      Last : Ada.Streams.Stream_Element_Offset;
   begin
      Compress (Dict, Input, Result, Last);
      return Result (Result'First .. Last);
   end Compress;


   function Decompressed_Length
     (Dict : in Dictionary;
      Input : in Ada.Streams.Stream_Element_Array)
     return Natural
   is
      Result : Natural := 0;
      Input_Index : Ada.Streams.Stream_Element_Offset := Input'First;
      Code : Dictionary_Code;
      Verbatim_Length : Natural;
   begin
      while Input_Index in Input'Range loop
         Read_Code
           (Input, Input_Index,
            Code, Verbatim_Length,
            Dict.Last_Code, Dict.Variable_Length_Verbatim);

         if Verbatim_Length > 0 then
            Skip_Verbatim (Input, Input_Index, Verbatim_Length);
            Result := Result + Verbatim_Length;
         else
            Result := Result + Dict_Entry_Length (Dict, Code);
         end if;
      end loop;

      return Result;
   end Decompressed_Length;


   procedure Decompress
     (Dict : in Dictionary;
      Input : in Ada.Streams.Stream_Element_Array;
      Output_Buffer : out String;
      Output_Last : out Natural)
   is
      Input_Index : Ada.Streams.Stream_Element_Offset := Input'First;
      Code : Dictionary_Code;
      Verbatim_Length : Natural;
   begin
      Output_Last := Output_Buffer'First - 1;

      while Input_Index in Input'Range loop
         Read_Code
           (Input, Input_Index,
            Code, Verbatim_Length,
            Dict.Last_Code, Dict.Variable_Length_Verbatim);

         if Verbatim_Length > 0 then
            Read_Verbatim
              (Input, Input_Index,
               Output_Buffer
                 (Output_Last + 1 .. Output_Last + Verbatim_Length));
            Output_Last := Output_Last + Verbatim_Length;
         else
            declare
               Decoded : constant String := Dict_Entry (Dict, Code);
            begin
               Output_Buffer (Output_Last + 1 .. Output_Last + Decoded'Length)
                 := Decoded;
               Output_Last := Output_Last + Decoded'Length;
            end;
         end if;
      end loop;
   end Decompress;


   function Decompress
     (Dict : in Dictionary; Input : in Ada.Streams.Stream_Element_Array)
     return String
   is
      Result : String (1 .. Decompressed_Length (Dict, Input));
      Last : Natural;
   begin
      Decompress (Dict, Input, Result, Last);
      pragma Assert (Last = Result'Last);
      return Result;
   end Decompress;

end Natools.Smaz_Generic;

Added src/natools-smaz_generic.ads version [1239416aea].







































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
------------------------------------------------------------------------------
-- Copyright (c) 2016, Natacha Porté                                        --
--                                                                          --
-- Permission to use, copy, modify, and distribute this software for any    --
-- purpose with or without fee is hereby granted, provided that the above   --
-- copyright notice and this permission notice appear in all copies.        --
--                                                                          --
-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES --
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF         --
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR  --
-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   --
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN    --
-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF  --
-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.           --
------------------------------------------------------------------------------

with Ada.Streams;

generic
   type Dictionary_Code is (<>);

   with procedure Read_Code
     (Input : in Ada.Streams.Stream_Element_Array;
      Offset : in out Ada.Streams.Stream_Element_Offset;
      Code : out Dictionary_Code;
      Verbatim_Length : out Natural;
      Last_Code : in Dictionary_Code;
      Variable_Length_Verbatim : in Boolean);

   with procedure Read_Verbatim
     (Input : in Ada.Streams.Stream_Element_Array;
      Offset : in out Ada.Streams.Stream_Element_Offset;
      Output : out String);

   with procedure Skip_Verbatim
     (Input : in Ada.Streams.Stream_Element_Array;
      Offset : in out Ada.Streams.Stream_Element_Offset;
      Verbatim_Length : in Positive);

   with function Verbatim_Size
     (Input_Length : in Positive;
      Last_Code : in Dictionary_Code;
      Variable_Length_Verbatim : in Boolean)
     return Ada.Streams.Stream_Element_Count;

   with procedure Write_Code
     (Output : in out Ada.Streams.Stream_Element_Array;
      Offset : in out Ada.Streams.Stream_Element_Offset;
      Code : in Dictionary_Code);

   with procedure Write_Verbatim
     (Output : in out Ada.Streams.Stream_Element_Array;
      Offset : in out Ada.Streams.Stream_Element_Offset;
      Input : in String;
      Last_Code : in Dictionary_Code;
      Variable_Length_Verbatim : in Boolean);

package Natools.Smaz_Generic is
   pragma Pure;

   type Offset_Array is array (Dictionary_Code range <>) of Positive;

   type Dictionary
     (Last_Code : Dictionary_Code;
      Values_Last : Natural)
   is record
      Variable_Length_Verbatim : Boolean;
      Max_Word_Length : Positive;
      Offsets : Offset_Array
        (Dictionary_Code'Succ (Dictionary_Code'First) .. Last_Code);
      Values : String (1 .. Values_Last);
      Hash : not null access function (Value : String) return Natural;
   end record
     with Dynamic_Predicate =>
           (for all Code in Dictionary.Offsets'Range
            => Dictionary.Offsets (Code) in Dictionary.Values'Range)
        and then (for all Code in Dictionary_Code'First .. Dictionary.Last_Code
            => Code_Last (Dictionary.Offsets, Code, Dictionary.Values'Last) + 1
               - Code_First (Dictionary.Offsets, Code, Dictionary.Values'First)
               in 1 .. Dictionary.Max_Word_Length);


   function Code_First
     (Offsets : in Offset_Array;
      Code : in Dictionary_Code;
      Fallback : in Positive)
     return Positive
     is (if Code in Offsets'Range then Offsets (Code) else Fallback);
      --  Return the first index of the value for Code in Dictionary.Values

   function Code_Last
     (Offsets : in Offset_Array;
      Code : in Dictionary_Code;
      Fallback : in Natural)
     return Natural
     is (if Dictionary_Code'Succ (Code) in Offsets'Range
         then Offsets (Dictionary_Code'Succ (Code)) - 1
         else Fallback);
      --  Return the value index of the value for Code in Dictionary.Values

   function Is_Valid_Code
     (Dict : in Dictionary;
      Code : in Dictionary_Code)
     return Boolean
     is (Code in Dictionary_Code'First .. Dict.Last_Code);
      --  Return whether Code exists in Dict

   function Dict_Entry
     (Dict : in Dictionary;
      Code : in Dictionary_Code)
     return String
     is (Dict.Values (Code_First (Dict.Offsets, Code, Dict.Values'First)
                    .. Code_Last (Dict.Offsets, Code, Dict.Values'Last)))
     with Pre => Is_Valid_Code (Dict, Code);
      --  Return the string for at the given Index in Dict

   function Dict_Entry_Length
     (Dict : in Dictionary;
      Code : in Dictionary_Code)
     return Positive
     is (1 + Code_Last (Dict.Offsets, Code, Dict.Values'Last)
           - Code_First (Dict.Offsets, Code, Dict.Values'First))
     with Pre => Is_Valid_Code (Dict, Code);
      --  Return the length of the string for at the given Index in Dict


   function Compressed_Upper_Bound
     (Dict : in Dictionary;
      Input : in String)
     return Ada.Streams.Stream_Element_Count;
      --  Return the maximum number of bytes needed to encode Input

   procedure Compress
     (Dict : in Dictionary;
      Input : in String;
      Output_Buffer : out Ada.Streams.Stream_Element_Array;
      Output_Last : out Ada.Streams.Stream_Element_Offset);
      --  Encode Input into Output_Buffer

   function Compress (Dict : in Dictionary; Input : in String)
     return Ada.Streams.Stream_Element_Array;
      --  Return an encoded buffer for Input


   function Decompressed_Length
     (Dict : in Dictionary;
      Input : in Ada.Streams.Stream_Element_Array)
     return Natural;
      --  Return the exact length when Input is decoded

   procedure Decompress
     (Dict : in Dictionary;
      Input : in Ada.Streams.Stream_Element_Array;
      Output_Buffer : out String;
      Output_Last : out Natural);
      --  Decode Input into Output_Buffer

   function Decompress
     (Dict : in Dictionary; Input : in Ada.Streams.Stream_Element_Array)
     return String;
      --  Return a decoded buffer for Input

end Natools.Smaz_Generic;