Natools

Check-in [90bba135f9]
Login
Overview
Comment:smaz: merge verbatim blocks when it improves compression
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 90bba135f9523785c21af52085404155711f2d47
User & Date: nat on 2016-09-16 19:59:10
Other Links: manifest | tags
Context
2016-09-17
19:15
smaz: add functions that return directly the filtered array check-in: f85deca432 user: nat tags: trunk
2016-09-16
19:59
smaz: merge verbatim blocks when it improves compression check-in: 90bba135f9 user: nat tags: trunk
2016-09-14
21:44
smaz-tests: make round-trip test really a round-trip

Since there are multiple encoded sequence that decompress into the same original string, it is much more important to check that compressed data really decompress into the original data than to check that it matches exactly the expected byte sequence check-in: e2fbcb314b user: nat tags: trunk

Changes

Modified src/natools-smaz.adb from [84a6bd2551] to [860a7091a3].

29
30
31
32
33
34
35



36
37
38
39
40
41
42
      Template : in String;
      Index : out Ada.Streams.Stream_Element;
      Length : out Natural);

   function To_String (Data : in Ada.Streams.Stream_Element_Array)
     return String;





   ------------------------------
   -- Local Helper Subprograms --
   ------------------------------

   function Dict_Entry
     (Dict : in Dictionary;







>
>
>







29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
      Template : in String;
      Index : out Ada.Streams.Stream_Element;
      Length : out Natural);

   function To_String (Data : in Ada.Streams.Stream_Element_Array)
     return String;

   function Verbatim_Size (Dict : Dictionary; Original_Size : Natural)
     return Ada.Streams.Stream_Element_Count;


   ------------------------------
   -- Local Helper Subprograms --
   ------------------------------

   function Dict_Entry
     (Dict : in Dictionary;
89
90
91
92
93
94
95











































96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
            Result (I) := Character'Val (Data
              (Data'First + Ada.Streams.Stream_Element_Offset (I - 1)));
         end loop;
      end return;
   end To_String;














































   ----------------------
   -- Public Interface --
   ----------------------

   function Compressed_Upper_Bound
     (Dict : in Dictionary;
      Input : in String)
     return Ada.Streams.Stream_Element_Count
   is
      Verbatim1_Max_Size : constant Natural
        := Natural (Ada.Streams.Stream_Element'Last - Dict.Dict_Last)
         - Boolean'Pos (Dict.Variable_Length_Verbatim);
      Verbatim2_Max_Size : constant Natural
        := Natural (Ada.Streams.Stream_Element'Last)
         + Verbatim1_Max_Size;
   begin
      if Dict.Variable_Length_Verbatim then
         return Ada.Streams.Stream_Element_Count (Input'Length
           + 2 * (Input'Length + Verbatim2_Max_Size - 1) / Verbatim2_Max_Size);
      else
         return Ada.Streams.Stream_Element_Count (Input'Length
           + (Input'Length + Verbatim1_Max_Size - 1) / Verbatim1_Max_Size);
      end if;
   end Compressed_Upper_Bound;


   procedure Compress
     (Dict : in Dictionary;
      Input : in String;
      Output_Buffer : out Ada.Streams.Stream_Element_Array;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>








|
<
<
<
<
<
<
<

<
|
<
<
<
<
<







92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150







151

152





153
154
155
156
157
158
159
            Result (I) := Character'Val (Data
              (Data'First + Ada.Streams.Stream_Element_Offset (I - 1)));
         end loop;
      end return;
   end To_String;


   function Verbatim_Size (Dict : Dictionary; Original_Size : Natural)
     return Ada.Streams.Stream_Element_Count
   is
      Verbatim1_Max_Size : constant Ada.Streams.Stream_Element_Count
        := Ada.Streams.Stream_Element_Count
            (Ada.Streams.Stream_Element'Last - Dict.Dict_Last)
         - Boolean'Pos (Dict.Variable_Length_Verbatim);
      Verbatim2_Max_Size : constant Ada.Streams.Stream_Element_Count
        := Ada.Streams.Stream_Element_Count (Ada.Streams.Stream_Element'Last)
         + Verbatim1_Max_Size;

      Remaining : Ada.Streams.Stream_Element_Count
        := Ada.Streams.Stream_Element_Count (Original_Size);
      Overhead : Ada.Streams.Stream_Element_Count := 0;
   begin
      if Dict.Variable_Length_Verbatim then
         if Remaining >= Verbatim2_Max_Size then
            declare
               Full_Blocks : constant Ada.Streams.Stream_Element_Count
                 := Remaining / Verbatim2_Max_Size;
            begin
               Overhead := Overhead + 2 * Full_Blocks;
               Remaining := Remaining - Verbatim2_Max_Size * Full_Blocks;
            end;
         end if;

         if Remaining > Verbatim1_Max_Size then
            Overhead := Overhead + 2;
            Remaining := 0;
         end if;
      end if;

      declare
         Full_Blocks : constant Ada.Streams.Stream_Element_Count
           := Remaining / Verbatim1_Max_Size;
      begin
         Overhead := Overhead + Full_Blocks;
      end;

      return Overhead + Ada.Streams.Stream_Element_Count (Original_Size);
   end Verbatim_Size;



   ----------------------
   -- Public Interface --
   ----------------------

   function Compressed_Upper_Bound
     (Dict : in Dictionary;
      Input : in String)
     return Ada.Streams.Stream_Element_Count is







   begin

      return Verbatim_Size (Dict, Input'Length);





   end Compressed_Upper_Bound;


   procedure Compress
     (Dict : in Dictionary;
      Input : in String;
      Output_Buffer : out Ada.Streams.Stream_Element_Array;
145
146
147
148
149
150
151



152
153
154
155
156
157
158
           (Dict,
            Input (Input_Index
                   .. Natural'Min (Input_Index + Dict.Max_Word_Length - 1,
                                   Input'Last)),
            Word,
            Length);
      end Find_Entry;



   begin
      Output_Last := Output_Buffer'First - 1;
      Find_Entry;

      Main_Loop :
      while Input_Index in Input'Range loop
         Data_In_Dict :







>
>
>







178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
           (Dict,
            Input (Input_Index
                   .. Natural'Min (Input_Index + Dict.Max_Word_Length - 1,
                                   Input'Last)),
            Word,
            Length);
      end Find_Entry;

      Previous_Verbatim_Beginning : Natural := 0;
      Previous_Verbatim_Last : Ada.Streams.Stream_Element_Offset := 0;
   begin
      Output_Last := Output_Buffer'First - 1;
      Find_Entry;

      Main_Loop :
      while Input_Index in Input'Range loop
         Data_In_Dict :
172
173
174
175
176
177
178













179
180
181
182
183
184
185
            Verbatim_Scan :
            while Length = 0 and Input_Index in Input'Range loop
               Input_Index := Input_Index + 1;
               Find_Entry;
            end loop Verbatim_Scan;

            Verbatim_Length := Input_Index - Beginning;














            Verbatim_Encode :
            while Verbatim_Length > 0 loop
               if Dict.Variable_Length_Verbatim
                 and then Verbatim_Length > Verbatim1_Max_Size
               then
                  Block_Length := Natural'Min







>
>
>
>
>
>
>
>
>
>
>
>
>







208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
            Verbatim_Scan :
            while Length = 0 and Input_Index in Input'Range loop
               Input_Index := Input_Index + 1;
               Find_Entry;
            end loop Verbatim_Scan;

            Verbatim_Length := Input_Index - Beginning;

            if Previous_Verbatim_Beginning > 0
              and then Output_Last + Verbatim_Size (Dict, Verbatim_Length)
                 > Previous_Verbatim_Last + Verbatim_Size
                    (Dict, Input_Index - Previous_Verbatim_Beginning)
            then
               Beginning := Previous_Verbatim_Beginning;
               Output_Last := Previous_Verbatim_Last;
               Verbatim_Length := Input_Index - Beginning;
            else
               Previous_Verbatim_Beginning := Beginning;
               Previous_Verbatim_Last := Output_Last;
            end if;

            Verbatim_Encode :
            while Verbatim_Length > 0 loop
               if Dict.Variable_Length_Verbatim
                 and then Verbatim_Length > Verbatim1_Max_Size
               then
                  Block_Length := Natural'Min