Natools

Check-in [9922c8afaf]
Login
Overview
Comment:smaz: add missing documentation comments
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 9922c8afaf33f117954a924066cb1705b68c304a
User & Date: nat on 2016-09-25 21:14:17
Other Links: manifest | tags
Context
2016-09-26
21:22
tools/smaz: add decompression of an input list of encoded strings check-in: 6f2cf4bf88 user: nat tags: trunk
2016-09-25
21:14
smaz: add missing documentation comments check-in: 9922c8afaf user: nat tags: trunk
2016-09-24
21:53
tools/smaz: add the output of statistics about compression check-in: 939809f44a user: nat tags: trunk
Changes

Modified src/natools-smaz.adb from [e6c590a339] to [e861783700].

19
20
21
22
23
24
25

26
27
28
29
30
31


32
33
34

35
36
37


38
39
40
41
42
43
44
   use type Ada.Streams.Stream_Element_Offset;

   function Dict_Entry
     (Dict : in Dictionary;
      Index : in Ada.Streams.Stream_Element)
     return String
     with Pre => Index <= Dict.Dict_Last;


   procedure Find_Entry
     (Dict : in Dictionary;
      Template : in String;
      Index : out Ada.Streams.Stream_Element;
      Length : out Natural);



   function To_String (Data : in Ada.Streams.Stream_Element_Array)
     return String;


   function Verbatim_Size (Dict : Dictionary; Original_Size : Natural)
     return Ada.Streams.Stream_Element_Count;




   ------------------------------
   -- Local Helper Subprograms --
   ------------------------------

   function Dict_Entry







>






>
>



>



>
>







19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
   use type Ada.Streams.Stream_Element_Offset;

   function Dict_Entry
     (Dict : in Dictionary;
      Index : in Ada.Streams.Stream_Element)
     return String
     with Pre => Index <= Dict.Dict_Last;
      --  Return the string at the given Index in Dict.
      --  Index must not exceed Dict.Dict_Last, as the precondition requires.

   procedure Find_Entry
     (Dict : in Dictionary;
      Template : in String;
      Index : out Ada.Streams.Stream_Element;
      Length : out Natural);
      --  Try to find the longest entry in Dict that is a prefix of Template,
      --  setting Length to 0 when no such entry exists.

   function To_String (Data : in Ada.Streams.Stream_Element_Array)
     return String;
      --  Convert a stream element array into a string

   function Verbatim_Size (Dict : Dictionary; Original_Size : Natural)
     return Ada.Streams.Stream_Element_Count;
      --  Return the number of bytes needed by the verbatim encoding
      --  of Original_Size bytes.


   ------------------------------
   -- Local Helper Subprograms --
   ------------------------------

   function Dict_Entry

Modified src/natools-smaz.ads from [ddb34c3613] to [083f639e5f].

10
11
12
13
14
15
16






























17
18
19
20
21
22
23
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR  --
-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   --
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN    --
-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF  --
-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.           --
------------------------------------------------------------------------------































with Ada.Streams;

package Natools.Smaz is
   pragma Pure (Natools.Smaz);

   use type Ada.Streams.Stream_Element;








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR  --
-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   --
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN    --
-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF  --
-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.           --
------------------------------------------------------------------------------

------------------------------------------------------------------------------
-- Natools.Smaz is a re-implementation of the short string compression      --
-- algorithm "Smaz" by Salvatore Sanfilippo                                 --
-- (see https://github.com/antirez/smaz).                                   --
-- Its main selling point is its simplicity and CPU performance. However    --
-- the implementation here emphasizes correctness (which greatly benefits   --
-- from simplicity) over performance (so no benchmarks have been made).     --
--                                                                          --
-- The basic idea behind the algorithm is that bytes in the encoded (and    --
-- hopefully compressed) message are indexes in a static compiled-in        --
-- dictionary, and two special byte values to mark verbatim data.           --
--                                                                          --
-- For example, using the original Smaz dictionary, the string "Athe33" is  --
-- encoded as (254, 65, 1, 255, 1, 51, 51), which can be broken down as:    --
--   * 254 to mark the following byte as verbatim                           --
--   * 65 which is verbatim byte for 'A'                                    --
--   * 1 to mark the second word in the dictionary: "the"                   --
--   * 255 to mark variable-length verbatim escape                          --
--   * 1 to encode the length of the verbatim fragment: 2 bytes             --
--   * 51, 51 the verbatim bytes for "33".                                  --
--                                                                          --
-- Note that the encoder has been improved over the original Smaz encoder,  --
-- in that it merges adjacent verbatim fragments when it makes the output   --
-- smaller. For example, with the input 5-byte string "33 33", the original --
-- naive encoder would produce the 9-byte output                            --
-- (255, 1, 51, 51, 0, 255, 1, 51, 51), while the encoder here would encode --
-- the whole string in a single verbatim fragment, leading to the 7-byte    --
-- output (255, 4, 51, 51, 32, 51, 51).                                     --
------------------------------------------------------------------------------

with Ada.Streams;

package Natools.Smaz is
   pragma Pure (Natools.Smaz);

   use type Ada.Streams.Stream_Element;

43
44
45
46
47
48
49

50
51
52
53
54
55

56
57
58

59
60
61
62
63
64

65
66
67
68
69
70

71
72
73
74

75
76
                  in 1 .. Dictionary.Max_Word_Length));


   function Compressed_Upper_Bound
     (Dict : in Dictionary;
      Input : in String)
     return Ada.Streams.Stream_Element_Count;


   procedure Compress
     (Dict : in Dictionary;
      Input : in String;
      Output_Buffer : out Ada.Streams.Stream_Element_Array;
      Output_Last : out Ada.Streams.Stream_Element_Offset);


   function Compress (Dict : in Dictionary; Input : in String)
     return Ada.Streams.Stream_Element_Array;



   function Decompressed_Length
     (Dict : in Dictionary;
      Input : in Ada.Streams.Stream_Element_Array)
     return Natural;


   procedure Decompress
     (Dict : in Dictionary;
      Input : in Ada.Streams.Stream_Element_Array;
      Output_Buffer : out String;
      Output_Last : out Natural);


   function Decompress
     (Dict : in Dictionary; Input : in Ada.Streams.Stream_Element_Array)
     return String;


end Natools.Smaz;







>






>



>






>






>




>


73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
                  in 1 .. Dictionary.Max_Word_Length));


   function Compressed_Upper_Bound
     (Dict : in Dictionary;
      Input : in String)
     return Ada.Streams.Stream_Element_Count;
      --  Return the maximum number of bytes needed to encode Input

   procedure Compress
     (Dict : in Dictionary;
      Input : in String;
      Output_Buffer : out Ada.Streams.Stream_Element_Array;
      Output_Last : out Ada.Streams.Stream_Element_Offset);
      --  Encode Input into Output_Buffer

   function Compress (Dict : in Dictionary; Input : in String)
     return Ada.Streams.Stream_Element_Array;
      --  Return an encoded buffer for Input


   function Decompressed_Length
     (Dict : in Dictionary;
      Input : in Ada.Streams.Stream_Element_Array)
     return Natural;
      --  Return the exact length when Input is decoded

   procedure Decompress
     (Dict : in Dictionary;
      Input : in Ada.Streams.Stream_Element_Array;
      Output_Buffer : out String;
      Output_Last : out Natural);
      --  Decode Input into Output_Buffer

   function Decompress
     (Dict : in Dictionary; Input : in Ada.Streams.Stream_Element_Array)
     return String;
      --  Return a decoded buffer for Input

end Natools.Smaz;