Index: src/natools-smaz.adb ================================================================== --- src/natools-smaz.adb +++ src/natools-smaz.adb @@ -21,22 +21,28 @@ function Dict_Entry (Dict : in Dictionary; Index : in Ada.Streams.Stream_Element) return String with Pre => Index <= Dict.Dict_Last; + -- Return the string at the given Index in Dict procedure Find_Entry (Dict : in Dictionary; Template : in String; Index : out Ada.Streams.Stream_Element; Length : out Natural); + -- Try to find the longest entry in Dict that is a prefix of Template, + -- setting Length to 0 when no such entry exists. function To_String (Data : in Ada.Streams.Stream_Element_Array) return String; + -- Convert a stream element array into a string function Verbatim_Size (Dict : Dictionary; Original_Size : Natural) return Ada.Streams.Stream_Element_Count; + -- Return the number of bytes needed by the verbatim encoding + -- of Original_Size bytes. ------------------------------ -- Local Helper Subprograms -- ------------------------------ Index: src/natools-smaz.ads ================================================================== --- src/natools-smaz.ads +++ src/natools-smaz.ads @@ -12,10 +12,40 @@ -- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -- -- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -- -- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -- ------------------------------------------------------------------------------ +------------------------------------------------------------------------------ +-- Natools.Smaz is a re-implementation of the short string compression -- +-- algorithm "Smaz" by Salvatore Sanfilippo -- +-- (see https://github.com/antirez/smaz). -- +-- Its main selling points are its simplicity and CPU performance. However -- +-- the implementation here emphasizes correctness (which greatly benefits -- +-- from simplicity) over performance (so no benchmarks have been made). 
-- +-- -- +-- The basic idea behind the algorithm is that bytes in the encoded (and -- +-- hopefully compressed) message are indexes in a static compiled-in -- +-- dictionary, and two special byte values to mark verbatim data. -- +-- -- +-- For example, using the original Smaz dictionary, the string "Athe33" is -- +-- encoded as (254, 65, 1, 255, 1, 51, 51), which can be broken down as: -- +-- * 254 to mark the following byte as verbatim -- +-- * 65 which is the verbatim byte for 'A' -- +-- * 1 to mark the second word in the dictionary: "the" -- +-- * 255 to mark a variable-length verbatim escape -- +-- * 1 to encode the length of the verbatim fragment: 2 bytes -- +-- * 51, 51 the verbatim bytes for "33". -- +-- -- +-- Note that the encoder has been improved over the original Smaz encoder, -- +-- in that it merges adjacent verbatim fragments when it makes the output -- +-- smaller. For example, with the input 5-byte string "33 33", the original -- +-- naive encoder would produce the 9-byte output -- +-- (255, 1, 51, 51, 0, 255, 1, 51, 51), while the encoder here would encode -- +-- the whole string in a single verbatim fragment, leading to the 7-byte -- +-- output (255, 4, 51, 51, 32, 51, 51). 
-- +------------------------------------------------------------------------------ + with Ada.Streams; package Natools.Smaz is pragma Pure (Natools.Smaz); @@ -45,32 +75,38 @@ function Compressed_Upper_Bound (Dict : in Dictionary; Input : in String) return Ada.Streams.Stream_Element_Count; + -- Return the maximum number of bytes needed to encode Input procedure Compress (Dict : in Dictionary; Input : in String; Output_Buffer : out Ada.Streams.Stream_Element_Array; Output_Last : out Ada.Streams.Stream_Element_Offset); + -- Encode Input into Output_Buffer function Compress (Dict : in Dictionary; Input : in String) return Ada.Streams.Stream_Element_Array; + -- Return an encoded buffer for Input function Decompressed_Length (Dict : in Dictionary; Input : in Ada.Streams.Stream_Element_Array) return Natural; + -- Return the exact length when Input is decoded procedure Decompress (Dict : in Dictionary; Input : in Ada.Streams.Stream_Element_Array; Output_Buffer : out String; Output_Last : out Natural); + -- Decode Input into Output_Buffer function Decompress (Dict : in Dictionary; Input : in Ada.Streams.Stream_Element_Array) return String; + -- Return a decoded buffer for Input end Natools.Smaz;