ADDED src/natools-smaz_generic-tools.adb Index: src/natools-smaz_generic-tools.adb ================================================================== --- src/natools-smaz_generic-tools.adb +++ src/natools-smaz_generic-tools.adb @@ -0,0 +1,379 @@ +------------------------------------------------------------------------------ +-- Copyright (c) 2016, Natacha Porté -- +-- -- +-- Permission to use, copy, modify, and distribute this software for any -- +-- purpose with or without fee is hereby granted, provided that the above -- +-- copyright notice and this permission notice appear in all copies. -- +-- -- +-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -- +-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -- +-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -- +-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -- +-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -- +-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -- +-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -- +------------------------------------------------------------------------------ + +package body Natools.Smaz_Generic.Tools is + + function Image (B : Boolean) return String; + -- Return correctly-cased image of B + + + ------------------------------ + -- Local Helper Subprograms -- + ------------------------------ + + function Image (B : Boolean) return String is + begin + if B then + return "True"; + else + return "False"; + end if; + end Image; + + + + ---------------------- + -- Public Interface -- + ---------------------- + + function Append_String + (Dict : in Dictionary; + Value : in String) + return Dictionary is + begin + return Dictionary' + (Last_Code => Dictionary_Code'Succ (Dict.Last_Code), + Values_Last => Dict.Values_Last + Value'Length, + Variable_Length_Verbatim => Dict.Variable_Length_Verbatim, + Max_Word_Length => Positive'Max (Dict.Max_Word_Length, Value'Length), + Offsets => Dict.Offsets + & (Dictionary_Code'First => Dict.Values_Last + 1), + Values => Dict.Values & Value, + Hash => Smaz_Tools.Dummy_Hash'Access); + end Append_String; + + + procedure Print_Dictionary_In_Ada + (Dict : in Dictionary; + Hash_Image : in String := "TODO"; + Max_Width : in Positive := 70; + First_Prefix : in String := " := ("; + Prefix : in String := " "; + Half_Indent : in String := " ") + is + procedure Append_Entity + (Buffer : in out String; + Last : in out Natural; + Entity : in String); + function Double_Quote (S : String; Count : Natural) return String; + function Offsets_Suffix (I : Dictionary_Code) return String; + function Strip_Image (S : String) return String; + function Values_Separator (I : Positive) return String; + + procedure Append_Entity + (Buffer : in out String; + Last : in out Natural; + Entity : in String) is + begin + if Last + 1 + Entity'Length <= Buffer'Last then + Buffer (Last + 1) := ' '; + Buffer (Last + 2 .. Last + 1 + Entity'Length) := Entity; + Last := Last + 1 + Entity'Length; + else + Put_Line (Buffer (Buffer'First .. Last)); + Last := Buffer'First + Prefix'Length - 1; + Buffer (Last + 1 .. Last + Half_Indent'Length) := Half_Indent; + Last := Last + Half_Indent'Length; + Buffer (Last + 1 .. Last + Entity'Length) := Entity; + Last := Last + Entity'Length; + end if; + end Append_Entity; + + function Double_Quote (S : String; Count : Natural) return String is + begin + if Count = 0 then + return S; + else + return Quoted : String (1 .. S'Length + Count) do + declare + O : Positive := Quoted'First; + begin + for I in S'Range loop + Quoted (O) := S (I); + O := O + 1; + + if S (I) = '"' then + Quoted (O) := S (I); + O := O + 1; + end if; + end loop; + end; + end return; + end if; + end Double_Quote; + + function Offsets_Suffix (I : Dictionary_Code) return String is + begin + if I < Dict.Offsets'Last then + return ","; + else + return "),"; + end if; + end Offsets_Suffix; + + function Strip_Image (S : String) return String is + begin + if S'Length > 0 and then S (S'First) = ' ' then + return S (S'First + 1 .. S'Last); + else + return S; + end if; + end Strip_Image; + + function Values_Separator (I : Positive) return String is + begin + if I > Dict.Values'First then + return "& "; + else + return ""; + end if; + end Values_Separator; + + Line_Buffer : String (1 .. Max_Width + Prefix'Length); + Buffer_Last : Natural; + begin + Put_Line (First_Prefix & "Last_Code =>" + & Dictionary_Code'Image (Dict.Last_Code) & ','); + Put_Line (Prefix & "Values_Last =>" + & Natural'Image (Dict.Values_Last) & ','); + Put_Line (Prefix & "Variable_Length_Verbatim => " + & Image (Dict.Variable_Length_Verbatim) & ','); + Put_Line (Prefix & "Max_Word_Length =>" + & Natural'Image (Dict.Max_Word_Length) & ','); + + Line_Buffer (1 .. Prefix'Length) := Prefix; + Line_Buffer (Prefix'Length + 1 .. Prefix'Length + 11) := "Offsets => "; + Buffer_Last := Prefix'Length + 11; + + for I in Dict.Offsets'Range loop + Append_Entity (Line_Buffer, Buffer_Last, Strip_Image + (Positive'Image (Dict.Offsets (I)) & Offsets_Suffix (I))); + + if I = Dict.Offsets'First then + Line_Buffer (Prefix'Length + 12) := '('; + end if; + end loop; + + Put_Line (Line_Buffer (Line_Buffer'First .. Buffer_Last)); + Line_Buffer (Prefix'Length + 1 .. Prefix'Length + 9) := "Values =>"; + Buffer_Last := Prefix'Length + 9; + + declare + I : Positive := Dict.Values'First; + First, Last : Positive; + Quote_Count : Natural; + begin + Values_Loop : + while I <= Dict.Values'Last loop + Add_Unprintable : + while Dict.Values (I) not in ' ' .. '~' loop + Append_Entity + (Line_Buffer, Buffer_Last, + Values_Separator (I) & Character'Image (Dict.Values (I))); + I := I + 1; + exit Values_Loop when I > Dict.Values'Last; + end loop Add_Unprintable; + + First := I; + Quote_Count := 0; + + Find_Printable_Substring : + loop + if Dict.Values (I) = '"' then + Quote_Count := Quote_Count + 1; + end if; + + I := I + 1; + exit Find_Printable_Substring when I > Dict.Values'Last + or else Dict.Values (I) not in ' ' .. '~'; + end loop Find_Printable_Substring; + + Last := I - 1; + + Split_Lines : + loop + declare + Partial_Quote_Count : Natural := 0; + Partial_Width : Natural := 0; + Partial_Last : Natural := First - 1; + Sep : constant String := Values_Separator (First); + Available_Length : constant Natural + := (if Line_Buffer'Last > Buffer_Last + Sep'Length + 4 + then Line_Buffer'Last - Buffer_Last - Sep'Length - 4 + else Line_Buffer'Length - Prefix'Length + - Half_Indent'Length - Sep'Length - 3); + begin + if 1 + Last - First + Quote_Count < Available_Length then + Append_Entity + (Line_Buffer, Buffer_Last, + Sep & '"' & Double_Quote + (Dict.Values (First .. Last), Quote_Count) & '"'); + exit Split_Lines; + else + Count_Quotes : + loop + if Dict.Values (Partial_Last + 1) = '"' then + exit Count_Quotes + when Partial_Width + 2 > Available_Length; + Partial_Width := Partial_Width + 1; + Partial_Quote_Count := Partial_Quote_Count + 1; + else + exit Count_Quotes + when Partial_Width + 1 > Available_Length; + end if; + + Partial_Width := Partial_Width + 1; + Partial_Last := Partial_Last + 1; + end loop Count_Quotes; + + Append_Entity + (Line_Buffer, Buffer_Last, Sep & '"' + & Double_Quote + (Dict.Values (First .. Partial_Last), + Partial_Quote_Count) + & '"'); + First := Partial_Last + 1; + Quote_Count := Quote_Count - Partial_Quote_Count; + end if; + end; + end loop Split_Lines; + end loop Values_Loop; + + Put_Line (Line_Buffer (Line_Buffer'First .. Buffer_Last) & ','); + end; + + Line_Buffer (Prefix'Length + 1 .. Prefix'Length + 7) := "Hash =>"; + Buffer_Last := Prefix'Length + 7; + Append_Entity (Line_Buffer, Buffer_Last, Hash_Image & ");"); + Put_Line (Line_Buffer (Line_Buffer'First .. Buffer_Last)); + end Print_Dictionary_In_Ada; + + + function Remove_Element + (Dict : in Dictionary; + Index : in Dictionary_Code) + return Dictionary + is + Removed_Length : constant Positive := Dict_Entry_Length (Dict, Index); + + function New_Offsets return Offset_Array; + function New_Values return String; + + function New_Offsets return Offset_Array is + Result : Offset_Array + (Dict.Offsets'First .. Dictionary_Code'Pred (Dict.Last_Code)); + begin + for I in Result'Range loop + if I < Index then + Result (I) := Dict.Offsets (I); + else + Result (I) := Dict.Offsets (Dictionary_Code'Succ (I)) + - Removed_Length; + end if; + end loop; + + return Result; + end New_Offsets; + + function New_Values return String is + begin + if Index < Dict.Last_Code then + return Dict.Values (1 .. Dict.Offsets (Index) - 1) + & Dict.Values (Dict.Offsets (Dictionary_Code'Succ (Index)) + .. Dict.Values'Last); + else + return Dict.Values (1 .. Dict.Offsets (Index) - 1); + end if; + end New_Values; + + New_Max_Word_Length : Positive := Dict.Max_Word_Length; + begin + if Removed_Length = Dict.Max_Word_Length then + New_Max_Word_Length := 1; + for I in Dict.Offsets'Range loop + if I /= Index + and then Dict_Entry (Dict, I)'Length > New_Max_Word_Length + then + New_Max_Word_Length := Dict_Entry (Dict, I)'Length; + end if; + end loop; + end if; + + return Dictionary' + (Last_Code => Dictionary_Code'Pred (Dict.Last_Code), + Values_Last => Dict.Values_Last - Removed_Length, + Variable_Length_Verbatim => Dict.Variable_Length_Verbatim, + Max_Word_Length => New_Max_Word_Length, + Offsets => New_Offsets, + Values => New_Values, + Hash => Smaz_Tools.Dummy_Hash'Access); + end Remove_Element; + + + function To_Dictionary + (List : in String_Lists.List; + Variable_Length_Verbatim : in Boolean) + return Dictionary + is + Code_After_Last : Dictionary_Code := Dictionary_Code'First; + String_Size : Natural := 0; + Max_Word_Length : Positive := 1; + begin + for S of List loop + Code_After_Last := Dictionary_Code'Succ (Code_After_Last); + String_Size := String_Size + S'Length; + + if S'Length > Max_Word_Length then + Max_Word_Length := S'Length; + end if; + end loop; + + declare + Last_Code : constant Dictionary_Code + := Dictionary_Code'Pred (Code_After_Last); + Offsets : Offset_Array + (Dictionary_Code'Succ (Dictionary_Code'First) .. Last_Code); + Values : String (1 .. String_Size); + Current_Offset : Positive := 1; + Current_Index : Dictionary_Code := Dictionary_Code'First; + Next_Offset : Positive; + begin + for S of List loop + if Current_Index in Offsets'Range then + Offsets (Current_Index) := Current_Offset; + end if; + + Next_Offset := Current_Offset + S'Length; + Values (Current_Offset .. Next_Offset - 1) := S; + Current_Offset := Next_Offset; + Current_Index := Dictionary_Code'Succ (Current_Index); + end loop; + + pragma Assert (Current_Index = Code_After_Last); + pragma Assert (Current_Offset = String_Size + 1); + + return + (Last_Code => Last_Code, + Values_Last => String_Size, + Variable_Length_Verbatim => Variable_Length_Verbatim, + Max_Word_Length => Max_Word_Length, + Offsets => Offsets, + Values => Values, + Hash => Smaz_Tools.Dummy_Hash'Access); + end; + end To_Dictionary; + +end Natools.Smaz_Generic.Tools; ADDED src/natools-smaz_generic-tools.ads Index: src/natools-smaz_generic-tools.ads ================================================================== --- src/natools-smaz_generic-tools.ads +++ src/natools-smaz_generic-tools.ads @@ -0,0 +1,132 @@ +------------------------------------------------------------------------------ +-- Copyright (c) 2016, Natacha Porté -- +-- -- +-- Permission to use, copy, modify, and distribute this software for any -- +-- purpose with or without fee is hereby granted, provided that the above -- +-- copyright notice and this permission notice appear in all copies. -- +-- -- +-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -- +-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -- +-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -- +-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -- +-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -- +-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -- +-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -- +------------------------------------------------------------------------------ + +------------------------------------------------------------------------------ +-- Natools.Smaz_Generic.Tools provides tools specific to the dictionary -- +-- implementation. These tools are useful for dictionary manipulation, -- +-- even though the intended use of this Smaz implementation is through a -- +-- global constant dictionary object. -- +------------------------------------------------------------------------------ + +with Ada.Containers; +with Natools.Smaz_Tools; + +generic +package Natools.Smaz_Generic.Tools is + pragma Preelaborate; + + package String_Lists renames Smaz_Tools.String_Lists; + + + function To_Dictionary + (List : in String_Lists.List; + Variable_Length_Verbatim : in Boolean) + return Dictionary + with Pre => String_Lists.Length (List) in 1 .. + Ada.Containers.Count_Type (Ada.Streams.Stream_Element'Last); + -- Build a Dictionary object from a string list + -- Note that Hash is set to a placeholder which unconditionnally + -- raises Program_Error when called. + + generic + with procedure Put_Line (Line : String); + procedure Print_Dictionary_In_Ada + (Dict : in Dictionary; + Hash_Image : in String := "TODO"; + Max_Width : in Positive := 70; + First_Prefix : in String := " := ("; + Prefix : in String := " "; + Half_Indent : in String := " "); + -- Output Ada code corresponding to the value of the dictionary. + -- Note that Prefix is the actual base indentation, while Half_Indent + -- is added beyond Prefix before values continued on another line. + -- Frist_Prefix is used instead of Prefix on the first line. + -- All the defaults value are what was used to generate the constant + -- in Natools.Smaz_Original. + + function Remove_Element + (Dict : in Dictionary; + Index : in Dictionary_Code) + return Dictionary + with Pre => Index <= Dict.Last_Code, + Post => Dict.Last_Code = Dictionary_Code'Succ + (Remove_Element'Result.Last_Code) + and then (Index = Dictionary_Code'First + or else (for all I in Dictionary_Code'First + .. Dictionary_Code'Pred (Index) + => Dict_Entry (Dict, I) + = Dict_Entry (Remove_Element'Result, I))) + and then (Index = Dict.Last_Code + or else (for all I in Index + .. Dictionary_Code'Pred (Dict.Last_Code) + => Dict_Entry (Dict, Dictionary_Code'Succ (I)) + = Dict_Entry (Remove_Element'Result, I))); + -- Return a new dictionary equal to Dict without element for Index + + function Append_String + (Dict : in Dictionary; + Value : in String) + return Dictionary + with Pre => Dict.Last_Code < Dictionary_Code'Last + and then Value'Length > 0, + Post => Dict.Last_Code = Dictionary_Code'Pred + (Append_String'Result.Last_Code) + and then (for all I in Dictionary_Code'First .. Dict.Last_Code + => Dict_Entry (Dict, I) + = Dict_Entry (Append_String'Result, I)) + and then Dict_Entry (Append_String'Result, + Append_String'Result.Last_Code) + = Value; + -- Return a new dictionary with Value appended + + + type Dictionary_Counts is + array (Dictionary_Code) of Smaz_Tools.String_Count; + + function Score_Encoded + (Dict : in Dictionary; + Counts : in Dictionary_Counts; + E : in Dictionary_Code) + return Smaz_Tools.Score_Value + is (Smaz_Tools.Score_Encoded (Counts (E), Dict_Entry_Length (Dict, E))); + -- Score value using the amount of encoded data using E + + function Score_Frequency + (Dict : in Dictionary; + Counts : in Dictionary_Counts; + E : in Dictionary_Code) + return Smaz_Tools.Score_Value + is (Smaz_Tools.Score_Frequency (Counts (E), Dict_Entry_Length (Dict, E))); + -- Score value using the number of times E was used + + function Score_Gain + (Dict : in Dictionary; + Counts : in Dictionary_Counts; + E : in Dictionary_Code) + return Smaz_Tools.Score_Value + is (Smaz_Tools.Score_Gain (Counts (E), Dict_Entry_Length (Dict, E))); + -- Score value using the number of bytes saved using E + + function Score + (Dict : in Dictionary; + Counts : in Dictionary_Counts; + E : in Dictionary_Code; + Method : in Smaz_Tools.Methods.Enum) + return Smaz_Tools.Score_Value + is (Smaz_Tools.Score (Counts (E), Dict_Entry_Length (Dict, E), Method)); + -- Scare value with dynamically chosen method + +end Natools.Smaz_Generic.Tools;