# Patch summary for tools/smaz.adb (Ada source, unified diff):
#
#   * Declares the integer type Score_Value (range 0 .. 2 ** 31 - 1) right
#     after the Holders instantiation and removes the identical declaration
#     from the declare block guarded by "if Handler.Stat_Output".
#   * Removes the one-parameter expression functions Length, Encoded,
#     Frequency and Gain from that declare block, where they read the
#     enclosing Dictionary and Counts, and adds replacements outside it that
#     receive those values as explicit parameters:
#       Length (Dictionary, E)                  = Dict_Entry (Dictionary, E)'Length
#       Score_Encoded (Dictionary, Counts, E)   = Counts (E) * Length (Dictionary, E)
#       Score_Frequency (Dictionary, Counts, E) = Counts (E)
#       Score_Gain (Dictionary, Counts, E)      = Counts (E) * (Length (Dictionary, E) - 1)
#   * Changes the Score access-to-function parameter of Print_Min_Max and
#     Print_Value from the profile (E) to the profile (D, C, E), and updates
#     every call through Score to pass Dictionary, Counts and the element.
#   * The three Print_Min_Max calls now pass Score_Encoded'Access,
#     Score_Frequency'Access and Score_Gain'Access.
#
# NOTE(review): the patch text below appears to have lost its original line
#   breaks and indentation (each hunk body is run together with single
#   spaces), so it presumably cannot be applied as-is -- TODO confirm against
#   the original patch before use.
# NOTE(review): the added Score_* functions declare parameter E without an
#   explicit "in", while the added Length function and the access-to-function
#   profiles spell "in" -- looks cosmetic only; confirm the intended style.
# NOTE(review): Print_Min_Max seeds its minimum/maximum from element 0 and
#   then loops over 1 .. Dictionary.Dict_Last, whereas Print_Value loops over
#   Dictionary.Offsets'Range; whether both ranges cover the same elements is
#   not visible here -- verify against Natools.Smaz.Dictionary.
#
Index: tools/smaz.adb ================================================================== --- tools/smaz.adb +++ tools/smaz.adb @@ -38,10 +38,12 @@ renames Natools.S_Expressions.To_Atom; package Holders is new Ada.Containers.Indefinite_Holders (Natools.Smaz.Dictionary, Natools.Smaz."="); + type Score_Value is range 0 .. 2 ** 31 - 1; + package Actions is type Enum is (Nothing, Decode, Encode, @@ -129,10 +131,17 @@ Job_Count : in Natural; Updated : out Boolean); -- Try to improve on Dict by replacing a single entry from it with -- one of the substring in Pending_Words. + function Length + (Dictionary : in Natools.Smaz.Dictionary; + E : in Ada.Streams.Stream_Element) + return Score_Value + is (Natools.Smaz.Dict_Entry (Dictionary, E)'Length); + -- Length of a dictionary entry + function Optimize_Dictionary (Base : in Natools.Smaz.Dictionary; Pending_Words : in Natools.Smaz.Tools.String_Lists.List; Input_Texts : in Natools.Smaz.Tools.String_Lists.List; Job_Count : in Natural) @@ -162,10 +171,34 @@ procedure Print_Help (Opt : in Getopt.Configuration; Output : in Ada.Text_IO.File_Type); -- Print the help text to the given file + function Score_Encoded + (Dictionary : in Natools.Smaz.Dictionary; + Counts : in Natools.Smaz.Tools.Dictionary_Counts; + E : Ada.Streams.Stream_Element) + return Score_Value + is (Score_Value (Counts (E)) * Length (Dictionary, E)); + -- Score value using the amount of encoded data using E + + function Score_Frequency + (Dictionary : in Natools.Smaz.Dictionary; + Counts : in Natools.Smaz.Tools.Dictionary_Counts; + E : Ada.Streams.Stream_Element) + return Score_Value + is (Score_Value (Counts (E))); + -- Score value using the number of times E was used + + function Score_Gain + (Dictionary : in Natools.Smaz.Dictionary; + Counts : in Natools.Smaz.Tools.Dictionary_Counts; + E : Ada.Streams.Stream_Element) + return Score_Value + is (Score_Value (Counts (E)) * (Length (Dictionary, E) - 1)); + -- Score value using the number of bytes saved using E + 
function To_Dictionary (Handler : in Callback'Class; Input : in Natools.Smaz.Tools.String_Lists.List) return Natools.Smaz.Dictionary; -- Convert the input into a dictionary given the option in Handler @@ -874,40 +907,30 @@ Sx_Output.Close_List; end if; if Handler.Stat_Output then declare - type Score_Value is range 0 .. 2 ** 31 - 1; - - function Length (E : Ada.Streams.Stream_Element) - return Score_Value - is (Natools.Smaz.Dict_Entry (Dictionary, E)'Length); - - function Encoded (E : Ada.Streams.Stream_Element) - return Score_Value - is (Score_Value (Counts (E)) * Length (E)); - function Frequency (E : Ada.Streams.Stream_Element) - return Score_Value - is (Score_Value (Counts (E))); - function Gain (E : Ada.Streams.Stream_Element) - return Score_Value - is (Score_Value (Counts (E)) * (Length (E) - 1)); - procedure Print (Label : in String; E : in Ada.Streams.Stream_Element; Score : in Score_Value); procedure Print_Min_Max (Label : in String; Score : not null access function - (E : Ada.Streams.Stream_Element) return Score_Value); + (D : in Natools.Smaz.Dictionary; + C : in Natools.Smaz.Tools.Dictionary_Counts; + E : in Ada.Streams.Stream_Element) + return Score_Value); procedure Print_Value (Label : in String; Score : not null access function - (E : Ada.Streams.Stream_Element) return Score_Value; + (D : in Natools.Smaz.Dictionary; + C : in Natools.Smaz.Tools.Dictionary_Counts; + E : in Ada.Streams.Stream_Element) + return Score_Value; Ref : in Score_Value); procedure Print (Label : in String; @@ -936,17 +959,21 @@ end Print; procedure Print_Min_Max (Label : in String; Score : not null access function - (E : Ada.Streams.Stream_Element) return Score_Value) + (D : in Natools.Smaz.Dictionary; + C : in Natools.Smaz.Tools.Dictionary_Counts; + E : in Ada.Streams.Stream_Element) + return Score_Value) is - Min_Score, Max_Score : Score_Value := Score (0); + Min_Score, Max_Score : Score_Value + := Score (Dictionary, Counts, 0); S : Score_Value; begin for E in 1 .. 
Dictionary.Dict_Last loop - S := Score (E); + S := Score (Dictionary, Counts, E); if S < Min_Score then Min_Score := S; end if; if S > Max_Score then Max_Score := S; @@ -958,33 +985,36 @@ end Print_Min_Max; procedure Print_Value (Label : in String; Score : not null access function - (E : Ada.Streams.Stream_Element) return Score_Value; + (D : in Natools.Smaz.Dictionary; + C : in Natools.Smaz.Tools.Dictionary_Counts; + E : in Ada.Streams.Stream_Element) + return Score_Value; Ref : in Score_Value) is begin if Handler.Sx_Output then Sx_Output.Open_List; Sx_Output.Append_String (Label); end if; for E in Dictionary.Offsets'Range loop - if Score (E) = Ref then + if Score (Dictionary, Counts, E) = Ref then Print (Label, E, Ref); end if; end loop; if Handler.Sx_Output then Sx_Output.Close_List; end if; end Print_Value; begin - Print_Min_Max ("encoded", Encoded'Access); - Print_Min_Max ("frequency", Frequency'Access); - Print_Min_Max ("gain", Gain'Access); + Print_Min_Max ("encoded", Score_Encoded'Access); + Print_Min_Max ("frequency", Score_Frequency'Access); + Print_Min_Max ("gain", Score_Gain'Access); end; end if; end; end case; end Build_Dictionary; end Smaz;