Overview
Comment: | tools/smaz: refactor scores out of the evaluation block |
---|---|
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | fbe80ac18454a2c9f4a7e966b549580f |
User & Date: | nat on 2016-10-28 20:56:40 |
Other Links: | manifest | tags |
Context
2016-10-29
| ||
20:29 | tools/smaz: actually implement dictionary optimization check-in: fb7d230fe5 user: nat tags: trunk | |
2016-10-28
| ||
20:56 | tools/smaz: refactor scores out of the evaluation block check-in: fbe80ac184 user: nat tags: trunk | |
2016-10-27
| ||
21:58 | tools/smaz: add a command-line option for optimized dictionary build check-in: cbe3489d15 user: nat tags: trunk | |
Changes
Modified tools/smaz.adb from [ac7f848c5d] to [7214a27560].
︙ | ︙ | |||
36 37 38 39 40 41 42 43 44 45 46 47 48 49 | procedure Smaz is function To_SEA (S : String) return Ada.Streams.Stream_Element_Array renames Natools.S_Expressions.To_Atom; package Holders is new Ada.Containers.Indefinite_Holders (Natools.Smaz.Dictionary, Natools.Smaz."="); package Actions is type Enum is (Nothing, Decode, Encode, Evaluate); end Actions; | > > | 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 | procedure Smaz is function To_SEA (S : String) return Ada.Streams.Stream_Element_Array renames Natools.S_Expressions.To_Atom; package Holders is new Ada.Containers.Indefinite_Holders (Natools.Smaz.Dictionary, Natools.Smaz."="); type Score_Value is range 0 .. 2 ** 31 - 1; package Actions is type Enum is (Nothing, Decode, Encode, Evaluate); end Actions; |
︙ | ︙ | |||
127 128 129 130 131 132 133 134 135 136 137 138 139 140 | Pending_Words : in out Natools.Smaz.Tools.String_Lists.List; Input_Texts : in Natools.Smaz.Tools.String_Lists.List; Job_Count : in Natural; Updated : out Boolean); -- Try to improve on Dict by replacing a single entry from it with -- one of the substring in Pending_Words. function Optimize_Dictionary (Base : in Natools.Smaz.Dictionary; Pending_Words : in Natools.Smaz.Tools.String_Lists.List; Input_Texts : in Natools.Smaz.Tools.String_Lists.List; Job_Count : in Natural) return Natools.Smaz.Dictionary; -- Optimize the dictionary on Input_Texts, starting with Base and | > > > > > > > | 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 | Pending_Words : in out Natools.Smaz.Tools.String_Lists.List; Input_Texts : in Natools.Smaz.Tools.String_Lists.List; Job_Count : in Natural; Updated : out Boolean); -- Try to improve on Dict by replacing a single entry from it with -- one of the substring in Pending_Words. function Length (Dictionary : in Natools.Smaz.Dictionary; E : in Ada.Streams.Stream_Element) return Score_Value is (Natools.Smaz.Dict_Entry (Dictionary, E)'Length); -- Length of a dictionary entry function Optimize_Dictionary (Base : in Natools.Smaz.Dictionary; Pending_Words : in Natools.Smaz.Tools.String_Lists.List; Input_Texts : in Natools.Smaz.Tools.String_Lists.List; Job_Count : in Natural) return Natools.Smaz.Dictionary; -- Optimize the dictionary on Input_Texts, starting with Base and |
︙ | ︙ | |||
160 161 162 163 164 165 166 167 168 169 170 171 172 173 | -- print the given dictionary in the given file procedure Print_Help (Opt : in Getopt.Configuration; Output : in Ada.Text_IO.File_Type); -- Print the help text to the given file function To_Dictionary (Handler : in Callback'Class; Input : in Natools.Smaz.Tools.String_Lists.List) return Natools.Smaz.Dictionary; -- Convert the input into a dictionary given the option in Handler | > > > > > > > > > > > > > > > > > > > > > > > > | 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 | -- print the given dictionary in the given file procedure Print_Help (Opt : in Getopt.Configuration; Output : in Ada.Text_IO.File_Type); -- Print the help text to the given file function Score_Encoded (Dictionary : in Natools.Smaz.Dictionary; Counts : in Natools.Smaz.Tools.Dictionary_Counts; E : Ada.Streams.Stream_Element) return Score_Value is (Score_Value (Counts (E)) * Length (Dictionary, E)); -- Score value using the amount of encoded data using E function Score_Frequency (Dictionary : in Natools.Smaz.Dictionary; Counts : in Natools.Smaz.Tools.Dictionary_Counts; E : Ada.Streams.Stream_Element) return Score_Value is (Score_Value (Counts (E))); -- Score value using the number of times E was used function Score_Gain (Dictionary : in Natools.Smaz.Dictionary; Counts : in Natools.Smaz.Tools.Dictionary_Counts; E : Ada.Streams.Stream_Element) return Score_Value is (Score_Value (Counts (E)) * (Length (Dictionary, E) - 1)); -- Score value using the number of bytes saved using E function To_Dictionary (Handler : in Callback'Class; Input : in Natools.Smaz.Tools.String_Lists.List) return Natools.Smaz.Dictionary; -- Convert the input into a dictionary given the option in Handler |
︙ | ︙ | |||
872 873 874 875 876 877 878 | Sx_Output.Close_List; end loop; Sx_Output.Close_List; end if; if Handler.Stat_Output then declare | < < < < < < < < < < < < < < < < > > | > > > | > | 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 | Sx_Output.Close_List; end loop; Sx_Output.Close_List; end if; if Handler.Stat_Output then declare procedure Print (Label : in String; E : in Ada.Streams.Stream_Element; Score : in Score_Value); procedure Print_Min_Max (Label : in String; Score : not null access function (D : in Natools.Smaz.Dictionary; C : in Natools.Smaz.Tools.Dictionary_Counts; E : in Ada.Streams.Stream_Element) return Score_Value); procedure Print_Value (Label : in String; Score : not null access function (D : in Natools.Smaz.Dictionary; C : in Natools.Smaz.Tools.Dictionary_Counts; E : in Ada.Streams.Stream_Element) return Score_Value; Ref : in Score_Value); procedure Print (Label : in String; E : in Ada.Streams.Stream_Element; Score : in Score_Value) is |
︙ | ︙ | |||
934 935 936 937 938 939 940 | & Score_Value'Image (Score)); end if; end Print; procedure Print_Min_Max (Label : in String; Score : not null access function | > > | > | > | > > | > | | | | | 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 | & Score_Value'Image (Score)); end if; end Print; procedure Print_Min_Max (Label : in String; Score : not null access function (D : in Natools.Smaz.Dictionary; C : in Natools.Smaz.Tools.Dictionary_Counts; E : in Ada.Streams.Stream_Element) return Score_Value) is Min_Score, Max_Score : Score_Value := Score (Dictionary, Counts, 0); S : Score_Value; begin for E in 1 .. Dictionary.Dict_Last loop S := Score (Dictionary, Counts, E); if S < Min_Score then Min_Score := S; end if; if S > Max_Score then Max_Score := S; end if; end loop; Print_Value ("best-" & Label, Score, Max_Score); Print_Value ("worst-" & Label, Score, Min_Score); end Print_Min_Max; procedure Print_Value (Label : in String; Score : not null access function (D : in Natools.Smaz.Dictionary; C : in Natools.Smaz.Tools.Dictionary_Counts; E : in Ada.Streams.Stream_Element) return Score_Value; Ref : in Score_Value) is begin if Handler.Sx_Output then Sx_Output.Open_List; Sx_Output.Append_String (Label); end if; for E in Dictionary.Offsets'Range loop if Score (Dictionary, Counts, E) = Ref then Print (Label, E, Ref); end if; end loop; if Handler.Sx_Output then Sx_Output.Close_List; end if; end Print_Value; begin Print_Min_Max ("encoded", Score_Encoded'Access); Print_Min_Max ("frequency", Score_Frequency'Access); Print_Min_Max ("gain", Score_Gain'Access); end; end if; end; end case; end Build_Dictionary; end Smaz; |