Overview
Comment: | tools/smaz: genericize Evaluate_Dictionary |
---|---|
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | bc86bc41eedf407f382301e89417a9b1 |
User & Date: | nat on 2016-12-03 22:29:29 |
Other Links: | manifest | tags |
Context
2016-12-04
| ||
20:06 | tools/smaz: genericize Optimize_Dictionary check-in: 5c617d9676 user: nat tags: trunk | |
2016-12-03
| ||
22:29 | tools/smaz: genericize Evaluate_Dictionary check-in: bc86bc41ee user: nat tags: trunk | |
2016-12-02
| ||
21:12 | tools/smaz: genericize Parallel_Evaluate_Dictionary check-in: 79a36ec957 user: nat tags: trunk | |
Changes
Modified tools/smaz.adb from [876acf9894] to [0fafdd0696].
︙ | ︙ | |||
121 122 123 124 125 126 127 | overriding procedure Argument (Handler : in out Callback; Argument : in String) is null; | < < < < < < < < < | 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | overriding procedure Argument (Handler : in out Callback; Argument : in String) is null; function Getopt_Config return Getopt.Configuration; -- Build the configuration object procedure Optimization_Round (Dict : in out Holders.Holder; Score : in out Ada.Streams.Stream_Element_Count; Counts : in out Tools_256.Dictionary_Counts; |
︙ | ︙ | |||
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 | function To_Dictionary (Handler : in Callback'Class; Input : in Natools.Smaz_Tools.String_Lists.List) return Natools.Smaz_256.Dictionary; -- Convert the input into a dictionary given the option in Handler generic type Dictionary (<>) is private; type Dictionary_Entry is (<>); type String_Count is range <>; type Dictionary_Counts is array (Dictionary_Entry) of String_Count; with package String_Lists is new Ada.Containers.Indefinite_Doubly_Linked_Lists (String); with procedure Evaluate_Dictionary_Partial (Dict : in Dictionary; Corpus_Entry : in String; Compressed_Size : in out Ada.Streams.Stream_Element_Count; Counts : in out Dictionary_Counts); package Dictionary_Subprograms is procedure Parallel_Evaluate_Dictionary (Job_Count : in Positive; Dict : in Dictionary; Corpus : in String_Lists.List; Compressed_Size : out Ada.Streams.Stream_Element_Count; Counts : out Dictionary_Counts); -- Return the same results as Natools.Smaz.Tools.Evaluate_Dictionary, -- but hopefully more quickly, using Job_Count tasks. 
end Dictionary_Subprograms; package body Dictionary_Subprograms is procedure Parallel_Evaluate_Dictionary (Job_Count : in Positive; Dict : in Dictionary; Corpus : in String_Lists.List; Compressed_Size : out Ada.Streams.Stream_Element_Count; Counts : out Dictionary_Counts) | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 | function To_Dictionary (Handler : in Callback'Class; Input : in Natools.Smaz_Tools.String_Lists.List) return Natools.Smaz_256.Dictionary; -- Convert the input into a dictionary given the option in Handler procedure Use_Dictionary (Dict : in out Natools.Smaz_256.Dictionary); -- Update Dictionary.Hash so that it can be actually used generic type Dictionary (<>) is private; type Dictionary_Entry is (<>); type String_Count is range <>; type Dictionary_Counts is array (Dictionary_Entry) of String_Count; with package String_Lists is new Ada.Containers.Indefinite_Doubly_Linked_Lists (String); with procedure Evaluate_Dictionary (Dict : in Dictionary; Corpus : in String_Lists.List; Compressed_Size : out Ada.Streams.Stream_Element_Count; Counts : out Dictionary_Counts); with procedure Evaluate_Dictionary_Partial (Dict : in Dictionary; Corpus_Entry : in String; Compressed_Size : in out Ada.Streams.Stream_Element_Count; Counts : in out Dictionary_Counts); with procedure Use_Dictionary (Dict : in out Dictionary) is <>; package Dictionary_Subprograms is procedure Evaluate_Dictionary (Job_Count : in Natural; Dict : in Dictionary; Corpus : in String_Lists.List; Compressed_Size : out Ada.Streams.Stream_Element_Count; Counts : out Dictionary_Counts); -- Dispatch to parallel 
or non-parallel version of -- Evaluate_Dictionary depending on Job_Count. procedure Parallel_Evaluate_Dictionary (Job_Count : in Positive; Dict : in Dictionary; Corpus : in String_Lists.List; Compressed_Size : out Ada.Streams.Stream_Element_Count; Counts : out Dictionary_Counts); -- Return the same results as Natools.Smaz.Tools.Evaluate_Dictionary, -- but hopefully more quickly, using Job_Count tasks. end Dictionary_Subprograms; package body Dictionary_Subprograms is procedure Evaluate_Dictionary (Job_Count : in Natural; Dict : in Dictionary; Corpus : in String_Lists.List; Compressed_Size : out Ada.Streams.Stream_Element_Count; Counts : out Dictionary_Counts) is Actual_Dict : Dictionary := Dict; begin Use_Dictionary (Actual_Dict); if Job_Count > 0 then Parallel_Evaluate_Dictionary (Job_Count, Actual_Dict, Corpus, Compressed_Size, Counts); else Evaluate_Dictionary (Actual_Dict, Corpus, Compressed_Size, Counts); end if; end Evaluate_Dictionary; procedure Parallel_Evaluate_Dictionary (Job_Count : in Positive; Dict : in Dictionary; Corpus : in String_Lists.List; Compressed_Size : out Ada.Streams.Stream_Element_Count; Counts : out Dictionary_Counts) |
︙ | ︙ | |||
304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 | Compressed_Size := 0; Counts := (others => 0); Parallel_Run (Cursor, Job_Count); end Parallel_Evaluate_Dictionary; end Dictionary_Subprograms; package Dict_256 is new Dictionary_Subprograms (Dictionary => Natools.Smaz_256.Dictionary, Dictionary_Entry => Ada.Streams.Stream_Element, String_Count => Natools.Smaz_Tools.String_Count, Dictionary_Counts => Tools_256.Dictionary_Counts, String_Lists => Natools.Smaz_Tools.String_Lists, Evaluate_Dictionary_Partial => Tools_256.Evaluate_Dictionary_Partial); overriding procedure Option (Handler : in out Callback; Id : in Options.Id; Argument : in String) is begin | > > > | 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 | Compressed_Size := 0; Counts := (others => 0); Parallel_Run (Cursor, Job_Count); end Parallel_Evaluate_Dictionary; end Dictionary_Subprograms; package Dict_256 is new Dictionary_Subprograms (Dictionary => Natools.Smaz_256.Dictionary, Dictionary_Entry => Ada.Streams.Stream_Element, String_Count => Natools.Smaz_Tools.String_Count, Dictionary_Counts => Tools_256.Dictionary_Counts, String_Lists => Natools.Smaz_Tools.String_Lists, Evaluate_Dictionary => Tools_256.Evaluate_Dictionary, Evaluate_Dictionary_Partial => Tools_256.Evaluate_Dictionary_Partial); overriding procedure Option (Handler : in out Callback; Id : in Options.Id; Argument : in String) is begin |
︙ | ︙ | |||
409 410 411 412 413 414 415 | Handler.Vlen_Verbatim := True; when Options.No_Vlen_Verbatim => Handler.Vlen_Verbatim := False; end case; end Option; | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 446 447 448 449 450 451 452 453 454 455 456 457 458 459 | Handler.Vlen_Verbatim := True; when Options.No_Vlen_Verbatim => Handler.Vlen_Verbatim := False; end case; end Option; function Getopt_Config return Getopt.Configuration is use Getopt; use Options; R : Getopt.Configuration; begin R.Add_Option ("ada-dict", 'A', Optional_Argument, Output_Ada_Dict); |
︙ | ︙ | |||
516 517 518 519 520 521 522 | Word : constant String := Natools.Smaz_Tools.String_Lists.Element (Position); New_Dict : constant Natools.Smaz_256.Dictionary := Tools_256.Append_String (Base, Word); New_Score : Ada.Streams.Stream_Element_Count; New_Counts : Tools_256.Dictionary_Counts; begin | | | 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 | Word : constant String := Natools.Smaz_Tools.String_Lists.Element (Position); New_Dict : constant Natools.Smaz_256.Dictionary := Tools_256.Append_String (Base, Word); New_Score : Ada.Streams.Stream_Element_Count; New_Counts : Tools_256.Dictionary_Counts; begin Dict_256.Evaluate_Dictionary (Job_Count, New_Dict, Input_Texts, New_Score, New_Counts); if New_Score < Score then Dict := Holders.To_Holder (New_Dict); Score := New_Score; Counts := New_Counts; New_Value := Ada.Strings.Unbounded.To_Unbounded_String (Word); |
︙ | ︙ | |||
566 567 568 569 570 571 572 | is Holder : Holders.Holder := Holders.To_Holder (Base); Pending : Natools.Smaz_Tools.String_Lists.List := Pending_Words; Score : Ada.Streams.Stream_Element_Count; Counts : Tools_256.Dictionary_Counts; Running : Boolean := True; begin | > | | 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 | is Holder : Holders.Holder := Holders.To_Holder (Base); Pending : Natools.Smaz_Tools.String_Lists.List := Pending_Words; Score : Ada.Streams.Stream_Element_Count; Counts : Tools_256.Dictionary_Counts; Running : Boolean := True; begin Dict_256.Evaluate_Dictionary (Job_Count, Base, Input_Texts, Score, Counts); while Running loop Optimization_Round (Holder, Score, Counts, Pending, |
︙ | ︙ | |||
911 912 913 914 915 916 917 | end if; when Actions.Evaluate => declare Total_Size : Ada.Streams.Stream_Element_Count; Counts : Tools_256.Dictionary_Counts; begin | | | 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 | end if; when Actions.Evaluate => declare Total_Size : Ada.Streams.Stream_Element_Count; Counts : Tools_256.Dictionary_Counts; begin Dict_256.Evaluate_Dictionary (Handler.Job_Count, Dictionary, Data_List, Total_Size, Counts); if Handler.Sx_Output then Sx_Output.Open_List; Sx_Output.Append_String (Ada.Strings.Fixed.Trim (Ada.Streams.Stream_Element_Count'Image (Total_Size), Ada.Strings.Both)); |
︙ | ︙ | |||
1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 | (Counter, Handler.Dict_Size, Handler.Score_Method), Handler.Vlen_Verbatim); end if; end; end case; end To_Dictionary; Opt_Config : constant Getopt.Configuration := Getopt_Config; Handler : Callback; Input_List, Input_Data : Natools.Smaz_Tools.String_Lists.List; begin Process_Command_Line : begin | > > > > > > > > > > > > > > > > > > > > > > | 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 | (Counter, Handler.Dict_Size, Handler.Score_Method), Handler.Vlen_Verbatim); end if; end; end case; end To_Dictionary; procedure Use_Dictionary (Dict : in out Natools.Smaz_256.Dictionary) is begin Natools.Smaz_Tools.Set_Dictionary_For_Trie_Search (Tools_256.To_String_List (Dict)); Dict.Hash := Natools.Smaz_Tools.Trie_Search'Access; for I in Dict.Offsets'Range loop if Natools.Smaz_Tools.Trie_Search (Natools.Smaz_256.Dict_Entry (Dict, I)) /= Natural (I) then Ada.Text_IO.Put_Line (Ada.Text_IO.Current_Error, "Fail at" & Ada.Streams.Stream_Element'Image (I) & " -> " & Natools.String_Escapes.C_Escape_Hex (Natools.Smaz_256.Dict_Entry (Dict, I), True) & " ->" & Natural'Image (Natools.Smaz_Tools.Trie_Search (Natools.Smaz_256.Dict_Entry (Dict, I)))); end if; end loop; end Use_Dictionary; Opt_Config : constant Getopt.Configuration := Getopt_Config; Handler : Callback; Input_List, Input_Data : Natools.Smaz_Tools.String_Lists.List; begin Process_Command_Line : begin |
︙ | ︙ |