Overview
Comment: | tools/smaz: actually implement dictionary optimization |
---|---|
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
fb7d230fe5900baca2b05464b4924ec3 |
User & Date: | nat on 2016-10-29 20:29:20 |
Other Links: | manifest | tags |
Context
2016-10-30
| ||
18:19 | tools/sxcat: add a command-line option to output a list of input atoms check-in: ac88f5abfb user: nat tags: trunk | |
2016-10-29
| ||
20:29 | tools/smaz: actually implement dictionary optimization check-in: fb7d230fe5 user: nat tags: trunk | |
2016-10-28
| ||
20:56 | tools/smaz: refactor scores out of the evaluation block check-in: fbe80ac184 user: nat tags: trunk | |
Changes
Modified tools/smaz.adb from [7214a27560] to [fa5ab81891].
︙ | ︙ | |||
199 200 201 202 203 204 205 206 207 208 209 210 211 212 | function To_Dictionary (Handler : in Callback'Class; Input : in Natools.Smaz.Tools.String_Lists.List) return Natools.Smaz.Dictionary; -- Convert the input into a dictionary given the option in Handler overriding procedure Option (Handler : in out Callback; Id : in Options.Id; Argument : in String) is begin case Id is | > > > > > > | 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 | function To_Dictionary (Handler : in Callback'Class; Input : in Natools.Smaz.Tools.String_Lists.List) return Natools.Smaz.Dictionary; -- Convert the input into a dictionary given the option in Handler function Worst_Index (Dict : in Natools.Smaz.Dictionary; Counts : in Natools.Smaz.Tools.Dictionary_Counts) return Ada.Streams.Stream_Element; -- Return the index of the worst-scored item in Dict overriding procedure Option (Handler : in out Callback; Id : in Options.Id; Argument : in String) is begin case Id is |
︙ | ︙ | |||
355 356 357 358 359 360 361 | Score : in out Ada.Streams.Stream_Element_Count; Counts : in out Natools.Smaz.Tools.Dictionary_Counts; Pending_Words : in out Natools.Smaz.Tools.String_Lists.List; Input_Texts : in Natools.Smaz.Tools.String_Lists.List; Job_Count : in Natural; Updated : out Boolean) is | > | > > > > > > > > > > > > | | | < > > > > > > | | > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > | 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 | Score : in out Ada.Streams.Stream_Element_Count; Counts : in out Natools.Smaz.Tools.Dictionary_Counts; Pending_Words : in out Natools.Smaz.Tools.String_Lists.List; Input_Texts : in Natools.Smaz.Tools.String_Lists.List; Job_Count : in Natural; Updated : out Boolean) is use type Ada.Streams.Stream_Element_Offset; New_Value : Ada.Strings.Unbounded.Unbounded_String; New_Position : Natools.Smaz.Tools.String_Lists.Cursor; Worst_Index : constant Ada.Streams.Stream_Element := Smaz.Worst_Index (Dict.Element, Counts); Worst_Value : constant String := Natools.Smaz.Dict_Entry (Dict.Element, Worst_Index); Worst_Count : constant Natools.Smaz.Tools.String_Count := Counts (Worst_Index); Base : constant Natools.Smaz.Dictionary := Natools.Smaz.Tools.Remove_Element (Dict.Element, Worst_Index); Old_Score : constant Ada.Streams.Stream_Element_Count := Score; begin Updated := False; for Position in Pending_Words.Iterate loop declare Word : constant String := Natools.Smaz.Tools.String_Lists.Element (Position); New_Dict : constant Natools.Smaz.Dictionary := Natools.Smaz.Tools.Append_String (Base, Word); New_Score : Ada.Streams.Stream_Element_Count; New_Counts : Natools.Smaz.Tools.Dictionary_Counts; begin Evaluate_Dictionary (Job_Count, New_Dict, Input_Texts, New_Score, New_Counts); if 
New_Score < Score then Dict := Holders.To_Holder (New_Dict); Score := New_Score; Counts := New_Counts; New_Value := Ada.Strings.Unbounded.To_Unbounded_String (Word); New_Position := Position; Updated := True; end if; end; end loop; if Updated then Pending_Words.Delete (New_Position); Pending_Words.Append (Worst_Value); Ada.Text_IO.Put_Line (Ada.Text_IO.Current_Error, "Removing" & Worst_Count'Img & "x " & Natools.String_Escapes.C_Escape_Hex (Worst_Value, True) & ", adding" & Counts (Dict.Element.Dict_Last)'Img & "x " & Natools.String_Escapes.C_Escape_Hex (Ada.Strings.Unbounded.To_String (New_Value), True) & ", size" & Score'Img & " (" & Ada.Streams.Stream_Element_Offset'Image (Score - Old_Score) & ')'); end if; end Optimization_Round; function Optimize_Dictionary (Base : in Natools.Smaz.Dictionary; Pending_Words : in Natools.Smaz.Tools.String_Lists.List; Input_Texts : in Natools.Smaz.Tools.String_Lists.List; |
︙ | ︙ | |||
704 705 706 707 708 709 710 711 712 713 714 715 716 717 | return Natools.Smaz.Tools.To_Dictionary (Natools.Smaz.Tools.Simple_Dictionary (Counter, 254), True); end if; end; end case; end To_Dictionary; Opt_Config : constant Getopt.Configuration := Getopt_Config; Handler : Callback; Input_List, Input_Data : Natools.Smaz.Tools.String_Lists.List; begin Process_Command_Line : begin | > > > > > > > > > > > > > > > > > > > > > > > | 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 | return Natools.Smaz.Tools.To_Dictionary (Natools.Smaz.Tools.Simple_Dictionary (Counter, 254), True); end if; end; end case; end To_Dictionary; function Worst_Index (Dict : in Natools.Smaz.Dictionary; Counts : in Natools.Smaz.Tools.Dictionary_Counts) return Ada.Streams.Stream_Element is Result : Ada.Streams.Stream_Element := 0; Worst_Score : Score_Value := Score_Encoded (Dict, Counts, 0); S : Score_Value; begin for I in 1 .. Dict.Dict_Last loop S := Score_Encoded (Dict, Counts, I); if S < Worst_Score then Result := I; Worst_Score := S; end if; end loop; return Result; end Worst_Index; Opt_Config : constant Getopt.Configuration := Getopt_Config; Handler : Callback; Input_List, Input_Data : Natools.Smaz.Tools.String_Lists.List; begin Process_Command_Line : begin |
︙ | ︙ |