Natools

Check-in [bc5e6e89ab]
Login
Overview
Comment:tools/smaz: new command line option to set built dictionary size
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: bc5e6e89ab9584b1c428459bc47925e54acd9024
User & Date: nat on 2016-11-05 20:02:52
Other Links: manifest | tags
Context
2016-11-06
20:42
tools/smaz: add options to select variable-length verbatim codes check-in: 2365190245 user: nat tags: trunk
2016-11-05
20:02
tools/smaz: new command line option to set built dictionary size check-in: bc5e6e89ab user: nat tags: trunk
2016-11-04
22:42
tools/smaz: fix the letter of latest command line option check-in: 03959c30d5 user: nat tags: trunk
Changes

Modified tools/smaz.adb from [ddd4d7f220] to [c48a6ca8b4].

71
72
73
74
75
76
77

78
79
80
81
82
83
84
         Filter_Threshold,
         Output_Hash,
         Job_Count,
         Help,
         Sx_Dict_Output,
         Min_Sub_Size,
         Max_Sub_Size,

         Max_Pending,
         Stat_Output,
         No_Stat_Output,
         Text_List_Input,
         Fast_Text_Input,
         Max_Word_Size,
         Sx_Output,







>







71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
         Filter_Threshold,
         Output_Hash,
         Job_Count,
         Help,
         Sx_Dict_Output,
         Min_Sub_Size,
         Max_Sub_Size,
         Dict_Size,
         Max_Pending,
         Stat_Output,
         No_Stat_Output,
         Text_List_Input,
         Fast_Text_Input,
         Max_Word_Size,
         Sx_Output,
93
94
95
96
97
98
99

100
101
102
103
104
105
106
      Need_Dictionary : Boolean := False;
      Stat_Output : Boolean := False;
      Sx_Output : Boolean := False;
      Sx_Dict_Output : Boolean := False;
      Min_Sub_Size : Positive := 1;
      Max_Sub_Size : Positive := 3;
      Max_Word_Size : Positive := 10;

      Max_Pending : Ada.Containers.Count_Type
        := Ada.Containers.Count_Type'Last;
      Job_Count : Natural := 0;
      Filter_Threshold : Natools.Smaz.Tools.String_Count := 0;
      Score_Method : Methods.Enum := Methods.Encoded;
      Action : Actions.Enum := Actions.Nothing;
      Ada_Dictionary : Ada.Strings.Unbounded.Unbounded_String;







>







94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
      Need_Dictionary : Boolean := False;
      Stat_Output : Boolean := False;
      Sx_Output : Boolean := False;
      Sx_Dict_Output : Boolean := False;
      Min_Sub_Size : Positive := 1;
      Max_Sub_Size : Positive := 3;
      Max_Word_Size : Positive := 10;
      Dict_Size : Positive := 254;
      Max_Pending : Ada.Containers.Count_Type
        := Ada.Containers.Count_Type'Last;
      Job_Count : Natural := 0;
      Filter_Threshold : Natools.Smaz.Tools.String_Count := 0;
      Score_Method : Methods.Enum := Methods.Encoded;
      Action : Actions.Enum := Actions.Nothing;
      Ada_Dictionary : Ada.Strings.Unbounded.Unbounded_String;
313
314
315
316
317
318
319



320
321
322
323
324
325
326
              := Natools.Smaz.Tools.String_Count'Value (Argument);

         when Options.Score_Method =>
            Handler.Score_Method := Methods.Enum'Value (Argument);

         when Options.Max_Pending =>
            Handler.Max_Pending := Ada.Containers.Count_Type'Value (Argument);



      end case;
   end Option;


   procedure Evaluate_Dictionary
     (Job_Count : in Natural;
      Dict : in Natools.Smaz.Dictionary;







>
>
>







315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
              := Natools.Smaz.Tools.String_Count'Value (Argument);

         when Options.Score_Method =>
            Handler.Score_Method := Methods.Enum'Value (Argument);

         when Options.Max_Pending =>
            Handler.Max_Pending := Ada.Containers.Count_Type'Value (Argument);

         when Options.Dict_Size =>
            Handler.Dict_Size := Positive'Value (Argument);
      end case;
   end Option;


   procedure Evaluate_Dictionary
     (Job_Count : in Natural;
      Dict : in Natools.Smaz.Dictionary;
370
371
372
373
374
375
376

377
378
379
380
381
382
383
      R.Add_Option ("filter",        'F', Required_Argument, Filter_Threshold);
      R.Add_Option ("help",          'h', No_Argument,       Help);
      R.Add_Option ("hash-pkg",      'H', Required_Argument, Output_Hash);
      R.Add_Option ("jobs",          'j', Required_Argument, Job_Count);
      R.Add_Option ("sx-dict",       'L', No_Argument,       Sx_Dict_Output);
      R.Add_Option ("min-substring", 'm', Required_Argument, Min_Sub_Size);
      R.Add_Option ("max-substring", 'M', Required_Argument, Max_Sub_Size);

      R.Add_Option ("max-pending",   'N', Required_Argument, Max_Pending);
      R.Add_Option ("stats",         's', No_Argument,       Stat_Output);
      R.Add_Option ("no-stats",      'S', No_Argument,       No_Stat_Output);
      R.Add_Option ("text-list",     't', No_Argument,       Text_List_Input);
      R.Add_Option ("fast-text-list", 'T', No_Argument,       Fast_Text_Input);
      R.Add_Option ("max-word-len",  'W', Required_Argument, Max_Word_Size);
      R.Add_Option ("s-expr",        'x', No_Argument,       Sx_Output);







>







375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
      R.Add_Option ("filter",        'F', Required_Argument, Filter_Threshold);
      R.Add_Option ("help",          'h', No_Argument,       Help);
      R.Add_Option ("hash-pkg",      'H', Required_Argument, Output_Hash);
      R.Add_Option ("jobs",          'j', Required_Argument, Job_Count);
      R.Add_Option ("sx-dict",       'L', No_Argument,       Sx_Dict_Output);
      R.Add_Option ("min-substring", 'm', Required_Argument, Min_Sub_Size);
      R.Add_Option ("max-substring", 'M', Required_Argument, Max_Sub_Size);
      R.Add_Option ("dict-size",     'n', Required_Argument, Dict_Size);
      R.Add_Option ("max-pending",   'N', Required_Argument, Max_Pending);
      R.Add_Option ("stats",         's', No_Argument,       Stat_Output);
      R.Add_Option ("no-stats",      'S', No_Argument,       No_Stat_Output);
      R.Add_Option ("text-list",     't', No_Argument,       Text_List_Input);
      R.Add_Option ("fast-text-list", 'T', No_Argument,       Fast_Text_Input);
      R.Add_Option ("max-word-len",  'W', Required_Argument, Max_Word_Size);
      R.Add_Option ("s-expr",        'x', No_Argument,       Sx_Output);
749
750
751
752
753
754
755





756
757
758
759
760
761
762
                 & " during optimization");

            when Options.Max_Pending =>
               Put_Line (Output, " <count>");
               Put_Line (Output, Indent & Indent
                 & "Maximum size of candidate list"
                 & " when building a dictionary");





         end case;
      end loop;
   end Print_Help;


   function To_Dictionary
     (Handler : in Callback'Class;







>
>
>
>
>







755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
                 & " during optimization");

            when Options.Max_Pending =>
               Put_Line (Output, " <count>");
               Put_Line (Output, Indent & Indent
                 & "Maximum size of candidate list"
                 & " when building a dictionary");

            when Options.Dict_Size =>
               Put_Line (Output, " <count>");
               Put_Line (Output, Indent & Indent
                 & "Number of words in the dictionary to build");
         end case;
      end loop;
   end Print_Help;


   function To_Dictionary
     (Handler : in Callback'Class;
791
792
793
794
795
796
797




798
799
800
801
802
803
804
805
               end if;

               if Handler.Dict_Source = Dict_Sources.Text_List then
                  declare
                     Selected, Pending : Natools.Smaz.Tools.String_Lists.List;
                  begin
                     Natools.Smaz.Tools.Simple_Dictionary_And_Pending




                       (Counter, 254, Selected, Pending, Handler.Max_Pending);

                     return Optimize_Dictionary
                       (Natools.Smaz.Tools.To_Dictionary (Selected, True),
                        Pending,
                        Input,
                        Handler.Job_Count,
                        Handler.Score_Method);







>
>
>
>
|







802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
               end if;

               if Handler.Dict_Source = Dict_Sources.Text_List then
                  declare
                     Selected, Pending : Natools.Smaz.Tools.String_Lists.List;
                  begin
                     Natools.Smaz.Tools.Simple_Dictionary_And_Pending
                       (Counter,
                        Handler.Dict_Size,
                        Selected,
                        Pending,
                        Handler.Max_Pending);

                     return Optimize_Dictionary
                       (Natools.Smaz.Tools.To_Dictionary (Selected, True),
                        Pending,
                        Input,
                        Handler.Job_Count,
                        Handler.Score_Method);