Natools

Check-in [d418194c20]
Login
Overview
Comment: tools/smaz: refactor dictionary evaluation in a standalone subprogram
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: d418194c204e967a348645b67b8e1f055bdb3f2c
User & Date: nat on 2016-10-26 20:58:07
Other Links: manifest | tags
Context
2016-10-27
21:58
tools/smaz: add a command-line option for optimized dictionary build check-in: cbe3489d15 user: nat tags: trunk
2016-10-26
20:58
tools/smaz: refactor dictionary evaluation in a standalone subprogram check-in: d418194c20 user: nat tags: trunk
2016-10-25
19:55
smaz-tools: new primitive to build a dictionary with pending words check-in: 214d918405 user: nat tags: trunk
Changes

Modified tools/smaz.adb from [6c1366e4ae] to [ce2e2cd745].

98
99
100
101
102
103
104









105
106
107
108
109
110
111

   overriding procedure Argument
     (Handler  : in out Callback;
      Argument : in String)
     is null;











   function Getopt_Config return Getopt.Configuration;
      --  Build the configuration object

   procedure Parallel_Evaluate_Dictionary
     (Job_Count : in Positive;
      Dict : in Natools.Smaz.Dictionary;
      Corpus : in Natools.Smaz.Tools.String_Lists.List;







>
>
>
>
>
>
>
>
>







98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120

   overriding procedure Argument
     (Handler  : in out Callback;
      Argument : in String)
     is null;


   procedure Evaluate_Dictionary
     (Job_Count : in Natural;
      Dict : in Natools.Smaz.Dictionary;
      Corpus : in Natools.Smaz.Tools.String_Lists.List;
      Compressed_Size : out Ada.Streams.Stream_Element_Count;
      Counts : out Natools.Smaz.Tools.Dictionary_Counts);
      --  Dispatch to parallel or non-parallel version of Evaluate_Dictionary
      --  depending on Job_Count.

   function Getopt_Config return Getopt.Configuration;
      --  Build the configuration object

   procedure Parallel_Evaluate_Dictionary
     (Job_Count : in Positive;
      Dict : in Natools.Smaz.Dictionary;
      Corpus : in Natools.Smaz.Tools.String_Lists.List;
209
210
211
212
213
214
215




































216
217
218
219
220
221
222

         when Options.Filter_Threshold =>
            Handler.Filter_Threshold
              := Natools.Smaz.Tools.String_Count'Value (Argument);
      end case;
   end Option;






































   function Getopt_Config return Getopt.Configuration is
      use Getopt;
      use Options;
      R : Getopt.Configuration;
   begin
      R.Add_Option ("ada-dict",      'A', Optional_Argument, Output_Ada_Dict);







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267

         when Options.Filter_Threshold =>
            Handler.Filter_Threshold
              := Natools.Smaz.Tools.String_Count'Value (Argument);
      end case;
   end Option;


   --  Evaluate Dict against Corpus, filling Compressed_Size and Counts.
   --  A private copy of Dict is set up to use Trie_Search as its hash,
   --  every entry is checked against that search, and the evaluation is
   --  then run in parallel when Job_Count > 0, or sequentially otherwise.
   procedure Evaluate_Dictionary
     (Job_Count : in Natural;
      Dict : in Natools.Smaz.Dictionary;
      Corpus : in Natools.Smaz.Tools.String_Lists.List;
      Compressed_Size : out Ada.Streams.Stream_Element_Count;
      Counts : out Natools.Smaz.Tools.Dictionary_Counts)
   is
      --  Dict is an "in" parameter and cannot be modified, so work on
      --  a local copy whose Hash component can be replaced below.
      Actual_Dict : Natools.Smaz.Dictionary := Dict;
   begin
      --  NOTE(review): presumably registers Actual_Dict as the dictionary
      --  looked up by Trie_Search; confirm in Natools.Smaz.Tools.
      --  It is called before Trie_Search is installed and used below.
      Natools.Smaz.Tools.Set_Dictionary_For_Trie_Search (Actual_Dict);
      Actual_Dict.Hash := Natools.Smaz.Tools.Trie_Search'Access;

      --  Sanity check: searching for each dictionary entry is expected to
      --  yield the index of that entry. Mismatches are only reported on
      --  the standard error stream; evaluation proceeds regardless.
      for I in Actual_Dict.Offsets'Range loop
         if Natools.Smaz.Tools.Trie_Search (Natools.Smaz.Dict_Entry
           (Actual_Dict, I)) /= Natural (I)
         then
            --  Report the index, the escaped entry text, and the index
            --  that Trie_Search actually returned for it.
            Ada.Text_IO.Put_Line
              (Ada.Text_IO.Current_Error,
               "Fail at" & Ada.Streams.Stream_Element'Image (I)
               & " -> " & Natools.String_Escapes.C_Escape_Hex
                  (Natools.Smaz.Dict_Entry (Actual_Dict, I), True)
               & " ->" & Natural'Image (Natools.Smaz.Tools.Trie_Search
                  (Natools.Smaz.Dict_Entry (Actual_Dict, I))));
         end if;
      end loop;

      --  Job_Count = 0 selects the non-parallel implementation; any
      --  positive value is handed to the parallel one as its job count.
      if Job_Count > 0 then
         Parallel_Evaluate_Dictionary (Job_Count,
            Actual_Dict, Corpus, Compressed_Size, Counts);
      else
         Natools.Smaz.Tools.Evaluate_Dictionary
           (Actual_Dict, Corpus, Compressed_Size, Counts);
      end if;
   end Evaluate_Dictionary;


   function Getopt_Config return Getopt.Configuration is
      use Getopt;
      use Options;
      R : Getopt.Configuration;
   begin
      R.Add_Option ("ada-dict",      'A', Optional_Argument, Output_Ada_Dict);
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
                  end loop;

                  Print_Line (Original_Total, Output_Total, Base64_Total);
               end;
            end if;

         when Actions.Evaluate =>
            Natools.Smaz.Tools.Set_Dictionary_For_Trie_Search (Dictionary);
            Dictionary.Hash := Natools.Smaz.Tools.Trie_Search'Access;

            declare
               Total_Size : Ada.Streams.Stream_Element_Count;
               Counts : Natools.Smaz.Tools.Dictionary_Counts;
            begin
               if Handler.Job_Count > 0 then
                  Parallel_Evaluate_Dictionary (Handler.Job_Count,
                     Dictionary, Input_Data, Total_Size, Counts);
               else
                  Natools.Smaz.Tools.Evaluate_Dictionary
                    (Dictionary, Input_Data, Total_Size, Counts);
               end if;

               if Handler.Sx_Output then
                  Sx_Output.Open_List;
                  Sx_Output.Append_String (Ada.Strings.Fixed.Trim
                    (Ada.Streams.Stream_Element_Count'Image (Total_Size),
                     Ada.Strings.Both));








<
<
<




<
|
|
<
<
<
<







741
742
743
744
745
746
747



748
749
750
751

752
753




754
755
756
757
758
759
760
                  end loop;

                  Print_Line (Original_Total, Output_Total, Base64_Total);
               end;
            end if;

         when Actions.Evaluate =>



            declare
               Total_Size : Ada.Streams.Stream_Element_Count;
               Counts : Natools.Smaz.Tools.Dictionary_Counts;
            begin

               Evaluate_Dictionary (Handler.Job_Count,
                  Dictionary, Input_Data, Total_Size, Counts);





               if Handler.Sx_Output then
                  Sx_Output.Open_List;
                  Sx_Output.Append_String (Ada.Strings.Fixed.Trim
                    (Ada.Streams.Stream_Element_Count'Image (Total_Size),
                     Ada.Strings.Both));