Natools

Check-in [bc86bc41ee]
Login
Overview
Comment: tools/smaz: genericize Evaluate_Dictionary
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: bc86bc41eedf407f382301e89417a9b16e7b13f3
User & Date: nat on 2016-12-03 22:29:29
Other Links: manifest | tags
Context
2016-12-04
20:06
tools/smaz: genericize Optimize_Dictionary check-in: 5c617d9676 user: nat tags: trunk
2016-12-03
22:29
tools/smaz: genericize Evaluate_Dictionary check-in: bc86bc41ee user: nat tags: trunk
2016-12-02
21:12
tools/smaz: genericize Parallel_Evaluate_Dictionary check-in: 79a36ec957 user: nat tags: trunk
Changes

Modified tools/smaz.adb from [876acf9894] to [0fafdd0696].

121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143

   overriding procedure Argument
     (Handler  : in out Callback;
      Argument : in String)
     is null;


   procedure Evaluate_Dictionary
     (Job_Count : in Natural;
      Dict : in Natools.Smaz_256.Dictionary;
      Corpus : in Natools.Smaz_Tools.String_Lists.List;
      Compressed_Size : out Ada.Streams.Stream_Element_Count;
      Counts : out Tools_256.Dictionary_Counts);
      --  Dispatch to parallel or non-parallel version of Evaluate_Dictionary
      --  depending on Job_Count.

   function Getopt_Config return Getopt.Configuration;
      --  Build the configuration object

   procedure Optimization_Round
     (Dict : in out Holders.Holder;
      Score : in out Ada.Streams.Stream_Element_Count;
      Counts : in out Tools_256.Dictionary_Counts;







<
<
<
<
<
<
<
<
<







121
122
123
124
125
126
127









128
129
130
131
132
133
134

   overriding procedure Argument
     (Handler  : in out Callback;
      Argument : in String)
     is null;











   function Getopt_Config return Getopt.Configuration;
      --  Build the configuration object

   procedure Optimization_Round
     (Dict : in out Holders.Holder;
      Score : in out Ada.Streams.Stream_Element_Count;
      Counts : in out Tools_256.Dictionary_Counts;
182
183
184
185
186
187
188




189
190
191
192
193
194
195
196
197
198
199






200
201
202
203
204
205


206
207









208
209
210
211
212
213
214
215
216
217
218
219

220





















221
222
223
224
225
226
227

   function To_Dictionary
     (Handler : in Callback'Class;
      Input : in Natools.Smaz_Tools.String_Lists.List)
     return Natools.Smaz_256.Dictionary;
      --  Convert the input into a dictionary given the option in Handler






   generic
      type Dictionary (<>) is private;
      type Dictionary_Entry is (<>);
      type String_Count is range <>;

      type Dictionary_Counts is array (Dictionary_Entry) of String_Count;

      with package String_Lists
        is new Ada.Containers.Indefinite_Doubly_Linked_Lists (String);







      with procedure Evaluate_Dictionary_Partial
        (Dict : in Dictionary;
         Corpus_Entry : in String;
         Compressed_Size : in out Ada.Streams.Stream_Element_Count;
         Counts : in out Dictionary_Counts);



   package Dictionary_Subprograms is










      procedure Parallel_Evaluate_Dictionary
        (Job_Count : in Positive;
         Dict : in Dictionary;
         Corpus : in String_Lists.List;
         Compressed_Size : out Ada.Streams.Stream_Element_Count;
         Counts : out Dictionary_Counts);
         --  Return the same results as Natools.Smaz.Tools.Evaluate_Dictionary,
         --  but hopefully more quickly, using Job_Count tasks.

   end Dictionary_Subprograms;



   package body Dictionary_Subprograms is






















      procedure Parallel_Evaluate_Dictionary
        (Job_Count : in Positive;
         Dict : in Dictionary;
         Corpus : in String_Lists.List;
         Compressed_Size : out Ada.Streams.Stream_Element_Count;
         Counts : out Dictionary_Counts)







>
>
>
>











>
>
>
>
>
>






>
>


>
>
>
>
>
>
>
>
>












>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261

   function To_Dictionary
     (Handler : in Callback'Class;
      Input : in Natools.Smaz_Tools.String_Lists.List)
     return Natools.Smaz_256.Dictionary;
      --  Convert the input into a dictionary given the option in Handler

   procedure Use_Dictionary (Dict : in out Natools.Smaz_256.Dictionary);
      --  Register the contents of Dict for trie search and set Dict.Hash
      --  to Natools.Smaz_Tools.Trie_Search, so that Dict can actually be
      --  used. Every entry is then looked up again, and each entry whose
      --  lookup result differs from its own index is reported on the
      --  current error file.



   generic
      type Dictionary (<>) is private;
         --  Dictionary representation handed to the evaluation procedures
      type Dictionary_Entry is (<>);
         --  Discrete type indexing Dictionary_Counts
      type String_Count is range <>;
         --  Signed integer type of the elements of Dictionary_Counts

      type Dictionary_Counts is array (Dictionary_Entry) of String_Count;

      with package String_Lists
        is new Ada.Containers.Indefinite_Doubly_Linked_Lists (String);
         --  List instance used for the Corpus parameters below

      with procedure Evaluate_Dictionary
        (Dict : in Dictionary;
         Corpus : in String_Lists.List;
         Compressed_Size : out Ada.Streams.Stream_Element_Count;
         Counts : out Dictionary_Counts);
         --  Non-parallel evaluation, called by the package-level
         --  Evaluate_Dictionary when Job_Count is zero.

      with procedure Evaluate_Dictionary_Partial
        (Dict : in Dictionary;
         Corpus_Entry : in String;
         Compressed_Size : in out Ada.Streams.Stream_Element_Count;
         Counts : in out Dictionary_Counts);
         --  NOTE(review): presumably evaluates a single corpus entry and
         --  accumulates into Compressed_Size and Counts for the parallel
         --  version; its use is not visible here, confirm against the body.

      with procedure Use_Dictionary (Dict : in out Dictionary) is <>;
         --  Called on a copy of the dictionary before any evaluation.
         --  When omitted from an instantiation, a directly visible
         --  Use_Dictionary with a matching profile is used.

   package Dictionary_Subprograms is

      procedure Evaluate_Dictionary
        (Job_Count : in Natural;
         Dict : in Dictionary;
         Corpus : in String_Lists.List;
         Compressed_Size : out Ada.Streams.Stream_Element_Count;
         Counts : out Dictionary_Counts);
         --  Dispatch to parallel or non-parallel version of
         --  Evaluate_Dictionary depending on Job_Count.
         --  A positive Job_Count selects Parallel_Evaluate_Dictionary, zero
         --  selects the formal Evaluate_Dictionary. Use_Dictionary is applied
         --  to a copy of Dict first, and that copy is what gets evaluated.

      procedure Parallel_Evaluate_Dictionary
        (Job_Count : in Positive;
         Dict : in Dictionary;
         Corpus : in String_Lists.List;
         Compressed_Size : out Ada.Streams.Stream_Element_Count;
         Counts : out Dictionary_Counts);
         --  Return the same results as Natools.Smaz.Tools.Evaluate_Dictionary,
         --  but hopefully more quickly, using Job_Count tasks.
         --  NOTE(review): within this generic the sequential counterpart is
         --  presumably the formal Evaluate_Dictionary; confirm and update the
         --  package name quoted above if so.

   end Dictionary_Subprograms;



   package body Dictionary_Subprograms is

      procedure Evaluate_Dictionary
        (Job_Count : in Natural;
         Dict : in Dictionary;
         Corpus : in String_Lists.List;
         Compressed_Size : out Ada.Streams.Stream_Element_Count;
         Counts : out Dictionary_Counts)
      is
         Prepared : Dictionary := Dict;
         --  Dict is an "in" parameter, so Use_Dictionary works on a copy
      begin
         Use_Dictionary (Prepared);

         if Job_Count = 0 then
            --  No job requested: run the sequential formal procedure
            Evaluate_Dictionary
              (Dict => Prepared,
               Corpus => Corpus,
               Compressed_Size => Compressed_Size,
               Counts => Counts);
         else
            --  At least one job: spread the evaluation over Job_Count tasks
            Parallel_Evaluate_Dictionary
              (Job_Count => Job_Count,
               Dict => Prepared,
               Corpus => Corpus,
               Compressed_Size => Compressed_Size,
               Counts => Counts);
         end if;
      end Evaluate_Dictionary;


      procedure Parallel_Evaluate_Dictionary
        (Job_Count : in Positive;
         Dict : in Dictionary;
         Corpus : in String_Lists.List;
         Compressed_Size : out Ada.Streams.Stream_Element_Count;
         Counts : out Dictionary_Counts)
304
305
306
307
308
309
310

311
312
313
314
315
316
317

318

319
320
321
322
323
324
325
         Compressed_Size := 0;
         Counts := (others => 0);
         Parallel_Run (Cursor, Job_Count);
      end Parallel_Evaluate_Dictionary;

   end Dictionary_Subprograms;



   package Dict_256 is new Dictionary_Subprograms
     (Dictionary => Natools.Smaz_256.Dictionary,
      Dictionary_Entry => Ada.Streams.Stream_Element,
      String_Count => Natools.Smaz_Tools.String_Count,
      Dictionary_Counts => Tools_256.Dictionary_Counts,
      String_Lists => Natools.Smaz_Tools.String_Lists,

      Evaluate_Dictionary_Partial => Tools_256.Evaluate_Dictionary_Partial);



   overriding procedure Option
     (Handler  : in out Callback;
      Id       : in Options.Id;
      Argument : in String) is
   begin







>







>

>







338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
         Compressed_Size := 0;
         Counts := (others => 0);
         Parallel_Run (Cursor, Job_Count);
      end Parallel_Evaluate_Dictionary;

   end Dictionary_Subprograms;



   package Dict_256 is new Dictionary_Subprograms
     (Dictionary => Natools.Smaz_256.Dictionary,
      Dictionary_Entry => Ada.Streams.Stream_Element,
      String_Count => Natools.Smaz_Tools.String_Count,
      Dictionary_Counts => Tools_256.Dictionary_Counts,
      String_Lists => Natools.Smaz_Tools.String_Lists,
      Evaluate_Dictionary => Tools_256.Evaluate_Dictionary,
      Evaluate_Dictionary_Partial => Tools_256.Evaluate_Dictionary_Partial);
      --  Instance of the dictionary subprograms for Natools.Smaz_256.
      --  No actual is given for the formal Use_Dictionary, so its "is <>"
      --  default applies and the Use_Dictionary visible at this point is
      --  picked up.



   overriding procedure Option
     (Handler  : in out Callback;
      Id       : in Options.Id;
      Argument : in String) is
   begin
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
            Handler.Vlen_Verbatim := True;

         when Options.No_Vlen_Verbatim =>
            Handler.Vlen_Verbatim := False;
      end case;
   end Option;


   procedure Evaluate_Dictionary
     (Job_Count : in Natural;
      Dict : in Natools.Smaz_256.Dictionary;
      Corpus : in Natools.Smaz_Tools.String_Lists.List;
      Compressed_Size : out Ada.Streams.Stream_Element_Count;
      Counts : out Tools_256.Dictionary_Counts)
   is
      --  Work on a copy, since Dict is an "in" parameter and Hash is updated
      Actual_Dict : Natools.Smaz_256.Dictionary := Dict;
   begin
      --  Register the dictionary words for trie search and use that search
      --  as the hash function of the working copy
      Natools.Smaz_Tools.Set_Dictionary_For_Trie_Search
        (Tools_256.To_String_List (Actual_Dict));
      Actual_Dict.Hash := Natools.Smaz_Tools.Trie_Search'Access;

      --  Consistency check: looking up each entry must give back its own
      --  index, otherwise a diagnostic line is written to the current error
      --  file and processing continues
      for I in Actual_Dict.Offsets'Range loop
         if Natools.Smaz_Tools.Trie_Search (Natools.Smaz_256.Dict_Entry
           (Actual_Dict, I)) /= Natural (I)
         then
            Ada.Text_IO.Put_Line
              (Ada.Text_IO.Current_Error,
               "Fail at" & Ada.Streams.Stream_Element'Image (I)
               & " -> " & Natools.String_Escapes.C_Escape_Hex
                  (Natools.Smaz_256.Dict_Entry (Actual_Dict, I), True)
               & " ->" & Natural'Image (Natools.Smaz_Tools.Trie_Search
                  (Natools.Smaz_256.Dict_Entry (Actual_Dict, I))));
         end if;
      end loop;

      --  A positive Job_Count selects the parallel evaluation, zero selects
      --  the sequential one from Tools_256
      if Job_Count > 0 then
         Dict_256.Parallel_Evaluate_Dictionary (Job_Count,
            Actual_Dict, Corpus, Compressed_Size, Counts);
      else
         Tools_256.Evaluate_Dictionary
           (Actual_Dict, Corpus, Compressed_Size, Counts);
      end if;
   end Evaluate_Dictionary;


   function Getopt_Config return Getopt.Configuration is
      use Getopt;
      use Options;
      R : Getopt.Configuration;
   begin
      R.Add_Option ("ada-dict",      'A', Optional_Argument, Output_Ada_Dict);







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







446
447
448
449
450
451
452





































453
454
455
456
457
458
459
            Handler.Vlen_Verbatim := True;

         when Options.No_Vlen_Verbatim =>
            Handler.Vlen_Verbatim := False;
      end case;
   end Option;







































   function Getopt_Config return Getopt.Configuration is
      use Getopt;
      use Options;
      R : Getopt.Configuration;
   begin
      R.Add_Option ("ada-dict",      'A', Optional_Argument, Output_Ada_Dict);
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
            Word : constant String
              := Natools.Smaz_Tools.String_Lists.Element (Position);
            New_Dict : constant Natools.Smaz_256.Dictionary
              := Tools_256.Append_String (Base, Word);
            New_Score : Ada.Streams.Stream_Element_Count;
            New_Counts : Tools_256.Dictionary_Counts;
         begin
            Evaluate_Dictionary
              (Job_Count, New_Dict, Input_Texts, New_Score, New_Counts);

            if New_Score < Score then
               Dict := Holders.To_Holder (New_Dict);
               Score := New_Score;
               Counts := New_Counts;
               New_Value := Ada.Strings.Unbounded.To_Unbounded_String (Word);







|







516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
            Word : constant String
              := Natools.Smaz_Tools.String_Lists.Element (Position);
            New_Dict : constant Natools.Smaz_256.Dictionary
              := Tools_256.Append_String (Base, Word);
            New_Score : Ada.Streams.Stream_Element_Count;
            New_Counts : Tools_256.Dictionary_Counts;
         begin
            Dict_256.Evaluate_Dictionary
              (Job_Count, New_Dict, Input_Texts, New_Score, New_Counts);

            if New_Score < Score then
               Dict := Holders.To_Holder (New_Dict);
               Score := New_Score;
               Counts := New_Counts;
               New_Value := Ada.Strings.Unbounded.To_Unbounded_String (Word);
566
567
568
569
570
571
572

573
574
575
576
577
578
579
580
   is
      Holder : Holders.Holder := Holders.To_Holder (Base);
      Pending : Natools.Smaz_Tools.String_Lists.List := Pending_Words;
      Score : Ada.Streams.Stream_Element_Count;
      Counts : Tools_256.Dictionary_Counts;
      Running : Boolean := True;
   begin

      Evaluate_Dictionary (Job_Count, Base, Input_Texts, Score, Counts);

      while Running loop
         Optimization_Round
           (Holder,
            Score,
            Counts,
            Pending,







>
|







566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
   is
      Holder : Holders.Holder := Holders.To_Holder (Base);
      Pending : Natools.Smaz_Tools.String_Lists.List := Pending_Words;
      Score : Ada.Streams.Stream_Element_Count;
      Counts : Tools_256.Dictionary_Counts;
      Running : Boolean := True;
   begin
      Dict_256.Evaluate_Dictionary
        (Job_Count, Base, Input_Texts, Score, Counts);

      while Running loop
         Optimization_Round
           (Holder,
            Score,
            Counts,
            Pending,
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
            end if;

         when Actions.Evaluate =>
            declare
               Total_Size : Ada.Streams.Stream_Element_Count;
               Counts : Tools_256.Dictionary_Counts;
            begin
               Evaluate_Dictionary (Handler.Job_Count,
                  Dictionary, Data_List, Total_Size, Counts);

               if Handler.Sx_Output then
                  Sx_Output.Open_List;
                  Sx_Output.Append_String (Ada.Strings.Fixed.Trim
                    (Ada.Streams.Stream_Element_Count'Image (Total_Size),
                     Ada.Strings.Both));







|







912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
            end if;

         when Actions.Evaluate =>
            declare
               Total_Size : Ada.Streams.Stream_Element_Count;
               Counts : Tools_256.Dictionary_Counts;
            begin
               Dict_256.Evaluate_Dictionary (Handler.Job_Count,
                  Dictionary, Data_List, Total_Size, Counts);

               if Handler.Sx_Output then
                  Sx_Output.Open_List;
                  Sx_Output.Append_String (Ada.Strings.Fixed.Trim
                    (Ada.Streams.Stream_Element_Count'Image (Total_Size),
                     Ada.Strings.Both));
1114
1115
1116
1117
1118
1119
1120






















1121
1122
1123
1124
1125
1126
1127
                       (Counter, Handler.Dict_Size, Handler.Score_Method),
                     Handler.Vlen_Verbatim);
               end if;
            end;
      end case;
   end To_Dictionary;
























   Opt_Config : constant Getopt.Configuration := Getopt_Config;
   Handler : Callback;
   Input_List, Input_Data : Natools.Smaz_Tools.String_Lists.List;
begin
   Process_Command_Line :
   begin







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
                       (Counter, Handler.Dict_Size, Handler.Score_Method),
                     Handler.Vlen_Verbatim);
               end if;
            end;
      end case;
   end To_Dictionary;


   procedure Use_Dictionary (Dict : in out Natools.Smaz_256.Dictionary) is
   begin
      --  Make the trie search aware of the dictionary words, then use it
      --  as the hash function of Dict
      Natools.Smaz_Tools.Set_Dictionary_For_Trie_Search
        (Tools_256.To_String_List (Dict));
      Dict.Hash := Natools.Smaz_Tools.Trie_Search'Access;

      --  Every entry must be found at its own index; report those that
      --  are not on the current error file
      Check_Entries :
      for Code in Dict.Offsets'Range loop
         declare
            Word : constant String
              := Natools.Smaz_256.Dict_Entry (Dict, Code);
            Found : constant Integer
              := Natools.Smaz_Tools.Trie_Search (Word);
         begin
            if Found /= Natural (Code) then
               Ada.Text_IO.Put_Line
                 (Ada.Text_IO.Current_Error,
                  "Fail at" & Ada.Streams.Stream_Element'Image (Code)
                  & " -> " & Natools.String_Escapes.C_Escape_Hex (Word, True)
                  & " ->" & Natural'Image (Found));
            end if;
         end;
      end loop Check_Entries;
   end Use_Dictionary;


   Opt_Config : constant Getopt.Configuration := Getopt_Config;
   Handler : Callback;
   Input_List, Input_Data : Natools.Smaz_Tools.String_Lists.List;
begin
   Process_Command_Line :
   begin