Natools

Check-in [b2dbec1810]
Login
Overview
Comment: tools/smaz: refactor word count construction
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: b2dbec1810052675e57598e59cd3a5d0b125d1b0
User & Date: nat on 2017-05-13 20:47:16
Other Links: manifest | tags
Context
2017-05-14
20:16
tools/smaz: implement forced words for unoptimized dictionary generation check-in: bb325e8d12 user: nat tags: trunk
2017-05-13
20:47
tools/smaz: refactor word count construction check-in: b2dbec1810 user: nat tags: trunk
2017-05-12
22:26
tools/smaz: move Adjust_Dictionary call into To_Dictionary function check-in: 3b85da8290 user: nat tags: trunk
Changes

Modified tools/smaz.adb from [28df8aef20] to [d59b535a0b].

354
355
356
357
358
359
360






361
362
363
364
365
366
367
         Code : in Dictionary_Entry)
        return Natools.S_Expressions.Atom;
         --  S-expression image of Code

      function Is_In_Dict (Dict : Dictionary; Word : String) return Boolean;
         --  Return whether Word is in Dict (inefficient).
         --  The body runs a loop that returns True as soon as a match is
         --  found, and returns False once the loop ends without a match.







      procedure Optimization_Round
        (Dict : in out Holders.Holder;
         Score : in out Ada.Streams.Stream_Element_Count;
         Counts : in out Dictionary_Counts;
         Pending_Words : in out String_Lists.List;
         Input_Texts : in String_Lists.List;
         Job_Count : in Natural;







>
>
>
>
>
>







354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
         Code : in Dictionary_Entry)
        return Natools.S_Expressions.Atom;
         --  S-expression image of Code

      function Is_In_Dict (Dict : Dictionary; Word : String) return Boolean;
         --  Return whether Word is in Dict (inefficient)

      function Make_Word_Counter
        (Handler : in Callback'Class;
         Input : in String_Lists.List)
        return Word_Counter;
         --  Make a word counter from an input word list.
         --  Every string of Input is passed to Add_Substrings with the size
         --  bounds Handler.Min_Sub_Size and Handler.Max_Sub_Size, and also
         --  to Add_Words with the bounds Handler.Max_Sub_Size + 1 and
         --  Handler.Max_Word_Size when Max_Word_Size exceeds Max_Sub_Size.
         --  When Handler.Filter_Threshold is positive, Filter_By_Count is
         --  applied to the counter before it is returned.

      procedure Optimization_Round
        (Dict : in out Holders.Holder;
         Score : in out Ada.Streams.Stream_Element_Count;
         Counts : in out Dictionary_Counts;
         Pending_Words : in out String_Lists.List;
         Input_Texts : in String_Lists.List;
         Job_Count : in Natural;
510
511
512
513
514
515
516




























517
518
519
520
521
522
523
               return True;
            end if;
         end loop;

         return False;
      end Is_In_Dict;






























      procedure Optimization_Round
        (Dict : in out Holders.Holder;
         Score : in out Ada.Streams.Stream_Element_Count;
         Counts : in out Dictionary_Counts;
         Pending_Words : in out String_Lists.List;
         Input_Texts : in String_Lists.List;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
               return True;
            end if;
         end loop;

         return False;
      end Is_In_Dict;


      function Make_Word_Counter
        (Handler : in Callback'Class;
         Input : in String_Lists.List)
        return Word_Counter
      is
         --  Build the word counter shared by the text-based dictionary
         --  sources.  Handler supplies the size bounds and the filtering
         --  threshold; Input supplies the strings to be counted.
         use type Natools.Smaz_Tools.String_Count;
      begin
         return Result : Word_Counter do
            for Text of Input loop
               --  First pass over Text, bounded by the substring sizes
               Add_Substrings
                 (Result,
                  Text,
                  Handler.Min_Sub_Size,
                  Handler.Max_Sub_Size);

               --  Second pass only when the word size range extends past
               --  the substring size range; it starts right after it.
               if Handler.Max_Sub_Size < Handler.Max_Word_Size then
                  Add_Words
                    (Result,
                     Text,
                     Handler.Max_Sub_Size + 1,
                     Handler.Max_Word_Size);
               end if;
            end loop;

            --  A zero threshold leaves the counter unfiltered
            --  NOTE(review): presumably Filter_By_Count drops the entries
            --  counted fewer times than the threshold -- confirm in
            --  Natools.Smaz_Tools.
            if Handler.Filter_Threshold > 0 then
               Filter_By_Count
                 (Result, String_Count (Handler.Filter_Threshold));
            end if;
         end return;
      end Make_Word_Counter;


      procedure Optimization_Round
        (Dict : in out Holders.Holder;
         Score : in out Ada.Streams.Stream_Element_Count;
         Counts : in out Dictionary_Counts;
         Pending_Words : in out String_Lists.List;
         Input_Texts : in String_Lists.List;
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098

1099
1100



1101
1102
1103
1104
1105
1106
1107
1108
1109
1110


      function To_Dictionary
        (Handler : in Callback'Class;
         Input : in String_Lists.List;
         Data_List : in String_Lists.List;
         Method : in Methods)
        return Dictionary
      is
         --  Build a dictionary from Input, following Handler.Dict_Source:
         --   * S_Expression: Input is converted directly, then passed
         --     through Adjust_Dictionary along with Data_List and Method;
         --   * Text_List: words are counted from Input, a selection and a
         --     pending list are derived, then Optimize_Dictionary is run;
         --   * Unoptimized_Text_List: words are counted from Input and the
         --     result of Simple_Dictionary is converted as-is.
         --  Data_List is only read on the S_Expression path.
         use type Natools.Smaz_Tools.String_Count;
         use type Dict_Sources.Enum;
         --  The Dict_Sources.Enum operators are needed for the "=" test
         --  on Handler.Dict_Source inside the text-list alternative.
      begin
         case Handler.Dict_Source is
            when Dict_Sources.S_Expression =>
               return Adjust_Dictionary
                 (Handler,
                  To_Dictionary (Input, Handler.Vlen_Verbatim),
                  Data_List,
                  Method);

            when Dict_Sources.Text_List | Dict_Sources.Unoptimized_Text_List =>
               --  Both text-list sources share the counting and filtering
               --  steps, and only differ in how the counter is used.
               declare
                  Counter : Word_Counter;
               begin
                  for S of Input loop
                     Add_Substrings
                       (Counter, S,
                        Handler.Min_Sub_Size, Handler.Max_Sub_Size);

                     --  Add_Words is only called when the word size range
                     --  extends beyond the substring size range.
                     if Handler.Max_Word_Size > Handler.Max_Sub_Size then
                        Add_Words
                          (Counter, S,
                           Handler.Max_Sub_Size + 1, Handler.Max_Word_Size);
                     end if;
                  end loop;

                  --  A zero Filter_Threshold disables filtering
                  if Handler.Filter_Threshold > 0 then
                     Filter_By_Count
                       (Counter, String_Count (Handler.Filter_Threshold));
                  end if;

                  if Handler.Dict_Source = Dict_Sources.Text_List then
                     declare
                        Selected, Pending : String_Lists.List;
                        --  NOTE(review): presumably both lists are outputs
                        --  of Simple_Dictionary_And_Pending -- confirm
                        --  against its declaration.
                     begin
                        Simple_Dictionary_And_Pending
                          (Counter,
                           Handler.Dict_Size,
                           Selected,
                           Pending,
                           Method,
                           Handler.Max_Pending);

                        return Optimize_Dictionary
                          (To_Dictionary (Selected, Handler.Vlen_Verbatim),
                           Pending,
                           Input,
                           Handler.Job_Count,
                           Method);
                     end;
                  else
                     --  Unoptimized_Text_List: no optimization call

                     return To_Dictionary
                       (Simple_Dictionary (Counter, Handler.Dict_Size, Method),



                        Handler.Vlen_Verbatim);
                  end if;
               end;
         end case;
      end To_Dictionary;

   end Dictionary_Subprograms;










|
<
<
<









|

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|
>
|
|
>
>
>
|
<
<







1072
1073
1074
1075
1076
1077
1078
1079



1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090





















1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115


1116
1117
1118
1119
1120
1121
1122


      function To_Dictionary
        (Handler : in Callback'Class;
         Input : in String_Lists.List;
         Data_List : in String_Lists.List;
         Method : in Methods)
        return Dictionary
      is
         --  Build a dictionary from Input, with one strategy for each
         --  value of Handler.Dict_Source.  Data_List is only read when
         --  the source is an S-expression.
      begin
         case Handler.Dict_Source is
            when Dict_Sources.Unoptimized_Text_List =>
               --  Count words from Input and convert the result of
               --  Simple_Dictionary directly, without any optimization.
               declare
                  Counter : constant Word_Counter
                    := Make_Word_Counter (Handler, Input);
               begin
                  return To_Dictionary
                    (Simple_Dictionary (Counter, Handler.Dict_Size, Method),
                     Handler.Vlen_Verbatim);
               end;

            when Dict_Sources.Text_List =>
               --  Count words from Input, split them into a selection and
               --  a pending list, then run Optimize_Dictionary over both.
               declare
                  Selected_Words : String_Lists.List;
                  Queued_Words : String_Lists.List;
                  Counter : constant Word_Counter
                    := Make_Word_Counter (Handler, Input);
               begin
                  Simple_Dictionary_And_Pending
                    (Counter,
                     Handler.Dict_Size,
                     Selected_Words,
                     Queued_Words,
                     Method,
                     Handler.Max_Pending);

                  return Optimize_Dictionary
                    (To_Dictionary (Selected_Words, Handler.Vlen_Verbatim),
                     Queued_Words,
                     Input,
                     Handler.Job_Count,
                     Method);
               end;

            when Dict_Sources.S_Expression =>
               --  Input already describes the dictionary: convert it and
               --  hand it to Adjust_Dictionary together with Data_List.
               return Adjust_Dictionary
                 (Handler,
                  To_Dictionary (Input, Handler.Vlen_Verbatim),
                  Data_List,
                  Method);
         end case;
      end To_Dictionary;

   end Dictionary_Subprograms;