Natools

Check-in [88e525eaf4]
Login
Overview
Comment:tools/smaz: add support for dictionary generation from a word list
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 88e525eaf4f3f4096d79011c8edeea9cf63c0d48
User & Date: nat on 2016-09-29 21:58:48
Other Links: manifest | tags
Context
2016-09-30
20:26
smaz: make Dict_Entry accessor publicly visible check-in: 48bf48d1c0 user: nat tags: trunk
2016-09-29
21:58
tools/smaz: add support for dictionary generation from a word list check-in: 88e525eaf4 user: nat tags: trunk
2016-09-28
21:25
smaz-tools: add the simplest dictionary constructor from word counts check-in: fb05dda137 user: nat tags: trunk
Changes

Modified tools/smaz.adb from [fcd1ed8809] to [acb9db3271].

35
36
37
38
39
40
41






42
43
44
45

46
47
48
49
50
51

52
53
54
55
56
57
58
59
60
61
62
63
64
65

66
67
68
69
70
71
72

   package Actions is
      type Enum is
        (Nothing,
         Decode,
         Encode);
   end Actions;







   package Options is
      type Id is
        (Output_Ada_Dictionary,

         Decode,
         Encode,
         Output_Hash,
         Help,
         Stat_Output,
         No_Stat_Output,

         Sx_Output,
         No_Sx_Output);
   end Options;

   package Getopt is new Natools.Getopt_Long (Options.Id);

   type Callback is new Getopt.Handlers.Callback with record
      Display_Help : Boolean := False;
      Need_Dictionary : Boolean := False;
      Stat_Output : Boolean := False;
      Sx_Output : Boolean := False;
      Action : Actions.Enum := Actions.Nothing;
      Ada_Dictionary : Ada.Strings.Unbounded.Unbounded_String;
      Hash_Package : Ada.Strings.Unbounded.Unbounded_String;

   end record;

   overriding procedure Option
     (Handler  : in out Callback;
      Id       : in Options.Id;
      Argument : in String);








>
>
>
>
>
>



|
>






>














>







35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

   package Actions is
      type Enum is
        (Nothing,
         Decode,
         Encode);
   end Actions;

   package Dict_Sources is
      type Enum is
        (S_Expression,
         Word_List);
   end Dict_Sources;

   package Options is
      type Id is
        (Output_Ada_Dict,
         Dictionary_Input,
         Decode,
         Encode,
         Output_Hash,
         Help,
         Stat_Output,
         No_Stat_Output,
         Word_List_Input,
         Sx_Output,
         No_Sx_Output);
   end Options;

   package Getopt is new Natools.Getopt_Long (Options.Id);

   type Callback is new Getopt.Handlers.Callback with record
      Display_Help : Boolean := False;
      Need_Dictionary : Boolean := False;
      Stat_Output : Boolean := False;
      Sx_Output : Boolean := False;
      Action : Actions.Enum := Actions.Nothing;
      Ada_Dictionary : Ada.Strings.Unbounded.Unbounded_String;
      Hash_Package : Ada.Strings.Unbounded.Unbounded_String;
      Dict_Source : Dict_Sources.Enum := Dict_Sources.S_Expression;
   end record;

   overriding procedure Option
     (Handler  : in out Callback;
      Id       : in Options.Id;
      Argument : in String);

90
91
92
93
94
95
96






97
98
99
100
101
102
103
      --  print the given dictionary in the given file

   procedure Print_Help
     (Opt : in Getopt.Configuration;
      Output : in Ada.Text_IO.File_Type);
      --  Print the help text to the given file








   overriding procedure Option
     (Handler  : in out Callback;
      Id       : in Options.Id;
      Argument : in String) is
   begin
      case Id is







>
>
>
>
>
>







99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
      --  print the given dictionary in the given file

   procedure Print_Help
     (Opt : in Getopt.Configuration;
      Output : in Ada.Text_IO.File_Type);
      --  Print the help text to the given file

   function To_Dictionary
     (Handler : in Callback'Class;
      Input : in Natools.Smaz.Tools.String_Lists.List)
     return Natools.Smaz.Dictionary;
      --  Convert the input into a dictionary given the option in Handler


   overriding procedure Option
     (Handler  : in out Callback;
      Id       : in Options.Id;
      Argument : in String) is
   begin
      case Id is
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141






142
143
144
145
146
147
148
149
150
151
152

153
154
155
156
157

158
159
160
161
162
163
164
165

         when Options.No_Stat_Output =>
            Handler.Stat_Output := False;

         when Options.No_Sx_Output =>
            Handler.Sx_Output := False;

         when Options.Output_Ada_Dictionary =>
            Handler.Need_Dictionary := True;

            if Argument'Length > 0 then
               Handler.Ada_Dictionary
                 := Ada.Strings.Unbounded.To_Unbounded_String (Argument);
            else
               Handler.Ada_Dictionary
                 := Ada.Strings.Unbounded.To_Unbounded_String ("-");
            end if;

         when Options.Output_Hash =>
            Handler.Need_Dictionary := True;
            Handler.Hash_Package
              := Ada.Strings.Unbounded.To_Unbounded_String (Argument);

         when Options.Stat_Output =>
            Handler.Stat_Output := True;

         when Options.Sx_Output =>
            Handler.Sx_Output := True;






      end case;
   end Option;


   function Getopt_Config return Getopt.Configuration is
      use Getopt;
      use Options;
      R : Getopt.Configuration;
   begin
      R.Add_Option ("ada-dict", 'A', Optional_Argument, Output_Ada_Dictionary);
      R.Add_Option ("decode",   'd', No_Argument,       Decode);

      R.Add_Option ("encode",   'e', No_Argument,       Encode);
      R.Add_Option ("help",     'h', No_Argument,       Help);
      R.Add_Option ("hash-pkg", 'H', Required_Argument, Output_Hash);
      R.Add_Option ("stats",    's', No_Argument,       Stat_Output);
      R.Add_Option ("no-stats", 'S', No_Argument,       No_Stat_Output);

      R.Add_Option ("s-expr",   'x', No_Argument,       Sx_Output);
      R.Add_Option ("no-s-expr", 'X', No_Argument,       No_Sx_Output);

      return R;
   end Getopt_Config;


   procedure Print_Dictionary







|




















>
>
>
>
>
>









|
|
>
|
|
|
|
|
>
|







129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188

         when Options.No_Stat_Output =>
            Handler.Stat_Output := False;

         when Options.No_Sx_Output =>
            Handler.Sx_Output := False;

         when Options.Output_Ada_Dict =>
            Handler.Need_Dictionary := True;

            if Argument'Length > 0 then
               Handler.Ada_Dictionary
                 := Ada.Strings.Unbounded.To_Unbounded_String (Argument);
            else
               Handler.Ada_Dictionary
                 := Ada.Strings.Unbounded.To_Unbounded_String ("-");
            end if;

         when Options.Output_Hash =>
            Handler.Need_Dictionary := True;
            Handler.Hash_Package
              := Ada.Strings.Unbounded.To_Unbounded_String (Argument);

         when Options.Stat_Output =>
            Handler.Stat_Output := True;

         when Options.Sx_Output =>
            Handler.Sx_Output := True;

         when Options.Dictionary_Input =>
            Handler.Dict_Source := Dict_Sources.S_Expression;

         when Options.Word_List_Input =>
            Handler.Dict_Source := Dict_Sources.Word_List;
      end case;
   end Option;


   function Getopt_Config return Getopt.Configuration is
      use Getopt;
      use Options;
      R : Getopt.Configuration;
   begin
      R.Add_Option ("ada-dict",  'A', Optional_Argument, Output_Ada_Dict);
      R.Add_Option ("decode",    'd', No_Argument,       Decode);
      R.Add_Option ("dict",      'D', No_Argument,       Dictionary_Input);
      R.Add_Option ("encode",    'e', No_Argument,       Encode);
      R.Add_Option ("help",      'h', No_Argument,       Help);
      R.Add_Option ("hash-pkg",  'H', Required_Argument, Output_Hash);
      R.Add_Option ("stats",     's', No_Argument,       Stat_Output);
      R.Add_Option ("no-stats",  'S', No_Argument,       No_Stat_Output);
      R.Add_Option ("word-list", 'w', No_Argument,       Word_List_Input);
      R.Add_Option ("s-expr",    'x', No_Argument,       Sx_Output);
      R.Add_Option ("no-s-expr", 'X', No_Argument,       No_Sx_Output);

      return R;
   end Getopt_Config;


   procedure Print_Dictionary
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
                 & "Do not output filter statistics");

            when Options.No_Sx_Output =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Do not output filtered results in a S-expression");

            when Options.Output_Ada_Dictionary =>
               Put_Line (Output, "=[filename]");
               Put_Line (Output, Indent & Indent
                 & "Output the current dictionary as Ada code in the given");
               Put_Line (Output, Indent & Indent
                 & "file, or standard output if filename is ""-""");

            when Options.Output_Hash =>







|







264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
                 & "Do not output filter statistics");

            when Options.No_Sx_Output =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Do not output filtered results in a S-expression");

            when Options.Output_Ada_Dict =>
               Put_Line (Output, "=[filename]");
               Put_Line (Output, Indent & Indent
                 & "Output the current dictionary as Ada code in the given");
               Put_Line (Output, Indent & Indent
                 & "file, or standard output if filename is ""-""");

            when Options.Output_Hash =>
264
265
266
267
268
269
270










271
272
273
























274
275
276
277
278
279
280
               Put_Line (Output, Indent & Indent
                 & "Output filter statistics");

            when Options.Sx_Output =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Output filtered results in a S-expression");










         end case;
      end loop;
   end Print_Help;

























   Opt_Config : constant Getopt.Configuration := Getopt_Config;
   Handler : Callback;
   Input_List, Input_Data : Natools.Smaz.Tools.String_Lists.List;
begin
   Process_Command_Line :
   begin







>
>
>
>
>
>
>
>
>
>



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
               Put_Line (Output, Indent & Indent
                 & "Output filter statistics");

            when Options.Sx_Output =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Output filtered results in a S-expression");

            when Options.Dictionary_Input =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Read dictionary directly in input S-expression (default)");

            when Options.Word_List_Input =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Compute dictionary from word list in input S-expression");
         end case;
      end loop;
   end Print_Help;

   function To_Dictionary
     (Handler : in Callback'Class;
      Input : in Natools.Smaz.Tools.String_Lists.List)
     return Natools.Smaz.Dictionary is
   begin
      case Handler.Dict_Source is
         when Dict_Sources.S_Expression =>
            return Natools.Smaz.Tools.To_Dictionary (Input, True);

         when Dict_Sources.Word_List =>
            declare
               Counter : Natools.Smaz.Tools.Word_Counter;
            begin
               for S of Input loop
                  Natools.Smaz.Tools.Add_Substrings (Counter, S, 1, 3);
               end loop;

               return Natools.Smaz.Tools.To_Dictionary
                 (Natools.Smaz.Tools.Most_Common_Words (Counter, 254),
                  True);
            end;
      end case;
   end To_Dictionary;

   Opt_Config : constant Getopt.Configuration := Getopt_Config;
   Handler : Callback;
   Input_List, Input_Data : Natools.Smaz.Tools.String_Lists.List;
begin
   Process_Command_Line :
   begin
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
      end if;
   end Read_Input_List;


   Build_Dictionary :
   declare
      Dictionary : Natools.Smaz.Dictionary
        := Natools.Smaz.Tools.To_Dictionary (Input_List, True);
      Sx_Output : Natools.S_Expressions.Printers.Canonical
        (Ada.Text_IO.Text_Streams.Stream (Ada.Text_IO.Current_Output));
      Ada_Dictionary : constant String
        := Ada.Strings.Unbounded.To_String (Handler.Ada_Dictionary);
      Hash_Package : constant String
        := Ada.Strings.Unbounded.To_String (Handler.Hash_Package);
   begin







|







372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
      end if;
   end Read_Input_List;


   Build_Dictionary :
   declare
      Dictionary : Natools.Smaz.Dictionary
        := To_Dictionary (Handler, Input_List);
      Sx_Output : Natools.S_Expressions.Printers.Canonical
        (Ada.Text_IO.Text_Streams.Stream (Ada.Text_IO.Current_Output));
      Ada_Dictionary : constant String
        := Ada.Strings.Unbounded.To_String (Handler.Ada_Dictionary);
      Hash_Package : constant String
        := Ada.Strings.Unbounded.To_String (Handler.Hash_Package);
   begin