Natools

Check-in [db2278efbb]
Login
Overview
Comment: tools/smaz: replace "word list" with clearer "[sample] text list"
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: db2278efbbf160585552b8542635b45ffbf9e766
User & Date: nat on 2016-10-22 19:21:42
Other Links: manifest | tags
Context
2016-10-23
21:23
smaz-tools: new primitive to remove an entry from a dictionary check-in: 134a6f8380 user: nat tags: trunk
2016-10-22
19:21
tools/smaz: replace "word list" with clearer "[sample] text list" check-in: db2278efbb user: nat tags: trunk
2016-10-21
19:32
tools/smaz: new command-line option to filter substrings by count check-in: 637ebd90fa user: nat tags: trunk
Changes

Modified tools/smaz.adb from [71cfa2a6e9] to [6c1366e4ae].

43
44
45
46
47
48
49
50

51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69

70
71
72
73
74
75
76
43
44
45
46
47
48
49

50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68

69
70
71
72
73
74
75
76







-
+


















-
+







         Encode,
         Evaluate);
   end Actions;

   package Dict_Sources is
      type Enum is
        (S_Expression,
         Word_List);
         Text_List);
   end Dict_Sources;

   package Options is
      type Id is
        (Output_Ada_Dict,
         Dictionary_Input,
         Decode,
         Encode,
         Evaluate,
         Filter_Threshold,
         Output_Hash,
         Job_Count,
         Help,
         Sx_Dict_Output,
         Min_Sub_Size,
         Max_Sub_Size,
         Stat_Output,
         No_Stat_Output,
         Word_List_Input,
         Text_List_Input,
         Max_Word_Size,
         Sx_Output,
         No_Sx_Output);
   end Options;

   package Getopt is new Natools.Getopt_Long (Options.Id);

184
185
186
187
188
189
190
191
192


193
194
195
196
197
198
199
184
185
186
187
188
189
190


191
192
193
194
195
196
197
198
199







-
-
+
+








         when Options.Sx_Output =>
            Handler.Sx_Output := True;

         when Options.Dictionary_Input =>
            Handler.Dict_Source := Dict_Sources.S_Expression;

         when Options.Word_List_Input =>
            Handler.Dict_Source := Dict_Sources.Word_List;
         when Options.Text_List_Input =>
            Handler.Dict_Source := Dict_Sources.Text_List;

         when Options.Sx_Dict_Output =>
            Handler.Need_Dictionary := True;
            Handler.Sx_Dict_Output := True;

         when Options.Min_Sub_Size =>
            Handler.Min_Sub_Size := Positive'Value (Argument);
229
230
231
232
233
234
235
236

237
238
239
240
241
242
243
229
230
231
232
233
234
235

236
237
238
239
240
241
242
243







-
+







      R.Add_Option ("hash-pkg",      'H', Required_Argument, Output_Hash);
      R.Add_Option ("jobs",          'j', Required_Argument, Job_Count);
      R.Add_Option ("sx-dict",       'L', No_Argument,       Sx_Dict_Output);
      R.Add_Option ("min-substring", 'm', Required_Argument, Min_Sub_Size);
      R.Add_Option ("max-substring", 'M', Required_Argument, Max_Sub_Size);
      R.Add_Option ("stats",         's', No_Argument,       Stat_Output);
      R.Add_Option ("no-stats",      'S', No_Argument,       No_Stat_Output);
      R.Add_Option ("word-list",     'w', No_Argument,       Word_List_Input);
      R.Add_Option ("text-list",     't', No_Argument,       Text_List_Input);
      R.Add_Option ("max-word-len",  'W', Required_Argument, Max_Word_Size);
      R.Add_Option ("s-expr",        'x', No_Argument,       Sx_Output);
      R.Add_Option ("no-s-expr",     'X', No_Argument,       No_Sx_Output);

      return R;
   end Getopt_Config;

440
441
442
443
444
445
446
447

448
449
450


451
452
453
454
455
456
457
440
441
442
443
444
445
446

447
448
449

450
451
452
453
454
455
456
457
458







-
+


-
+
+







                 & "Output filtered results in a S-expression");

            when Options.Dictionary_Input =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Read dictionary directly in input S-expression (default)");

            when Options.Word_List_Input =>
            when Options.Text_List_Input =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Compute dictionary from word list in input S-expression");
                 & "Compute dictionary from sample texts"
                 & " in input S-expression");

            when Options.Sx_Dict_Output =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Output the dictionary as a S-expression");

            when Options.Min_Sub_Size =>
496
497
498
499
500
501
502
503

504
505
506
507
508
509
510
497
498
499
500
501
502
503

504
505
506
507
508
509
510
511







-
+







   is
      use type Natools.Smaz.Tools.String_Count;
   begin
      case Handler.Dict_Source is
         when Dict_Sources.S_Expression =>
            return Natools.Smaz.Tools.To_Dictionary (Input, True);

         when Dict_Sources.Word_List =>
         when Dict_Sources.Text_List =>
            declare
               Counter : Natools.Smaz.Tools.Word_Counter;
            begin
               for S of Input loop
                  Natools.Smaz.Tools.Add_Substrings
                    (Counter, S, Handler.Min_Sub_Size, Handler.Max_Sub_Size);