53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
+
|
package Options is
type Id is
(Output_Ada_Dict,
Dictionary_Input,
Decode,
Encode,
Evaluate,
Filter_Threshold,
Output_Hash,
Job_Count,
Help,
Sx_Dict_Output,
Min_Sub_Size,
Max_Sub_Size,
Stat_Output,
|
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
|
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
|
+
+
+
+
+
|
Handler.Max_Sub_Size := Positive'Value (Argument);
when Options.Max_Word_Size =>
Handler.Max_Word_Size := Positive'Value (Argument);
when Options.Job_Count =>
Handler.Job_Count := Natural'Value (Argument);
when Options.Filter_Threshold =>
Handler.Filter_Threshold
:= Natools.Smaz.Tools.String_Count'Value (Argument);
end case;
end Option;
function Getopt_Config return Getopt.Configuration is
use Getopt;
use Options;
R : Getopt.Configuration;
begin
R.Add_Option ("ada-dict", 'A', Optional_Argument, Output_Ada_Dict);
R.Add_Option ("decode", 'd', No_Argument, Decode);
R.Add_Option ("dict", 'D', No_Argument, Dictionary_Input);
R.Add_Option ("encode", 'e', No_Argument, Encode);
R.Add_Option ("evaluate", 'E', No_Argument, Evaluate);
R.Add_Option ("filter", 'F', Required_Argument, Filter_Threshold);
R.Add_Option ("help", 'h', No_Argument, Help);
R.Add_Option ("hash-pkg", 'H', Required_Argument, Output_Hash);
R.Add_Option ("jobs", 'j', Required_Argument, Job_Count);
R.Add_Option ("sx-dict", 'L', No_Argument, Sx_Dict_Output);
R.Add_Option ("min-substring", 'm', Required_Argument, Min_Sub_Size);
R.Add_Option ("max-substring", 'M', Required_Argument, Max_Sub_Size);
R.Add_Option ("stats", 's', No_Argument, Stat_Output);
|
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
|
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
|
+
+
+
+
+
+
+
-
+
+
+
+
+
+
+
+
|
Put_Line (Output, Indent & Indent
& "Evaluate the dictionary on the input given corpus");
when Options.Job_Count =>
New_Line (Output);
Put_Line (Output, Indent & Indent
& "Number of parallel jobs in long calculations");
when Options.Filter_Threshold =>
Put_Line (Output, " <threshold>");
Put_Line (Output, Indent & Indent
& "Before building a dictionary from substrings, remove");
Put_Line (Output, Indent & Indent
& "substrings whose count is below the threshold.");
end case;
end loop;
end Print_Help;
function To_Dictionary
  (Handler : in Callback'Class;
   Input : in Natools.Smaz.Tools.String_Lists.List)
  return Natools.Smaz.Dictionary
is
   --  Makes the comparison operators of String_Count directly visible,
   --  which the test `Handler.Filter_Threshold > 0` below relies on.
   use type Natools.Smaz.Tools.String_Count;
begin
   --  Build a dictionary from Input, interpreting the list according to
   --  Handler.Dict_Source.
   case Handler.Dict_Source is
      when Dict_Sources.S_Expression =>
         --  Input is handed directly to the library conversion.
         --  NOTE(review): the meaning of the Boolean argument is defined
         --  by Natools.Smaz.Tools.To_Dictionary and is not visible here.
         return Natools.Smaz.Tools.To_Dictionary (Input, True);

      when Dict_Sources.Word_List =>
         declare
            Counter : Natools.Smaz.Tools.Word_Counter;
         begin
            for S of Input loop
               --  Count substrings whose length lies within
               --  Min_Sub_Size .. Max_Sub_Size.
               Natools.Smaz.Tools.Add_Substrings
                 (Counter, S, Handler.Min_Sub_Size, Handler.Max_Sub_Size);

               --  Lengths above Max_Sub_Size, up to Max_Word_Size, are
               --  submitted through Add_Words instead; this is skipped
               --  when that range would be empty.
               if Handler.Max_Word_Size > Handler.Max_Sub_Size then
                  Natools.Smaz.Tools.Add_Words
                    (Counter, S,
                     Handler.Max_Sub_Size + 1, Handler.Max_Word_Size);
               end if;
            end loop;

            --  A zero threshold disables filtering altogether.
            if Handler.Filter_Threshold > 0 then
               Natools.Smaz.Tools.Filter_By_Count
                 (Counter, Handler.Filter_Threshold);
            end if;

            --  NOTE(review): 254 is presumably the requested number of
            --  dictionary entries; confirm against Simple_Dictionary.
            return Natools.Smaz.Tools.To_Dictionary
              (Natools.Smaz.Tools.Simple_Dictionary (Counter, 254),
               True);
         end;
   end case;
end To_Dictionary;
|