︙ | | | ︙ | |
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
|
overriding procedure Argument
(Handler : in out Callback;
Argument : in String)
is null;
--  Null procedure: the Argument callback is overridden to do nothing,
--  so both Handler and Argument are left untouched.
--  NOTE(review): presumably this is invoked for non-option command-line
--  arguments; confirm against the Getopt callback contract.
procedure Evaluate_Dictionary
(Job_Count : in Natural;
Dict : in Natools.Smaz_256.Dictionary;
Corpus : in Natools.Smaz_Tools.String_Lists.List;
Compressed_Size : out Ada.Streams.Stream_Element_Count;
Counts : out Tools_256.Dictionary_Counts);
--  Evaluate Dict against Corpus, dispatching to the parallel version
--  (Dict_256.Parallel_Evaluate_Dictionary) when Job_Count is positive,
--  and to the non-parallel Tools_256.Evaluate_Dictionary when it is zero.
--  A copy of Dict, with its Hash set to the trie search, is what actually
--  gets evaluated; Dict itself is not modified.
function Getopt_Config return Getopt.Configuration;
-- Build the configuration object
procedure Optimization_Round
(Dict : in out Holders.Holder;
Score : in out Ada.Streams.Stream_Element_Count;
Counts : in out Tools_256.Dictionary_Counts;
|
<
<
<
<
<
<
<
<
<
|
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
overriding procedure Argument
(Handler : in out Callback;
Argument : in String)
is null;
--  Null procedure: the Argument callback is overridden to do nothing,
--  so both Handler and Argument are left untouched.
--  NOTE(review): presumably this is invoked for non-option command-line
--  arguments; confirm against the Getopt callback contract.
function Getopt_Config return Getopt.Configuration;
-- Build the configuration object
procedure Optimization_Round
(Dict : in out Holders.Holder;
Score : in out Ada.Streams.Stream_Element_Count;
Counts : in out Tools_256.Dictionary_Counts;
|
︙ | | | ︙ | |
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
|
function To_Dictionary
(Handler : in Callback'Class;
Input : in Natools.Smaz_Tools.String_Lists.List)
return Natools.Smaz_256.Dictionary;
-- Convert the input into a dictionary given the option in Handler
--  Dictionary evaluation helpers, generic over the dictionary type,
--  its count array and the per-entry evaluation primitive.
generic
type Dictionary (<>) is private;
--  Dictionary type being evaluated (may be indefinite)
type Dictionary_Entry is (<>);
--  Discrete type used as index of Dictionary_Counts
type String_Count is range <>;
--  Signed integer type used as element of Dictionary_Counts
type Dictionary_Counts is array (Dictionary_Entry) of String_Count;
with package String_Lists
is new Ada.Containers.Indefinite_Doubly_Linked_Lists (String);
--  List-of-strings instance used to hold the corpus
with procedure Evaluate_Dictionary_Partial
(Dict : in Dictionary;
Corpus_Entry : in String;
Compressed_Size : in out Ada.Streams.Stream_Element_Count;
Counts : in out Dictionary_Counts);
--  Process a single corpus entry, updating Compressed_Size and Counts
--  in place (both are "in out").
package Dictionary_Subprograms is
procedure Parallel_Evaluate_Dictionary
(Job_Count : in Positive;
Dict : in Dictionary;
Corpus : in String_Lists.List;
Compressed_Size : out Ada.Streams.Stream_Element_Count;
Counts : out Dictionary_Counts);
-- Return the same results as Natools.Smaz.Tools.Evaluate_Dictionary,
-- but hopefully more quickly, using Job_Count tasks.
--  Job_Count is Positive: at least one task is required.
end Dictionary_Subprograms;
package body Dictionary_Subprograms is
procedure Parallel_Evaluate_Dictionary
(Job_Count : in Positive;
Dict : in Dictionary;
Corpus : in String_Lists.List;
Compressed_Size : out Ada.Streams.Stream_Element_Count;
Counts : out Dictionary_Counts)
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
|
function To_Dictionary
(Handler : in Callback'Class;
Input : in Natools.Smaz_Tools.String_Lists.List)
return Natools.Smaz_256.Dictionary;
-- Convert the input into a dictionary given the option in Handler
procedure Use_Dictionary (Dict : in out Natools.Smaz_256.Dictionary);
--  Update Dict.Hash so that the dictionary can actually be used:
--  register the entries of Dict for trie search, point Dict.Hash at
--  Natools.Smaz_Tools.Trie_Search, and report on the standard error
--  stream any entry whose trie search result differs from its own index.
--  Dictionary evaluation helpers, generic over the dictionary type,
--  its count array and the evaluation primitives.
generic
type Dictionary (<>) is private;
--  Dictionary type being evaluated (may be indefinite)
type Dictionary_Entry is (<>);
--  Discrete type used as index of Dictionary_Counts
type String_Count is range <>;
--  Signed integer type used as element of Dictionary_Counts
type Dictionary_Counts is array (Dictionary_Entry) of String_Count;
with package String_Lists
is new Ada.Containers.Indefinite_Doubly_Linked_Lists (String);
--  List-of-strings instance used to hold the corpus
with procedure Evaluate_Dictionary
(Dict : in Dictionary;
Corpus : in String_Lists.List;
Compressed_Size : out Ada.Streams.Stream_Element_Count;
Counts : out Dictionary_Counts);
--  Non-parallel evaluation of a whole corpus, used by the package's
--  own Evaluate_Dictionary when Job_Count is zero.
with procedure Evaluate_Dictionary_Partial
(Dict : in Dictionary;
Corpus_Entry : in String;
Compressed_Size : in out Ada.Streams.Stream_Element_Count;
Counts : in out Dictionary_Counts);
--  Process a single corpus entry, updating Compressed_Size and Counts
--  in place (both are "in out").
with procedure Use_Dictionary (Dict : in out Dictionary) is <>;
--  Called on a copy of the dictionary before any evaluation.
--  Defaults ("is <>") to a directly visible Use_Dictionary with a
--  matching profile at the point of instantiation.
package Dictionary_Subprograms is
procedure Evaluate_Dictionary
(Job_Count : in Natural;
Dict : in Dictionary;
Corpus : in String_Lists.List;
Compressed_Size : out Ada.Streams.Stream_Element_Count;
Counts : out Dictionary_Counts);
-- Dispatch to parallel or non-parallel version of
-- Evaluate_Dictionary depending on Job_Count.
--  A positive Job_Count selects Parallel_Evaluate_Dictionary, zero
--  selects the formal Evaluate_Dictionary. In both cases the evaluated
--  dictionary is a copy of Dict that went through Use_Dictionary.
procedure Parallel_Evaluate_Dictionary
(Job_Count : in Positive;
Dict : in Dictionary;
Corpus : in String_Lists.List;
Compressed_Size : out Ada.Streams.Stream_Element_Count;
Counts : out Dictionary_Counts);
-- Return the same results as Natools.Smaz.Tools.Evaluate_Dictionary,
-- but hopefully more quickly, using Job_Count tasks.
--  Job_Count is Positive: at least one task is required.
end Dictionary_Subprograms;
package body Dictionary_Subprograms is
procedure Evaluate_Dictionary
  (Job_Count : in Natural;
   Dict : in Dictionary;
   Corpus : in String_Lists.List;
   Compressed_Size : out Ada.Streams.Stream_Element_Count;
   Counts : out Dictionary_Counts)
is
   --  Dict is an "in" parameter, so Use_Dictionary operates on a
   --  local copy which is then the dictionary actually evaluated.
   Prepared : Dictionary := Dict;
begin
   Use_Dictionary (Prepared);

   case Job_Count is
      when 0 =>
         --  Four-argument profile: the formal, non-parallel evaluation
         Evaluate_Dictionary (Prepared, Corpus, Compressed_Size, Counts);

      when others =>
         --  Job_Count is positive here, as the parallel version requires
         Parallel_Evaluate_Dictionary
           (Job_Count, Prepared, Corpus, Compressed_Size, Counts);
   end case;
end Evaluate_Dictionary;
procedure Parallel_Evaluate_Dictionary
(Job_Count : in Positive;
Dict : in Dictionary;
Corpus : in String_Lists.List;
Compressed_Size : out Ada.Streams.Stream_Element_Count;
Counts : out Dictionary_Counts)
|
︙ | | | ︙ | |
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
|
Compressed_Size := 0;
Counts := (others => 0);
Parallel_Run (Cursor, Job_Count);
end Parallel_Evaluate_Dictionary;
end Dictionary_Subprograms;
--  Instance of Dictionary_Subprograms for Natools.Smaz_256 dictionaries,
--  whose counts are indexed by Ada.Streams.Stream_Element.
package Dict_256 is new Dictionary_Subprograms
(Dictionary => Natools.Smaz_256.Dictionary,
Dictionary_Entry => Ada.Streams.Stream_Element,
String_Count => Natools.Smaz_Tools.String_Count,
Dictionary_Counts => Tools_256.Dictionary_Counts,
String_Lists => Natools.Smaz_Tools.String_Lists,
Evaluate_Dictionary_Partial => Tools_256.Evaluate_Dictionary_Partial);
overriding procedure Option
(Handler : in out Callback;
Id : in Options.Id;
Argument : in String) is
begin
|
>
>
>
|
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
|
Compressed_Size := 0;
Counts := (others => 0);
Parallel_Run (Cursor, Job_Count);
end Parallel_Evaluate_Dictionary;
end Dictionary_Subprograms;
--  Instance of Dictionary_Subprograms for Natools.Smaz_256 dictionaries,
--  whose counts are indexed by Ada.Streams.Stream_Element.
--  Use_Dictionary is not given explicitly, so the formal's "is <>"
--  default selects the Use_Dictionary visible at this point.
package Dict_256 is new Dictionary_Subprograms
(Dictionary => Natools.Smaz_256.Dictionary,
Dictionary_Entry => Ada.Streams.Stream_Element,
String_Count => Natools.Smaz_Tools.String_Count,
Dictionary_Counts => Tools_256.Dictionary_Counts,
String_Lists => Natools.Smaz_Tools.String_Lists,
Evaluate_Dictionary => Tools_256.Evaluate_Dictionary,
Evaluate_Dictionary_Partial => Tools_256.Evaluate_Dictionary_Partial);
overriding procedure Option
(Handler : in out Callback;
Id : in Options.Id;
Argument : in String) is
begin
|
︙ | | | ︙ | |
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
|
Handler.Vlen_Verbatim := True;
when Options.No_Vlen_Verbatim =>
Handler.Vlen_Verbatim := False;
end case;
end Option;
procedure Evaluate_Dictionary
  (Job_Count : in Natural;
   Dict : in Natools.Smaz_256.Dictionary;
   Corpus : in Natools.Smaz_Tools.String_Lists.List;
   Compressed_Size : out Ada.Streams.Stream_Element_Count;
   Counts : out Tools_256.Dictionary_Counts)
is
   --  Working copy of Dict: its Hash component is replaced below,
   --  which the "in" parameter itself does not allow.
   Prepared : Natools.Smaz_256.Dictionary := Dict;
begin
   --  Make the trie search operate on the entries of this dictionary,
   --  then use it as the dictionary hash function.
   Natools.Smaz_Tools.Set_Dictionary_For_Trie_Search
     (Tools_256.To_String_List (Prepared));
   Prepared.Hash := Natools.Smaz_Tools.Trie_Search'Access;

   --  Sanity check: searching for an entry should yield its own index.
   --  Mismatches are only reported, evaluation proceeds regardless.
   Check_Entries :
   for Code in Prepared.Offsets'Range loop
      if Natural (Code) /= Natools.Smaz_Tools.Trie_Search
        (Natools.Smaz_256.Dict_Entry (Prepared, Code))
      then
         Ada.Text_IO.Put_Line
           (File => Ada.Text_IO.Current_Error,
            Item => "Fail at" & Ada.Streams.Stream_Element'Image (Code)
              & " -> " & Natools.String_Escapes.C_Escape_Hex
                 (Natools.Smaz_256.Dict_Entry (Prepared, Code), True)
              & " ->" & Natural'Image (Natools.Smaz_Tools.Trie_Search
                 (Natools.Smaz_256.Dict_Entry (Prepared, Code))));
      end if;
   end loop Check_Entries;

   case Job_Count is
      when 0 =>
         Tools_256.Evaluate_Dictionary
           (Prepared, Corpus, Compressed_Size, Counts);

      when others =>
         --  Job_Count is positive here, as the parallel version requires
         Dict_256.Parallel_Evaluate_Dictionary
           (Job_Count, Prepared, Corpus, Compressed_Size, Counts);
   end case;
end Evaluate_Dictionary;
function Getopt_Config return Getopt.Configuration is
use Getopt;
use Options;
R : Getopt.Configuration;
begin
R.Add_Option ("ada-dict", 'A', Optional_Argument, Output_Ada_Dict);
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
446
447
448
449
450
451
452
453
454
455
456
457
458
459
|
Handler.Vlen_Verbatim := True;
when Options.No_Vlen_Verbatim =>
Handler.Vlen_Verbatim := False;
end case;
end Option;
function Getopt_Config return Getopt.Configuration is
use Getopt;
use Options;
R : Getopt.Configuration;
begin
R.Add_Option ("ada-dict", 'A', Optional_Argument, Output_Ada_Dict);
|
︙ | | | ︙ | |
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
|
Word : constant String
:= Natools.Smaz_Tools.String_Lists.Element (Position);
New_Dict : constant Natools.Smaz_256.Dictionary
:= Tools_256.Append_String (Base, Word);
New_Score : Ada.Streams.Stream_Element_Count;
New_Counts : Tools_256.Dictionary_Counts;
begin
Evaluate_Dictionary
(Job_Count, New_Dict, Input_Texts, New_Score, New_Counts);
if New_Score < Score then
Dict := Holders.To_Holder (New_Dict);
Score := New_Score;
Counts := New_Counts;
New_Value := Ada.Strings.Unbounded.To_Unbounded_String (Word);
|
|
|
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
|
Word : constant String
:= Natools.Smaz_Tools.String_Lists.Element (Position);
New_Dict : constant Natools.Smaz_256.Dictionary
:= Tools_256.Append_String (Base, Word);
New_Score : Ada.Streams.Stream_Element_Count;
New_Counts : Tools_256.Dictionary_Counts;
begin
Dict_256.Evaluate_Dictionary
(Job_Count, New_Dict, Input_Texts, New_Score, New_Counts);
if New_Score < Score then
Dict := Holders.To_Holder (New_Dict);
Score := New_Score;
Counts := New_Counts;
New_Value := Ada.Strings.Unbounded.To_Unbounded_String (Word);
|
︙ | | | ︙ | |
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
|
is
Holder : Holders.Holder := Holders.To_Holder (Base);
Pending : Natools.Smaz_Tools.String_Lists.List := Pending_Words;
Score : Ada.Streams.Stream_Element_Count;
Counts : Tools_256.Dictionary_Counts;
Running : Boolean := True;
begin
Evaluate_Dictionary (Job_Count, Base, Input_Texts, Score, Counts);
while Running loop
Optimization_Round
(Holder,
Score,
Counts,
Pending,
|
>
|
|
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
|
is
Holder : Holders.Holder := Holders.To_Holder (Base);
Pending : Natools.Smaz_Tools.String_Lists.List := Pending_Words;
Score : Ada.Streams.Stream_Element_Count;
Counts : Tools_256.Dictionary_Counts;
Running : Boolean := True;
begin
Dict_256.Evaluate_Dictionary
(Job_Count, Base, Input_Texts, Score, Counts);
while Running loop
Optimization_Round
(Holder,
Score,
Counts,
Pending,
|
︙ | | | ︙ | |
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
|
end if;
when Actions.Evaluate =>
declare
Total_Size : Ada.Streams.Stream_Element_Count;
Counts : Tools_256.Dictionary_Counts;
begin
Evaluate_Dictionary (Handler.Job_Count,
Dictionary, Data_List, Total_Size, Counts);
if Handler.Sx_Output then
Sx_Output.Open_List;
Sx_Output.Append_String (Ada.Strings.Fixed.Trim
(Ada.Streams.Stream_Element_Count'Image (Total_Size),
Ada.Strings.Both));
|
|
|
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
|
end if;
when Actions.Evaluate =>
declare
Total_Size : Ada.Streams.Stream_Element_Count;
Counts : Tools_256.Dictionary_Counts;
begin
Dict_256.Evaluate_Dictionary (Handler.Job_Count,
Dictionary, Data_List, Total_Size, Counts);
if Handler.Sx_Output then
Sx_Output.Open_List;
Sx_Output.Append_String (Ada.Strings.Fixed.Trim
(Ada.Streams.Stream_Element_Count'Image (Total_Size),
Ada.Strings.Both));
|
︙ | | | ︙ | |
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
|
(Counter, Handler.Dict_Size, Handler.Score_Method),
Handler.Vlen_Verbatim);
end if;
end;
end case;
end To_Dictionary;
Opt_Config : constant Getopt.Configuration := Getopt_Config;
Handler : Callback;
Input_List, Input_Data : Natools.Smaz_Tools.String_Lists.List;
begin
Process_Command_Line :
begin
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
|
(Counter, Handler.Dict_Size, Handler.Score_Method),
Handler.Vlen_Verbatim);
end if;
end;
end case;
end To_Dictionary;
procedure Use_Dictionary (Dict : in out Natools.Smaz_256.Dictionary) is
   --  Prepare Dict for actual use: register its entries for trie
   --  search, make Trie_Search its hash function, and check that each
   --  entry is found at its own index.
   --  Mismatches are reported on the standard error stream only; no
   --  exception is raised and Dict is left with the new Hash anyway.
begin
   Natools.Smaz_Tools.Set_Dictionary_For_Trie_Search
     (Tools_256.To_String_List (Dict));
   Dict.Hash := Natools.Smaz_Tools.Trie_Search'Access;

   for I in Dict.Offsets'Range loop
      declare
         --  Look the entry up and search for it only once per index,
         --  instead of repeating both calls when building the message.
         Word  : constant String := Natools.Smaz_256.Dict_Entry (Dict, I);
         Found : constant Natural := Natools.Smaz_Tools.Trie_Search (Word);
      begin
         if Found /= Natural (I) then
            Ada.Text_IO.Put_Line
              (Ada.Text_IO.Current_Error,
               "Fail at" & Ada.Streams.Stream_Element'Image (I)
                 & " -> " & Natools.String_Escapes.C_Escape_Hex (Word, True)
                 & " ->" & Natural'Image (Found));
         end if;
      end;
   end loop;
end Use_Dictionary;
Opt_Config : constant Getopt.Configuration := Getopt_Config;
Handler : Callback;
Input_List, Input_Data : Natools.Smaz_Tools.String_Lists.List;
begin
Process_Command_Line :
begin
|
︙ | | | ︙ | |