365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
|
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
|
+
+
-
+
+
|
return Word_Counter;
-- Make a word counter from an input word list
procedure Optimization_Round
(Dict : in out Holders.Holder;
Score : in out Ada.Streams.Stream_Element_Count;
Counts : in out Dictionary_Counts;
First : in Dictionary_Entry;
Pending_Words : in out String_Lists.List;
Input_Texts : in String_Lists.List;
Job_Count : in Natural;
Method : in Methods;
Updated : out Boolean);
--  Try to improve on Dict by replacing a single entry from it with
--  one of the substrings in Pending_Words.
--  The entry to replace is the one selected by Worst_Element among
--  the entries from First up to the last code of Dict, so entries
--  located before First are left out of the selection.
--  Updated is the value Optimize_Dictionary uses as its loop
--  condition: rounds are repeated for as long as it comes back True.
function Optimize_Dictionary
(Base : in Dictionary;
First : in Dictionary_Entry;
Pending_Words : in String_Lists.List;
Input_Texts : in String_Lists.List;
Job_Count : in Natural;
Method : in Methods)
return Dictionary;
--  Optimize the dictionary on Input_Texts, starting with Base and
--  adding substrings from Pending_Words. Operates only on words
--  at First and beyond.
--  Base is evaluated once on Input_Texts, then Optimization_Round is
--  called repeatedly until a round reports that nothing was updated.
procedure Parallel_Evaluate_Dictionary
(Job_Count : in Positive;
Dict : in Dictionary;
Corpus : in String_Lists.List;
Compressed_Size : out Ada.Streams.Stream_Element_Count;
Counts : out Dictionary_Counts);
--  Takes a dictionary and a list of strings, and hands back a
--  stream element count and per-entry counts as out parameters.
--  NOTE(review): presumably Compressed_Size is the total size of
--  Corpus once compressed with Dict, computed over Job_Count
--  concurrent jobs -- confirm against the body.
|
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
|
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
|
+
-
+
-
|
end Make_Word_Counter;
procedure Optimization_Round
(Dict : in out Holders.Holder;
Score : in out Ada.Streams.Stream_Element_Count;
Counts : in out Dictionary_Counts;
First : in Dictionary_Entry;
Pending_Words : in out String_Lists.List;
Input_Texts : in String_Lists.List;
Job_Count : in Natural;
Method : in Methods;
Updated : out Boolean)
is
use type Ada.Streams.Stream_Element_Offset;
New_Value : Ada.Strings.Unbounded.Unbounded_String;
New_Position : String_Lists.Cursor;
Worst_Index : constant Dictionary_Entry
:= Worst_Element
(Dict.Element, Counts, Method,
(Dict.Element, Counts, Method, First, Last_Code (Dict.Element));
Dictionary_Entry'First, Last_Code (Dict.Element));
Worst_Value : constant String
:= Dict_Entry (Dict.Element, Worst_Index);
Worst_Count : constant String_Count := Counts (Worst_Index);
Base : constant Dictionary
:= Remove_Element (Dict.Element, Worst_Index);
Old_Score : constant Ada.Streams.Stream_Element_Count := Score;
begin
|
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
|
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
|
+
+
|
& ')');
end if;
end Optimization_Round;
function Optimize_Dictionary
(Base : in Dictionary;
First : in Dictionary_Entry;
Pending_Words : in String_Lists.List;
Input_Texts : in String_Lists.List;
Job_Count : in Natural;
Method : in Methods)
return Dictionary
is
Holder : Holders.Holder := Holders.To_Holder (Base);
Pending : String_Lists.List := Pending_Words;
Score : Ada.Streams.Stream_Element_Count;
Counts : Dictionary_Counts;
Running : Boolean := True;
begin
Evaluate_Dictionary
(Job_Count, Base, Input_Texts, Score, Counts);
while Running loop
Optimization_Round
(Holder,
Score,
Counts,
First,
Pending,
Input_Texts,
Job_Count,
Method,
Running);
end loop;
|
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
|
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
|
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
+
+
+
+
+
|
(Handler,
To_Dictionary (Input, Handler.Vlen_Verbatim),
Data_List,
Method);
when Dict_Sources.Text_List =>
declare
Needed : constant Integer
:= Handler.Dict_Size
- Natural (Handler.Forced_Words.Length);
Selected, Pending : String_Lists.List;
First : Dictionary_Entry := Dictionary_Entry'First;
begin
if Needed <= 0 then
for Word of reverse Handler.Forced_Words loop
Selected.Prepend (Word);
if Positive (Selected.Length) = Handler.Dict_Size then
return To_Dictionary
(Selected, Handler.Vlen_Verbatim);
end if;
end loop;
end if;
Simple_Dictionary_And_Pending
(Make_Word_Counter (Handler, Input),
Handler.Dict_Size,
Needed,
Selected,
Pending,
Method,
Handler.Max_Pending);
for Word of reverse Handler.Forced_Words loop
Selected.Prepend (Word);
First := Dictionary_Entry'Succ (First);
end loop;
return Optimize_Dictionary
(To_Dictionary (Selected, Handler.Vlen_Verbatim),
First,
Pending,
Input,
Handler.Job_Count,
Method);
end;
when Dict_Sources.Unoptimized_Text_List =>
|