Index: src/natools-smaz-tools.adb
==================================================================
--- src/natools-smaz-tools.adb
+++ src/natools-smaz-tools.adb
@@ -434,10 +434,74 @@
             Add_Word (Counter, Phrase (First .. Next - 1));
          end if;
       end loop Main_Loop;
    end Add_Words;
 
+
+   procedure Evaluate_Dictionary
+     (Dict : in Dictionary;
+      Corpus : in String_Lists.List;
+      Compressed_Size : out Ada.Streams.Stream_Element_Count;
+      Counts : out Dictionary_Counts)
+   is
+      --  Compress each string of Corpus with Dict.  Compressed_Size
+      --  receives the summed length of all compressed outputs, and
+      --  Counts (B) the number of times byte B was met as a dictionary
+      --  code (i.e. within Dict.Offsets'Range) in those outputs.
+
+      Verbatim_Code_Count : constant Ada.Streams.Stream_Element_Offset
+        := Ada.Streams.Stream_Element_Offset
+             (Ada.Streams.Stream_Element'Last - Dict.Dict_Last);
+      --  Number of byte values strictly above Dict.Dict_Last
+
+      Verbatim_Length : Ada.Streams.Stream_Element_Offset;
+      Input_Byte : Ada.Streams.Stream_Element;
+   begin
+      Compressed_Size := 0;
+      Counts := (others => 0);
+
+      for S of Corpus loop
+         declare
+            use type Ada.Streams.Stream_Element_Offset;
+            Compressed : constant Ada.Streams.Stream_Element_Array
+              := Compress (Dict, S);
+            Index : Ada.Streams.Stream_Element_Offset := Compressed'First;
+         begin
+            Compressed_Size := Compressed_Size + Compressed'Length;
+
+            while Index in Compressed'Range loop
+               Input_Byte := Compressed (Index);
+
+               if Input_Byte in Dict.Offsets'Range then
+                  --  Dictionary code: count one more use of it
+                  Counts (Input_Byte) := Counts (Input_Byte) + 1;
+                  Index := Index + 1;
+               else
+                  --  Any other byte introduces a verbatim run, which is
+                  --  skipped so that its bytes are not counted as codes.
+
+                  if not Dict.Variable_Length_Verbatim then
+                     Verbatim_Length := Ada.Streams.Stream_Element_Offset
+                       (Ada.Streams.Stream_Element'Last - Input_Byte) + 1;
+                  elsif Input_Byte < Ada.Streams.Stream_Element'Last then
+                     Verbatim_Length := Ada.Streams.Stream_Element_Offset
+                       (Ada.Streams.Stream_Element'Last - Input_Byte);
+                  else
+                     --  Stream_Element'Last: the length is derived from
+                     --  the byte that follows, which is consumed here.
+                     Index := Index + 1;
+                     Verbatim_Length := Ada.Streams.Stream_Element_Offset
+                       (Compressed (Index)) + Verbatim_Code_Count - 1;
+                  end if;
+
+                  Index := Index + Verbatim_Length + 1;
+               end if;
+            end loop;
+         end;
+      end loop;
+   end Evaluate_Dictionary;
+
 
    function Simple_Dictionary
      (Counter : in Word_Counter;
       Word_Count : in Natural)
      return String_Lists.List
Index: src/natools-smaz-tools.ads
==================================================================
--- src/natools-smaz-tools.ads
+++ src/natools-smaz-tools.ads
@@ -102,10 +102,22 @@
      (Counter : in Word_Counter;
       Word_Count : in Natural)
      return String_Lists.List;
    --  Return the Word_Count words in Counter that have the highest score,
    --  the score being count * length.
+
+   type Dictionary_Counts is
+     array (Ada.Streams.Stream_Element) of String_Count;
+
+   procedure Evaluate_Dictionary
+     (Dict : in Dictionary;
+      Corpus : in String_Lists.List;
+      Compressed_Size : out Ada.Streams.Stream_Element_Count;
+      Counts : out Dictionary_Counts);
+   --  Compress all strings of Corpus, returning the total number of
+   --  compressed bytes and the number of uses for each dictionary
+   --  element.
 
 private
 
    package Word_Maps is new Ada.Containers.Indefinite_Ordered_Maps
      (String, String_Count);