Index: src/natools-smaz-tools.adb
==================================================================
--- src/natools-smaz-tools.adb
+++ src/natools-smaz-tools.adb
@@ -393,6 +393,58 @@
       else
          Word_Maps.Insert (Counter.Map, Word, Count);
       end if;
    end Add_Word;
 
+
+   function Simple_Dictionary
+     (Counter : in Word_Counter;
+      Word_Count : in Natural)
+     return String_Lists.List
+   is
+      --  Select the Word_Count best entries of Counter.Map by keeping
+      --  an ordered set of candidates and dropping its last element
+      --  every time it grows past the limit.
+
+      use type Ada.Containers.Count_Type;
+
+      Limit : constant Ada.Containers.Count_Type
+        := Ada.Containers.Count_Type (Word_Count);
+      Best : Scored_Word_Sets.Set;
+      Position : Word_Maps.Cursor := Word_Maps.First (Counter.Map);
+      Result : String_Lists.List;
+   begin
+      while Word_Maps.Has_Element (Position) loop
+         Best.Include (To_Scored_Word (Position));
+
+         if Best.Length > Limit then
+            Best.Delete_Last;
+         end if;
+
+         Word_Maps.Next (Position);
+      end loop;
+
+      --  Copy the kept words in the iteration order of the set
+      for Item of Best loop
+         Result.Append (Item.Word);
+      end loop;
+
+      return Result;
+   end Simple_Dictionary;
+
+
+   function To_Scored_Word (Cursor : in Word_Maps.Cursor)
+     return Scored_Word
+   is
+      --  Build the scored entry for the map element designated by Cursor,
+      --  its score being the element value times the key length.
+
+      Text : constant String := Word_Maps.Key (Cursor);
+      Occurrences : constant Score_Value
+        := Score_Value (Word_Maps.Element (Cursor));
+   begin
+      return (Size => Text'Length,
+              Word => Text,
+              Score => Occurrences * Text'Length);
+   end To_Scored_Word;
+
 end Natools.Smaz.Tools;
Index: src/natools-smaz-tools.ads
==================================================================
--- src/natools-smaz-tools.ads
+++ src/natools-smaz-tools.ads
@@ -23,10 +23,11 @@
 
 with Ada.Containers.Indefinite_Doubly_Linked_Lists;
 with Natools.S_Expressions;
 
 private with Ada.Containers.Indefinite_Ordered_Maps;
+private with Ada.Containers.Indefinite_Ordered_Sets;
 
 package Natools.Smaz.Tools is
    pragma Preelaborate;
 
    package String_Lists is new Ada.Containers.Indefinite_Doubly_Linked_Lists
@@ -86,15 +87,40 @@
       Min_Size : in Positive;
       Max_Size : in Positive);
       --  Include all the substrings of Phrase whose lengths are
       --  between Min_Size and Max_Size.
 
+   function Simple_Dictionary
+     (Counter : in Word_Counter;
+      Word_Count : in Natural)
+     return String_Lists.List;
+      --  Build the list of the Word_Count best-scored words of Counter,
+      --  the score of a word being its count multiplied by its length.
+
 private
 
    package Word_Maps is new Ada.Containers.Indefinite_Ordered_Maps
      (String, String_Count);
 
    type Word_Counter is record
       Map : Word_Maps.Map;
    end record;
 
+
+   type Score_Value is range 0 .. 2 ** 31 - 1;
+
+   type Scored_Word (Size : Natural) is record
+      Word : String (1 .. Size);
+      Score : Score_Value;
+   end record;
+
+   function "<" (Left, Right : Scored_Word) return Boolean
+     is (if Left.Score /= Right.Score then Left.Score > Right.Score
+         else Left.Word < Right.Word);
+      --  Greater scores sort first, equal scores fall back on word order
+
+   function To_Scored_Word (Cursor : in Word_Maps.Cursor) return Scored_Word;
+
+   package Scored_Word_Sets is new Ada.Containers.Indefinite_Ordered_Sets
+     (Element_Type => Scored_Word);
+
 end Natools.Smaz.Tools;