Index: src/natools-smaz-tools.adb
==================================================================
--- src/natools-smaz-tools.adb
+++ src/natools-smaz-tools.adb
@@ -346,6 +346,61 @@
             Values => Values,
             Hash => Dummy_Hash'Access);
       end;
    end To_Dictionary;
+
+
+   -------------------
+   -- Word Counting --
+   -------------------
+
+   procedure Add_Substrings
+     (Counter : in out Word_Counter;
+      Phrase : in String;
+      Min_Size : in Positive;
+      Max_Size : in Positive) is
+   begin
+      --  Each First is the start of at least one substring of Min_Size
+      --  characters; the range is empty when Phrase is too short.
+      for First in Phrase'First .. Phrase'Last - Min_Size + 1 loop
+         --  Clamp the length before adding it to First, so that a very
+         --  large Max_Size cannot overflow the index computation.
+         for Last in First + Min_Size - 1
+           .. First + Natural'Min (Max_Size - 1, Phrase'Last - First)
+         loop
+            Add_Word (Counter, Phrase (First .. Last));
+         end loop;
+      end loop;
+   end Add_Substrings;
+
+
+   procedure Add_Word
+     (Counter : in out Word_Counter;
+      Word : in String;
+      Count : in String_Count := 1)
+   is
+      procedure Update
+        (Key : in String; Element : in out String_Count);
+      --  Add Count to the occurrence number already stored for Word
+
+      procedure Update
+        (Key : in String; Element : in out String_Count)
+      is
+         pragma Unreferenced (Key);
+      begin
+         Element := Element + Count;
+      end Update;
+
+      Position : Word_Maps.Cursor;
+      Inserted : Boolean;
+   begin
+      --  A single conditional insertion both looks Word up and creates
+      --  its entry when missing, instead of searching the map twice.
+      Word_Maps.Insert (Counter.Map, Word, Count, Position, Inserted);
+
+      if not Inserted then
+         Word_Maps.Update_Element (Counter.Map, Position, Update'Access);
+      end if;
+   end Add_Word;
+
 
 end Natools.Smaz.Tools;

Index: src/natools-smaz-tools.ads
==================================================================
--- src/natools-smaz-tools.ads
+++ src/natools-smaz-tools.ads
@@ -22,10 +22,12 @@
 ------------------------------------------------------------------------------
 
 with Ada.Containers.Indefinite_Doubly_Linked_Lists;
 with Natools.S_Expressions;
 
+private with Ada.Containers.Indefinite_Ordered_Maps;
+
 package Natools.Smaz.Tools is
    pragma Preelaborate;
 
    package String_Lists is new Ada.Containers.Indefinite_Doubly_Linked_Lists
      (String);
@@ -63,7 +65,40 @@
 
    List_For_Linear_Search : String_Lists.List;
    function Linear_Search (Value : String) return Natural;
    --  Function and data source for inefficient but dynamic function
    --  that can be used with Dictionary.Hash.
+
+   type String_Count is range 0 .. 2 ** 31 - 1;
+   --  Type for a number of substring occurrences
+
+   type Word_Counter is private;
+   --  Accumulate frequency/occurrence counts for a set of strings
+
+   procedure Add_Word
+     (Counter : in out Word_Counter;
+      Word : in String;
+      Count : in String_Count := 1);
+   --  Include Count number of occurrences of Word in Counter,
+   --  adding them to the occurrences already recorded for Word, if any.
+
+   procedure Add_Substrings
+     (Counter : in out Word_Counter;
+      Phrase : in String;
+      Min_Size : in Positive;
+      Max_Size : in Positive);
+   --  Include every substring of Phrase whose length is between Min_Size
+   --  and Max_Size (both inclusive), once per position where it occurs.
+   --  Nothing is added when Phrase is shorter than Min_Size or when
+   --  Max_Size is less than Min_Size.
+
+private
+
+   package Word_Maps is new Ada.Containers.Indefinite_Ordered_Maps
+     (String, String_Count);
+   --  Occurrence count of each recorded word, keyed by the word itself
+
+   type Word_Counter is record
+      Map : Word_Maps.Map;
+   end record;
 
 end Natools.Smaz.Tools;