Index: src/natools-smaz-tools.adb ================================================================== --- src/natools-smaz-tools.adb +++ src/natools-smaz-tools.adb @@ -11,25 +11,104 @@ -- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -- -- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -- -- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -- -- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -- ------------------------------------------------------------------------------ + +with Ada.Unchecked_Deallocation; package body Natools.Smaz.Tools is package Sx renames Natools.S_Expressions; + + function Build_Node + (Map : Dictionary_Maps.Map; + Empty_Value : Natural) + return Trie_Node; function Dummy_Hash (Value : String) return Natural; -- Placeholder for Hash member, always raises Program_Error function Image (B : Boolean) return String; -- Return correctly-cased image of B + procedure Free is new Ada.Unchecked_Deallocation + (Trie_Node, Trie_Node_Access); + ------------------------------ -- Local Helper Subprograms -- ------------------------------ + + function Build_Node + (Map : Dictionary_Maps.Map; + Empty_Value : Natural) + return Trie_Node + is + function First_Character (S : String) return Character + is (S (S'First)); + function Is_Current (Cursor : Dictionary_Maps.Cursor; C : Character) + return Boolean + is (Dictionary_Maps.Has_Element (Cursor) + and then First_Character (Dictionary_Maps.Key (Cursor)) = C); + + function Suffix (S : String) return String; + + function Suffix (S : String) return String is + begin + return S (S'First + 1 .. S'Last); + end Suffix; + + use type Ada.Containers.Count_Type; + Cursor : Dictionary_Maps.Cursor; + Result : Trie_Node + := (Ada.Finalization.Controlled with + Is_Leaf => False, + Index => Empty_Value, + Children => (others => null)); + begin + pragma Assert (Dictionary_Maps.Length (Map) >= 1); + + Cursor := Dictionary_Maps.Find (Map, ""); + + if Dictionary_Maps.Has_Element (Cursor) then + Result.Index := Natural (Dictionary_Maps.Element (Cursor)); + end if; + + for C in Character'Range loop + Cursor := Dictionary_Maps.Ceiling (Map, (1 => C)); + + if Is_Current (Cursor, C) then + if not Is_Current (Dictionary_Maps.Next (Cursor), C) + and then Dictionary_Maps.Key (Cursor) = (1 => C) + then + Result.Children (C) + := new Trie_Node'(Ada.Finalization.Controlled with + Is_Leaf => True, + Index => Natural (Dictionary_Maps.Element (Cursor))); + else + declare + New_Map : Dictionary_Maps.Map; + begin + loop + Dictionary_Maps.Insert + (New_Map, + Suffix (Dictionary_Maps.Key (Cursor)), + Dictionary_Maps.Element (Cursor)); + Dictionary_Maps.Next (Cursor); + exit when not Is_Current (Cursor, C); + end loop; + + Result.Children (C) + := new Trie_Node'(Build_Node (New_Map, Empty_Value)); + end; + end if; + end if; + end loop; + + return Result; + end Build_Node; function Dummy_Hash (Value : String) return Natural is pragma Unreferenced (Value); begin raise Program_Error with "Dummy_Hash called"; @@ -339,10 +418,44 @@ --------------------------------- -- Dynamic Dictionary Searches -- --------------------------------- + + overriding procedure Adjust (Node : in out Trie_Node) is + begin + if not Node.Is_Leaf then + for C in Node.Children'Range loop + if Node.Children (C) /= null then + Node.Children (C) := new Trie_Node'(Node.Children (C).all); + end if; + end loop; + end if; + end Adjust; + + + overriding procedure Finalize (Node : in out Trie_Node) is + begin + if not Node.Is_Leaf then + for C in Node.Children'Range loop + Free (Node.Children (C)); + end loop; + end if; + end Finalize; + + + procedure Initialize (Trie : out Search_Trie; Dict : in Dictionary) is + Map : Dictionary_Maps.Map; + begin + for I in Dict.Offsets'Range loop + Dictionary_Maps.Insert (Map, Dict_Entry (Dict, I), I); + end loop; + + Trie := (Not_Found => Natural (Dict.Dict_Last) + 1, + Root => Build_Node (Map, Natural (Dict.Dict_Last) + 1)); + end Initialize; + function Linear_Search (Value : String) return Natural is Result : Ada.Streams.Stream_Element := 0; begin for S of List_For_Linear_Search loop @@ -363,10 +476,38 @@ else return Natural (Ada.Streams.Stream_Element'Last); end if; end Map_Search; + + function Search (Trie : in Search_Trie; Value : in String) return Natural is + Index : Positive := Value'First; + Position : Trie_Node_Access; + begin + if Value'Length = 0 then + return Trie.Not_Found; + end if; + + Position := Trie.Root.Children (Value (Index)); + + loop + if Position = null then + return Trie.Not_Found; + end if; + + Index := Index + 1; + + if Index not in Value'Range then + return Position.Index; + elsif Position.Is_Leaf then + return Trie.Not_Found; + end if; + + Position := Position.Children (Value (Index)); + end loop; + end Search; + procedure Set_Dictionary_For_Map_Search (Dict : in Dictionary) is begin Dictionary_Maps.Clear (Search_Map); Index: src/natools-smaz-tools.ads ================================================================== --- src/natools-smaz-tools.ads +++ src/natools-smaz-tools.ads @@ -24,10 +24,11 @@ with Ada.Containers.Indefinite_Doubly_Linked_Lists; with Natools.S_Expressions; private with Ada.Containers.Indefinite_Ordered_Maps; private with Ada.Containers.Indefinite_Ordered_Sets; +private with Ada.Finalization; package Natools.Smaz.Tools is pragma Preelaborate; package String_Lists is new Ada.Containers.Indefinite_Doubly_Linked_Lists @@ -72,10 +73,16 @@ procedure Set_Dictionary_For_Map_Search (Dict : in Dictionary); function Map_Search (Value : String) return Natural; -- Function and data source for logarithmic search using standard -- ordered map, that can be used with Dictionary.Hash. + type Search_Trie is private; + procedure Initialize (Trie : out Search_Trie; Dict : in Dictionary); + function Search (Trie : in Search_Trie; Value : in String) return Natural; + -- Trie-based search in a dynamic dictionary, for lookup whose + -- speed-vs-memory is even more skewed towards speed. + type String_Count is range 0 .. 2 ** 31 - 1; -- Type for a number of substring occurrences type Word_Counter is private; -- Accumulate frequency/occurrence counts for a set of strings @@ -156,7 +163,29 @@ package Dictionary_Maps is new Ada.Containers.Indefinite_Ordered_Maps (String, Ada.Streams.Stream_Element); Search_Map : Dictionary_Maps.Map; + + type Trie_Node; + type Trie_Node_Access is access Trie_Node; + type Trie_Node_Array is array (Character) of Trie_Node_Access; + + type Trie_Node (Is_Leaf : Boolean) is new Ada.Finalization.Controlled + with record + Index : Natural; + + case Is_Leaf is + when True => null; + when False => Children : Trie_Node_Array; + end case; + end record; + + overriding procedure Adjust (Node : in out Trie_Node); + overriding procedure Finalize (Node : in out Trie_Node); + + type Search_Trie is record + Not_Found : Natural; + Root : Trie_Node (False); + end record; end Natools.Smaz.Tools;