Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| Comment: | smaz_tools: add a dictionary-independent version of the dynamic hashes | 
|---|---|
| Timelines: | family | ancestors | descendants | both | trunk | 
| Files: | files | file ages | folders | 
| SHA1: | 
24c6ae742efb13e43fefbc7240871102 | 
| User & Date: | nat 2016-11-23 21:10:58.360 | 
Context
| 
   2016-11-24 
 | ||
| 21:01 | smaz_tools: add a Dummy_Hash function for dictionary-specific tools check-in: 4aa72a0c16 user: nat tags: trunk | |
| 
   2016-11-23 
 | ||
| 21:10 | smaz_tools: add a dictionary-independent version of the dynamic hashes check-in: 24c6ae742e user: nat tags: trunk | |
| 
   2016-11-22 
 | ||
| 20:04 | smaz_tools: new package for dictionary-independent tools check-in: b87eafd22c user: nat tags: trunk | |
Changes
Changes to src/natools-smaz_tools.adb.
| ︙ | |||
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26  | 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131  | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +  | 
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF         --
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR  --
-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   --
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN    --
-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF  --
-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.           --
------------------------------------------------------------------------------
with Ada.Unchecked_Deallocation;
package body Natools.Smaz_Tools is
   package Sx renames Natools.S_Expressions;
   function Build_Node
     (Map : Dictionary_Maps.Map;
      Empty_Value : Natural)
     return Trie_Node;
   procedure Set_Map
     (Map : in out Dictionary_Maps.Map;
      List : in String_Lists.List);
      --  Set Map contents to match List by index number
   procedure Free is new Ada.Unchecked_Deallocation
     (Trie_Node, Trie_Node_Access);
   ------------------------------
   -- Local Helper Subprograms --
   ------------------------------
   function Build_Node
     (Map : Dictionary_Maps.Map;
      Empty_Value : Natural)
     return Trie_Node
   is
      function First_Character (S : String) return Character
        is (S (S'First));
      function Is_Current (Cursor : Dictionary_Maps.Cursor; C : Character)
        return Boolean
        is (Dictionary_Maps.Has_Element (Cursor)
            and then First_Character (Dictionary_Maps.Key (Cursor)) = C);
      function Suffix (S : String) return String;
      function Suffix (S : String) return String is
      begin
         return S (S'First + 1 .. S'Last);
      end Suffix;
      use type Ada.Containers.Count_Type;
      Cursor : Dictionary_Maps.Cursor;
      Result : Trie_Node
        := (Ada.Finalization.Controlled with
            Is_Leaf => False,
            Index => Empty_Value,
            Children => (others => null));
   begin
      pragma Assert (Dictionary_Maps.Length (Map) >= 1);
      Cursor := Dictionary_Maps.Find (Map, "");
      if Dictionary_Maps.Has_Element (Cursor) then
         Result.Index := Dictionary_Maps.Element (Cursor);
      end if;
      for C in Character'Range loop
         Cursor := Dictionary_Maps.Ceiling (Map, (1 => C));
         if Is_Current (Cursor, C) then
            if not Is_Current (Dictionary_Maps.Next (Cursor), C)
              and then Dictionary_Maps.Key (Cursor) = (1 => C)
            then
               Result.Children (C)
                 := new Trie_Node'(Ada.Finalization.Controlled with
                     Is_Leaf => True,
                     Index => Dictionary_Maps.Element (Cursor));
            else
               declare
                  New_Map : Dictionary_Maps.Map;
               begin
                  loop
                     Dictionary_Maps.Insert
                       (New_Map,
                        Suffix (Dictionary_Maps.Key (Cursor)),
                        Dictionary_Maps.Element (Cursor));
                     Dictionary_Maps.Next (Cursor);
                     exit when not Is_Current (Cursor, C);
                  end loop;
                  Result.Children (C)
                    := new Trie_Node'(Build_Node (New_Map, Empty_Value));
               end;
            end if;
         end if;
      end loop;
      return Result;
   end Build_Node;
   procedure Set_Map
     (Map : in out Dictionary_Maps.Map;
      List : in String_Lists.List)
   is
      I : Natural := 0;
   begin
      Dictionary_Maps.Clear (Map);
      for S of List loop
         Dictionary_Maps.Insert (Map, S, I);
         I := I + 1;
      end loop;
   end Set_Map;
   ----------------------
   -- Public Interface --
   ----------------------
   procedure Read_List
 | 
| ︙ | |||
49 50 51 52 53 54 55 56 57 58 59 60 61 62  | 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278  | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +  | 
               exit Read_Loop;
         end case;
         Descriptor.Next (Event);
      end loop Read_Loop;
   end Read_List;
   ---------------------------------
   -- Dynamic Dictionary Searches --
   ---------------------------------
   overriding procedure Adjust (Node : in out Trie_Node) is
   begin
      if not Node.Is_Leaf then
         for C in Node.Children'Range loop
            if Node.Children (C) /= null then
               Node.Children (C) := new Trie_Node'(Node.Children (C).all);
            end if;
         end loop;
      end if;
   end Adjust;
   overriding procedure Finalize (Node : in out Trie_Node) is
   begin
      if not Node.Is_Leaf then
         for C in Node.Children'Range loop
            Free (Node.Children (C));
         end loop;
      end if;
   end Finalize;
   procedure Initialize
     (Trie : out Search_Trie;
      List : in String_Lists.List)
   is
      Map : Dictionary_Maps.Map;
      Not_Found : constant Natural := Natural (String_Lists.Length (List));
   begin
      Set_Map (Map, List);
      Trie := (Not_Found => Not_Found,
               Root => Build_Node (Map, Not_Found));
   end Initialize;
   function Linear_Search (Value : String) return Natural is
      Result : Natural := 0;
   begin
      for S of List_For_Linear_Search loop
         exit when S = Value;
         Result := Result + 1;
      end loop;
      return Result;
   end Linear_Search;
   function Map_Search (Value : String) return Natural is
      Cursor : constant Dictionary_Maps.Cursor
        := Dictionary_Maps.Find (Search_Map, Value);
   begin
      if Dictionary_Maps.Has_Element (Cursor) then
         return Natural (Dictionary_Maps.Element (Cursor));
      else
         return Natural (Dictionary_Maps.Length (Search_Map));
      end if;
   end Map_Search;
   function Search (Trie : in Search_Trie; Value : in String) return Natural is
      Index : Positive := Value'First;
      Position : Trie_Node_Access;
   begin
      if Value'Length = 0 then
         return Trie.Not_Found;
      end if;
      Position := Trie.Root.Children (Value (Index));
      loop
         if Position = null then
            return Trie.Not_Found;
         end if;
         Index := Index + 1;
         if Index not in Value'Range then
            return Position.Index;
         elsif Position.Is_Leaf then
            return Trie.Not_Found;
         end if;
         Position := Position.Children (Value (Index));
      end loop;
   end Search;
   procedure Set_Dictionary_For_Map_Search (List : in String_Lists.List) is
   begin
      Set_Map (Search_Map, List);
   end Set_Dictionary_For_Map_Search;
   procedure Set_Dictionary_For_Trie_Search (List : in String_Lists.List) is
   begin
      Initialize (Trie_For_Search, List);
   end Set_Dictionary_For_Trie_Search;
   function Trie_Search (Value : String) return Natural is
   begin
      return Search (Trie_For_Search, Value);
   end Trie_Search;
   -------------------
   -- Word Counting --
   -------------------
   procedure Add_Substrings
 | 
| ︙ | 
Changes to src/natools-smaz_tools.ads.
| ︙ | |||
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47  | 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70  | + + + + + + + + + + + + + + + + + + + + + + +  | 
------------------------------------------------------------------------------
with Ada.Containers.Indefinite_Doubly_Linked_Lists;
with Natools.S_Expressions;
private with Ada.Containers.Indefinite_Ordered_Maps;
private with Ada.Containers.Indefinite_Ordered_Sets;
private with Ada.Finalization;
package Natools.Smaz_Tools is
   pragma Preelaborate;
   package String_Lists is new Ada.Containers.Indefinite_Doubly_Linked_Lists
     (String);
   procedure Read_List
     (List : out String_Lists.List;
      Descriptor : in out S_Expressions.Descriptor'Class);
      --  Read atoms from Descriptor to fill List
   List_For_Linear_Search : String_Lists.List;
   function Linear_Search (Value : String) return Natural;
      --  Function and data source for inefficient but dynamic function
      --  that can be used with Dictionary.Hash.
   procedure Set_Dictionary_For_Map_Search (List : in String_Lists.List);
   function Map_Search (Value : String) return Natural;
      --  Function and data source for logarithmic search using standard
      --  ordered map, that can be used with Dictionary.Hash.
   type Search_Trie is private;
   procedure Initialize (Trie : out Search_Trie; List : in String_Lists.List);
   function Search (Trie : in Search_Trie; Value : in String) return Natural;
      --  Trie-based search in a dynamic dictionary, for lookup whose
      --  speed-vs-memory is even more skewed towards speed.
   procedure Set_Dictionary_For_Trie_Search (List : in String_Lists.List);
   function Trie_Search (Value : String) return Natural;
      --  Function and data source for trie-based search that can be
      --  used with Dictionary.Hash.
   type String_Count is range 0 .. 2 ** 31 - 1;
      --  Type for a number of substring occurrences
   package Methods is
      type Enum is (Encoded, Frequency, Gain);
   end Methods;
 | 
| ︙ | |||
149 150 151 152 153 154 155 156 157  | 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209  | + + + + + + + + + + + + + + + + + + + + + + + + + + + + +  | 
   function To_Scored_Word
     (Cursor : in Word_Maps.Cursor;
      Method : in Methods.Enum)
     return Scored_Word;
   package Scored_Word_Sets is new Ada.Containers.Indefinite_Ordered_Sets
     (Scored_Word);
   package Dictionary_Maps is new Ada.Containers.Indefinite_Ordered_Maps
     (String, Natural);
   Search_Map : Dictionary_Maps.Map;
   type Trie_Node;
   type Trie_Node_Access is access Trie_Node;
   type Trie_Node_Array is array (Character) of Trie_Node_Access;
   type Trie_Node (Is_Leaf : Boolean) is new Ada.Finalization.Controlled
     with record
      Index : Natural;
      case Is_Leaf is
         when True  => null;
         when False => Children : Trie_Node_Array;
      end case;
   end record;
   overriding procedure Adjust (Node : in out Trie_Node);
   overriding procedure Finalize (Node : in out Trie_Node);
   type Search_Trie is record
      Not_Found : Natural;
      Root : Trie_Node (False);
   end record;
   Trie_For_Search : Search_Trie;
end Natools.Smaz_Tools;
 |