Natools

Check-in [a901e5c1a7]
Login
Overview
Comment:smaz-tools: add an accumulator for word count (for dictionary building)
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: a901e5c1a7a758d0994fca801c50bf31afdd21ec
User & Date: nat on 2016-09-27 21:05:11
Other Links: manifest | tags
Context
2016-09-28
21:25
smaz-tools: add the simplest dictionary constructor from word counts check-in: fb05dda137 user: nat tags: trunk
2016-09-27
21:05
smaz-tools: add an accumulator for word count (for dictionary building) check-in: a901e5c1a7 user: nat tags: trunk
2016-09-26
21:22
tools/smaz: add decompression of an input list of encoded strings check-in: 6f2cf4bf88 user: nat tags: trunk
Changes

Modified src/natools-smaz-tools.adb from [2480b82e45] to [285569477e].

344
345
346
347
348
349
350















































351
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

            Max_Word_Length => Max_Word_Length,
            Offsets => Offsets,
            Values => Values,
            Hash => Dummy_Hash'Access);
      end;
   end To_Dictionary;



   -------------------
   -- Word Counting --
   -------------------

   procedure Add_Substrings
     (Counter : in out Word_Counter;
      Phrase : in String;
      Min_Size : in Positive;
      Max_Size : in Positive) is
   begin
      for First in Phrase'First .. Phrase'Last - Min_Size + 1 loop
         for Last in First + Min_Size - 1
           .. Natural'Min (First + Max_Size - 1, Phrase'Last)
         loop
            Add_Word (Counter, Phrase (First .. Last));
         end loop;
      end loop;
   end Add_Substrings;


   procedure Add_Word
     (Counter : in out Word_Counter;
      Word : in String;
      Count : in String_Count := 1)
   is
      procedure Update
        (Key : in String; Element : in out String_Count);

      procedure Update
        (Key : in String; Element : in out String_Count)
      is
         pragma Unreferenced (Key);
      begin
         Element := Element + Count;
      end Update;

      Cursor : constant Word_Maps.Cursor := Word_Maps.Find (Counter.Map, Word);
   begin
      if Word_Maps.Has_Element (Cursor) then
         Word_Maps.Update_Element (Counter.Map, Cursor, Update'Access);
      else
         Word_Maps.Insert (Counter.Map, Word, Count);
      end if;
   end Add_Word;

end Natools.Smaz.Tools;

Modified src/natools-smaz-tools.ads from [27bd58b9c3] to [06e6fe04f7].

20
21
22
23
24
25
26


27
28
29
30
31
32
33
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35







+
+







-- generated and hard-coded, so the final client shouldn't need this        --
-- package.                                                                 --
------------------------------------------------------------------------------

with Ada.Containers.Indefinite_Doubly_Linked_Lists;
with Natools.S_Expressions;

private with Ada.Containers.Indefinite_Ordered_Maps;

package Natools.Smaz.Tools is
   pragma Preelaborate;

   package String_Lists is new Ada.Containers.Indefinite_Doubly_Linked_Lists
     (String);

   procedure Read_List
61
62
63
64
65
66
67
68





























69
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100








+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

      --  All the defaults value are what was used to generate the constant
      --  in Natools.Smaz.Original.

   List_For_Linear_Search : String_Lists.List;
   function Linear_Search (Value : String) return Natural;
      --  Function and data source for inefficient but dynamic function
      --  that can be used with Dictionary.Hash.

   type String_Count is range 0 .. 2 ** 31 - 1;
      --  Type for a number of substring occurrences

   type Word_Counter is private;
      --  Accumulate frequency/occurrence counts for a set of strings

   procedure Add_Word
     (Counter : in out Word_Counter;
      Word : in String;
      Count : in String_Count := 1);
      --  Include Count number of occurrences of Word in Counter

   procedure Add_Substrings
     (Counter : in out Word_Counter;
      Phrase : in String;
      Min_Size : in Positive;
      Max_Size : in Positive);
      --  Include all the substrings of Phrase whose lengths are
      --  between Min_Size and Max_Size.

private

   package Word_Maps is new Ada.Containers.Indefinite_Ordered_Maps
     (String, String_Count);

   type Word_Counter is record
      Map : Word_Maps.Map;
   end record;

end Natools.Smaz.Tools;