Overview
Comment: | smaz-tools: add the simplest dictionary constructor from word counts |
---|---|
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | fb05dda13730a30ec5f67792598f4aa4 |
User & Date: | nat on 2016-09-28 21:25:45 |
Other Links: | manifest | tags |
Context
2016-09-29
21:58 | tools/smaz: add support for dictionary generation from a word list (check-in: 88e525eaf4, user: nat, tags: trunk) |
2016-09-28
21:25 | smaz-tools: add the simplest dictionary constructor from word counts (check-in: fb05dda137, user: nat, tags: trunk) |
2016-09-27
21:05 | smaz-tools: add an accumulator for word count (for dictionary building) (check-in: a901e5c1a7, user: nat, tags: trunk) |
Changes
Modified src/natools-smaz-tools.adb from [285569477e] to [99ea3f33bb].
︙ | ︙ | |||
391 392 393 394 395 396 397 398 | if Word_Maps.Has_Element (Cursor) then Word_Maps.Update_Element (Counter.Map, Cursor, Update'Access); else Word_Maps.Insert (Counter.Map, Word, Count); end if; end Add_Word; end Natools.Smaz.Tools; | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 | if Word_Maps.Has_Element (Cursor) then Word_Maps.Update_Element (Counter.Map, Cursor, Update'Access); else Word_Maps.Insert (Counter.Map, Word, Count); end if; end Add_Word; function Simple_Dictionary (Counter : in Word_Counter; Word_Count : in Natural) return String_Lists.List is use type Ada.Containers.Count_Type; Target_Count : constant Ada.Containers.Count_Type := Ada.Containers.Count_Type (Word_Count); Set : Scored_Word_Sets.Set; Result : String_Lists.List; begin for Cursor in Word_Maps.Iterate (Counter.Map) loop Scored_Word_Sets.Include (Set, To_Scored_Word (Cursor)); if Scored_Word_Sets.Length (Set) > Target_Count then Scored_Word_Sets.Delete_Last (Set); end if; end loop; for Cursor in Scored_Word_Sets.Iterate (Set) loop Result.Append (Scored_Word_Sets.Element (Cursor).Word); end loop; return Result; end Simple_Dictionary; function To_Scored_Word (Cursor : in Word_Maps.Cursor) return Scored_Word is Word : constant String := Word_Maps.Key (Cursor); begin return Scored_Word' (Size => Word'Length, Word => Word, Score => Score_Value (Word_Maps.Element (Cursor)) * Word'Length); end To_Scored_Word; end Natools.Smaz.Tools; |
Modified src/natools-smaz-tools.ads from [06e6fe04f7] to [1f1e29a7d3].
︙ | ︙ | |||
21 22 23 24 25 26 27 28 29 30 31 32 33 34 | -- package. -- ------------------------------------------------------------------------------ with Ada.Containers.Indefinite_Doubly_Linked_Lists; with Natools.S_Expressions; private with Ada.Containers.Indefinite_Ordered_Maps; package Natools.Smaz.Tools is pragma Preelaborate; package String_Lists is new Ada.Containers.Indefinite_Doubly_Linked_Lists (String); | > | 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | -- package. -- ------------------------------------------------------------------------------ with Ada.Containers.Indefinite_Doubly_Linked_Lists; with Natools.S_Expressions; private with Ada.Containers.Indefinite_Ordered_Maps; private with Ada.Containers.Indefinite_Ordered_Sets; package Natools.Smaz.Tools is pragma Preelaborate; package String_Lists is new Ada.Containers.Indefinite_Doubly_Linked_Lists (String); |
︙ | ︙ | |||
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 | (Counter : in out Word_Counter; Phrase : in String; Min_Size : in Positive; Max_Size : in Positive); -- Include all the substrings of Phrase whose lengths are -- between Min_Size and Max_Size. private package Word_Maps is new Ada.Containers.Indefinite_Ordered_Maps (String, String_Count); type Word_Counter is record Map : Word_Maps.Map; end record; end Natools.Smaz.Tools; | > > > > > > > > > > > > > > > > > > > > > > > > > | 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 | (Counter : in out Word_Counter; Phrase : in String; Min_Size : in Positive; Max_Size : in Positive); -- Include all the substrings of Phrase whose lengths are -- between Min_Size and Max_Size. function Simple_Dictionary (Counter : in Word_Counter; Word_Count : in Natural) return String_Lists.List; -- Return the Word_Count words in Counter that have the highest score, -- the score being count * length. private package Word_Maps is new Ada.Containers.Indefinite_Ordered_Maps (String, String_Count); type Word_Counter is record Map : Word_Maps.Map; end record; type Score_Value is range 0 .. 2 ** 31 - 1; type Scored_Word (Size : Natural) is record Word : String (1 .. Size); Score : Score_Value; end record; function "<" (Left, Right : Scored_Word) return Boolean is (Left.Score > Right.Score or else (Left.Score = Right.Score and then Left.Word < Right.Word)); function To_Scored_Word (Cursor : in Word_Maps.Cursor) return Scored_Word; package Scored_Word_Sets is new Ada.Containers.Indefinite_Ordered_Sets (Scored_Word); end Natools.Smaz.Tools; |