Natools

Check-in [d7fe38e744]
Login
Overview
Comment:smaz_generic-tools: add dictionary evaluation subprograms
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: d7fe38e744cd8d32c21c39c993b0494f5190ac56
User & Date: nat on 2016-11-26 22:48:15
Other Links: manifest | tags
Context
2016-11-27
22:32
smaz_generic-tools: add conversion from dictionary to word list check-in: 00a41d7915 user: nat tags: trunk
2016-11-26
22:48
smaz_generic-tools: add dictionary evaluation subprograms check-in: d7fe38e744 user: nat tags: trunk
2016-11-25
22:59
smaz_generic-tools: new package for dictionary-dependent tools check-in: f85a938c62 user: nat tags: trunk
Changes

Modified src/natools-smaz_generic-tools.adb from [556c2662ab] to [50f7d5ba31].

372
373
374
375
376
377
378

















































































379
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

            Max_Word_Length => Max_Word_Length,
            Offsets => Offsets,
            Values => Values,
            Hash => Smaz_Tools.Dummy_Hash'Access);
      end;
   end To_Dictionary;



   ---------------------------
   -- Dictionary Evaluation --
   ---------------------------

   procedure Evaluate_Dictionary
     (Dict : in Dictionary;
      Corpus : in String_Lists.List;
      Compressed_Size : out Ada.Streams.Stream_Element_Count;
      Counts : out Dictionary_Counts) is
   begin
      --  Start from empty totals, so that the results only reflect Corpus

      Counts := (others => 0);
      Compressed_Size := 0;

      --  Accumulate the contribution of each corpus entry in turn

      for Sample of Corpus loop
         Evaluate_Dictionary_Partial
           (Dict => Dict,
            Corpus_Entry => Sample,
            Compressed_Size => Compressed_Size,
            Counts => Counts);
      end loop;
   end Evaluate_Dictionary;


   procedure Evaluate_Dictionary_Partial
     (Dict : in Dictionary;
      Corpus_Entry : in String;
      Compressed_Size : in out Ada.Streams.Stream_Element_Count;
      Counts : in out Dictionary_Counts)
   is
      use type Ada.Streams.Stream_Element_Offset;
      use type Smaz_Tools.String_Count;

      Encoded : constant Ada.Streams.Stream_Element_Array
        := Compress (Dict, Corpus_Entry);
      Position : Ada.Streams.Stream_Element_Offset := Encoded'First;
      Current : Dictionary_Code;
      Raw_Length : Natural;
   begin
      --  The whole compressed form counts towards the running size

      Compressed_Size := Compressed_Size + Encoded'Length;

      --  Walk through the compressed form one item at a time: items
      --  reported with a non-zero verbatim length are skipped, all
      --  others are tallied against their dictionary code.

      while Position in Encoded'Range loop
         Read_Code
           (Encoded, Position,
            Current, Raw_Length,
            Dict.Last_Code, Dict.Variable_Length_Verbatim);

         if Raw_Length = 0 then
            Counts (Current) := Counts (Current) + 1;
         else
            Skip_Verbatim (Encoded, Position, Raw_Length);
         end if;
      end loop;
   end Evaluate_Dictionary_Partial;


   function Worst_Index
     (Dict : in Dictionary;
      Counts : in Dictionary_Counts;
      Method : in Smaz_Tools.Methods.Enum)
     return Dictionary_Code
   is
      use type Smaz_Tools.Score_Value;

      --  The first code is the initial candidate, the loop below only
      --  replaces it when a strictly lower score is found.

      Candidate : Dictionary_Code := Dictionary_Code'First;
      Lowest : Smaz_Tools.Score_Value
        := Score (Dict, Counts, Candidate, Method);
   begin
      for Code in Dictionary_Code'Succ (Dictionary_Code'First)
                  .. Dict.Last_Code
      loop
         declare
            Current : constant Smaz_Tools.Score_Value
              := Score (Dict, Counts, Code, Method);
         begin
            if Current < Lowest then
               Candidate := Code;
               Lowest := Current;
            end if;
         end;
      end loop;

      return Candidate;
   end Worst_Index;

end Natools.Smaz_Generic.Tools;

Modified src/natools-smaz_generic-tools.ads from [9c01fe72a2] to [22086ada23].

91
92
93
94
95
96
97






















98
99
100
101
102
103
104
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







                                    Append_String'Result.Last_Code)
                        = Value;
      --  Return a new dictionary with Value appended


   type Dictionary_Counts is
     array (Dictionary_Code) of Smaz_Tools.String_Count;
      --  One Smaz_Tools.String_Count counter for each Dictionary_Code
      --  value, used below to report how many times each dictionary
      --  element is used.

   procedure Evaluate_Dictionary
     (Dict : in Dictionary;
      Corpus : in String_Lists.List;
      Compressed_Size : out Ada.Streams.Stream_Element_Count;
      Counts : out Dictionary_Counts);
      --  Compress all strings of Corpus using Dict, returning the total
      --  compressed size and the number of uses for each dictionary
      --  element. Both outputs are computed from scratch.
   procedure Evaluate_Dictionary_Partial
     (Dict : in Dictionary;
      Corpus_Entry : in String;
      Compressed_Size : in out Ada.Streams.Stream_Element_Count;
      Counts : in out Dictionary_Counts);
      --  Compress the single string Corpus_Entry using Dict, adding its
      --  compressed size to Compressed_Size and the uses of each
      --  dictionary element to Counts. Both are accumulated on top of
      --  the values provided by the caller.

   function Worst_Index
     (Dict : in Dictionary;
      Counts : in Dictionary_Counts;
      Method : in Smaz_Tools.Methods.Enum)
     return Dictionary_Code;
      --  Return the element with worst score, i.e. the code with the
      --  lowest Score under Method among Dictionary_Code'First ..
      --  Dict.Last_Code. When several codes share the lowest score,
      --  the first one is returned.


   --  Expression function forwarding the use count of E, taken from
   --  Counts, and the result of Dict_Entry_Length for E in Dict to
   --  Smaz_Tools.Score_Encoded.
   function Score_Encoded
     (Dict : in Dictionary;
      Counts : in Dictionary_Counts;
      E : in Dictionary_Code)
     return Smaz_Tools.Score_Value
     is (Smaz_Tools.Score_Encoded (Counts (E), Dict_Entry_Length (Dict, E)));