Natools

Check-in [17783bc63e]
Login
Overview
Comment:smaz-tools: add Evaluate_Dictionary_Partial to work on a single string
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 17783bc63e6186357baf2f2539aa915d9c646586
User & Date: nat on 2016-10-10 14:21:14
Other Links: manifest | tags
Context
2016-10-11
15:37
parallelism: new package providing framework for simple parallelizations check-in: a45910b245 user: nat tags: trunk
2016-10-10
14:21
smaz-tools: add Evaluate_Dictionary_Partial to work on a single string check-in: 17783bc63e user: nat tags: trunk
2016-10-09
17:49
tools/smaz: add a statistics to the evaluation output check-in: 7b31b329d7 user: nat tags: trunk
Changes

Modified src/natools-smaz-tools.adb from [627ca11783] to [c548ec8fb3].

437
438
439
440
441
442
443
444
















445


446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
   end Add_Words;


   --  Evaluate Dict against every string of Corpus.
   --  Compressed_Size and all entries of Counts are first reset to zero.
   --  Each string is then compressed with Compress, its compressed length
   --  is added to Compressed_Size, and the compressed bytes are scanned:
   --  every byte in Dict.Offsets'Range increments the matching entry of
   --  Counts, while other bytes only make the scan skip ahead.
   procedure Evaluate_Dictionary
     (Dict : in Dictionary;
      Corpus : in String_Lists.List;
      Compressed_Size : out Ada.Streams.Stream_Element_Count;
      Counts : out Dictionary_Counts)
















   is


      --  Distance between the largest byte value and Dict.Dict_Last,
      --  used below when the length is read from a following byte.
      Verbatim_Code_Count : constant Ada.Streams.Stream_Element_Offset
        := Ada.Streams.Stream_Element_Offset
           (Ada.Streams.Stream_Element'Last - Dict.Dict_Last);

      Verbatim_Length : Ada.Streams.Stream_Element_Offset;
      Input_Byte : Ada.Streams.Stream_Element;
   begin
      Compressed_Size := 0;

      for I in Counts'Range loop
         Counts (I) := 0;
      end loop;

      for S of Corpus loop
         declare
            use type Ada.Streams.Stream_Element_Offset;
            Compressed : constant Ada.Streams.Stream_Element_Array
              := Compress (Dict, S);
            Index : Ada.Streams.Stream_Element_Offset := Compressed'First;
         begin
            Compressed_Size := Compressed_Size + Compressed'Length;

            while Index in Compressed'Range loop
               Input_Byte := Compressed (Index);

               if Input_Byte in Dict.Offsets'Range then
                  --  Dictionary reference: count one more use of it
                  Counts (Input_Byte) := Counts (Input_Byte) + 1;
                  Index := Index + 1;
               else
                  --  Any other byte: presumably the header of a verbatim
                  --  run (going by the variable names); nothing is counted
                  --  and the run is skipped.
                  if not Dict.Variable_Length_Verbatim then
                     Verbatim_Length := Ada.Streams.Stream_Element_Offset
                       (Ada.Streams.Stream_Element'Last - Input_Byte) + 1;
                  elsif Input_Byte < Ada.Streams.Stream_Element'Last then
                     Verbatim_Length := Ada.Streams.Stream_Element_Offset
                       (Ada.Streams.Stream_Element'Last - Input_Byte);
                  else
                     --  Largest byte value: the length is derived from
                     --  the byte that follows it.
                     Index := Index + 1;
                     Verbatim_Length := Ada.Streams.Stream_Element_Offset
                       (Compressed (Index)) + Verbatim_Code_Count - 1;
                  end if;

                  --  Step over the current byte and Verbatim_Length more
                  Index := Index + Verbatim_Length + 1;
               end if;
            end loop;
         end;
      end loop;
   end Evaluate_Dictionary;


   function Simple_Dictionary
     (Counter : in Word_Counter;
      Word_Count : in Natural)
     return String_Lists.List
   is







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>






<
<
<
<
<
<
<
<
<
<
|
|
|
|
|

|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
|
<
<
|







437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469










470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497


498
499
500
501
502
503
504
505
   end Add_Words;


   --  Evaluate Dict against the whole Corpus: both outputs are reset to
   --  zero, then every string of Corpus is handed in turn to
   --  Evaluate_Dictionary_Partial, which accumulates into them.
   procedure Evaluate_Dictionary
     (Dict : in Dictionary;
      Corpus : in String_Lists.List;
      Compressed_Size : out Ada.Streams.Stream_Element_Count;
      Counts : out Dictionary_Counts) is
   begin
      --  Clear the accumulators before any string is processed
      for Code in Counts'Range loop
         Counts (Code) := 0;
      end loop;

      Compressed_Size := 0;

      for Position in Corpus.Iterate loop
         Evaluate_Dictionary_Partial
           (Dict => Dict,
            Corpus_Entry => String_Lists.Element (Position),
            Compressed_Size => Compressed_Size,
            Counts => Counts);
      end loop;
   end Evaluate_Dictionary;


   --  Evaluate Dict against the single string Corpus_Entry.
   --  The string is compressed with Compress; its compressed length is
   --  added to Compressed_Size, and each compressed byte that lies in
   --  Dict.Offsets'Range increments the matching entry of Counts.
   --  Neither accumulator is reset here, so calls can be chained.
   procedure Evaluate_Dictionary_Partial
     (Dict : in Dictionary;
      Corpus_Entry : in String;
      Compressed_Size : in out Ada.Streams.Stream_Element_Count;
      Counts : in out Dictionary_Counts)
   is
      use type Ada.Streams.Stream_Element_Offset;

      subtype Stream_Offset is Ada.Streams.Stream_Element_Offset;

      Top_Byte : constant Ada.Streams.Stream_Element
        := Ada.Streams.Stream_Element'Last;

      --  Distance between the largest byte value and Dict.Dict_Last,
      --  used when the run length is read from a following byte.
      Extended_Base : constant Stream_Offset
        := Stream_Offset (Top_Byte - Dict.Dict_Last);

      Encoded : constant Ada.Streams.Stream_Element_Array
        := Compress (Dict, Corpus_Entry);
      Cursor : Stream_Offset := Encoded'First;
      Code : Ada.Streams.Stream_Element;
      Run_Length : Stream_Offset;
   begin
      Compressed_Size := Compressed_Size + Encoded'Length;

      while Cursor in Encoded'Range loop
         Code := Encoded (Cursor);

         if Code in Dict.Offsets'Range then
            --  Dictionary reference: record one more use
            Counts (Code) := Counts (Code) + 1;
            Cursor := Cursor + 1;

         elsif not Dict.Variable_Length_Verbatim then
            --  Run length encoded directly in the byte
            Run_Length := Stream_Offset (Top_Byte - Code) + 1;
            Cursor := Cursor + Run_Length + 1;

         elsif Code < Top_Byte then
            --  Variable-length mode, length still held in the byte itself
            Run_Length := Stream_Offset (Top_Byte - Code);
            Cursor := Cursor + Run_Length + 1;

         else
            --  Variable-length mode with the largest byte value: the
            --  length is derived from the byte that follows.
            Cursor := Cursor + 1;
            Run_Length
              := Stream_Offset (Encoded (Cursor)) + Extended_Base - 1;
            Cursor := Cursor + Run_Length + 1;
         end if;
      end loop;
   end Evaluate_Dictionary_Partial;


   function Simple_Dictionary
     (Counter : in Word_Counter;
      Word_Count : in Natural)
     return String_Lists.List
   is

Modified src/natools-smaz-tools.ads from [d2561626f6] to [33e77934d4].

109
110
111
112
113
114
115





116
117
118
119
120
121
122
     array (Ada.Streams.Stream_Element) of String_Count;

   procedure Evaluate_Dictionary
     (Dict : in Dictionary;
      Corpus : in String_Lists.List;
      Compressed_Size : out Ada.Streams.Stream_Element_Count;
      Counts : out Dictionary_Counts);





      --  Compress all strings of Corpus, returning the total number of
      --  compressed bytes and the number of uses for each dictionary
      --  element. Both Compressed_Size and Counts are reset to zero
      --  before the corpus is processed.

private

   package Word_Maps is new Ada.Containers.Indefinite_Ordered_Maps







>
>
>
>
>







109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
     array (Ada.Streams.Stream_Element) of String_Count;

   procedure Evaluate_Dictionary
     (Dict : in Dictionary;
      Corpus : in String_Lists.List;
      Compressed_Size : out Ada.Streams.Stream_Element_Count;
      Counts : out Dictionary_Counts);
      --  Compress all strings of Corpus, returning the total number of
      --  compressed bytes and the number of uses for each dictionary
      --  element. Both outputs are reset to zero before processing.
   procedure Evaluate_Dictionary_Partial
     (Dict : in Dictionary;
      Corpus_Entry : in String;
      Compressed_Size : in out Ada.Streams.Stream_Element_Count;
      Counts : in out Dictionary_Counts);
      --  Compress the single string Corpus_Entry, adding its number of
      --  compressed bytes to Compressed_Size and the number of uses of
      --  each dictionary element to Counts. Neither parameter is reset,
      --  so the caller is responsible for their initial values.

private

   package Word_Maps is new Ada.Containers.Indefinite_Ordered_Maps