Natools

Check-in [e6f252142c]
Login
Overview
Comment: tools/smaz: add support for base-4096 variant
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: e6f252142c2b7b2006ecda8e6568eceeacfdf76d
User & Date: nat on 2017-01-31 22:33:08
Other Links: manifest | tags
Context
2017-02-01
21:37
smaz_tests: add a base-4096 test harness check-in: 1d5eb501bc user: nat tags: trunk
2017-01-31
22:33
tools/smaz: add support for base-4096 variant check-in: e6f252142c user: nat tags: trunk
2017-01-30
21:33
smaz_4096: new instance of generic Smaz, base-64 and large dictionary check-in: c57a785ddb user: nat tags: trunk
Changes

Modified tools/smaz.adb from [b795eb2684] to [94617b0aa3].

1
2
3
4
5
6
7
8
9
------------------------------------------------------------------------------
-- Copyright (c) 2016, Natacha Porté                                        --
--                                                                          --
-- Permission to use, copy, modify, and distribute this software for any    --
-- purpose with or without fee is hereby granted, provided that the above   --
-- copyright notice and this permission notice appear in all copies.        --
--                                                                          --
-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES --
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF         --

|







1
2
3
4
5
6
7
8
9
------------------------------------------------------------------------------
-- Copyright (c) 2016-2017, Natacha Porté                                   --
--                                                                          --
-- Permission to use, copy, modify, and distribute this software for any    --
-- purpose with or without fee is hereby granted, provided that the above   --
-- copyright notice and this permission notice appear in all copies.        --
--                                                                          --
-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES --
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF         --
29
30
31
32
33
34
35

36
37

38
39
40
41
42
43
44
45
46
47

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62

63
64
65
66
67
68
69
70
71
72
73
74
75
76

77
78
79
80
81
82
83
with Natools.Getopt_Long;
with Natools.Parallelism;
with Natools.S_Expressions.Parsers;
with Natools.S_Expressions.Printers;
with Natools.Smaz;
with Natools.Smaz.Tools;
with Natools.Smaz_256;

with Natools.Smaz_64;
with Natools.Smaz_Generic.Tools;

with Natools.Smaz_Implementations.Base_64_Tools;
with Natools.Smaz_Tools;
with Natools.Smaz_Tools.GNAT;
with Natools.String_Escapes;

procedure Smaz is
   --  Short local name for Natools.S_Expressions.To_Atom, which turns a
   --  String into a Stream_Element_Array.
   function To_SEA (S : String) return Ada.Streams.Stream_Element_Array
     renames Natools.S_Expressions.To_Atom;

   --  Instance of the generic Tools child package for the base-256 variant
   package Tools_256 is new Natools.Smaz_256.Tools;

   --  Instance of the generic Tools child package for the base-64 variant
   package Tools_64 is new Natools.Smaz_64.Tools;

   --  Local shorthand for the Methods package declared in Natools.Smaz_Tools
   package Methods renames Natools.Smaz_Tools.Methods;

   --  Actions the tool can perform. Decode, Encode and Evaluate share their
   --  names with entries of Options.Id.
   --  NOTE(review): Nothing is presumably the value used when no action
   --  option has been given -- confirm against the Callback defaults.
   package Actions is
      type Enum is
        (Nothing,
         Decode,
         Encode,
         Evaluate);
   end Actions;

   --  Smaz implementation used for the run, stored in Handler.Algorithm.
   --  The main dispatch sends Base_256 to Dict_256.Process and Base_64 to
   --  Dict_64.Process, while Base_256_Retired first converts its inputs to
   --  Natools.Smaz.Tools.String_Lists (the retired base-256 implementation).
   package Algorithms is
      type Enum is
        (Base_256,

         Base_64,
         Base_256_Retired);
   end Algorithms;

   --  Kinds of dictionary source.
   --  NOTE(review): the use sites are not visible here; presumably each
   --  value names the input format from which the dictionary is built --
   --  confirm before relying on this description.
   package Dict_Sources is
      type Enum is
        (S_Expression,
         Text_List,
         Unoptimized_Text_List);
   end Dict_Sources;

   package Options is
      type Id is
        (Base_256,

         Base_64,
         Output_Ada_Dict,
         Check_Roundtrip,
         Dictionary_Input,
         Decode,
         Encode,
         Evaluate,







>


>










>















>














>







29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
with Natools.Getopt_Long;
with Natools.Parallelism;
with Natools.S_Expressions.Parsers;
with Natools.S_Expressions.Printers;
with Natools.Smaz;
with Natools.Smaz.Tools;
with Natools.Smaz_256;
with Natools.Smaz_4096;
with Natools.Smaz_64;
with Natools.Smaz_Generic.Tools;
with Natools.Smaz_Implementations.Base_4096;
with Natools.Smaz_Implementations.Base_64_Tools;
with Natools.Smaz_Tools;
with Natools.Smaz_Tools.GNAT;
with Natools.String_Escapes;

procedure Smaz is
   --  Short local name for Natools.S_Expressions.To_Atom, which turns a
   --  String into a Stream_Element_Array.
   function To_SEA (S : String) return Ada.Streams.Stream_Element_Array
     renames Natools.S_Expressions.To_Atom;

   --  One instance of the generic Tools child package per Smaz variant
   package Tools_256 is new Natools.Smaz_256.Tools;
   package Tools_4096 is new Natools.Smaz_4096.Tools;
   package Tools_64 is new Natools.Smaz_64.Tools;

   --  Local shorthand for the Methods package declared in Natools.Smaz_Tools
   package Methods renames Natools.Smaz_Tools.Methods;

   --  Actions the tool can perform. Decode, Encode and Evaluate share their
   --  names with entries of Options.Id.
   --  NOTE(review): Nothing is presumably the value used when no action
   --  option has been given -- confirm against the Callback defaults.
   package Actions is
      type Enum is
        (Nothing,
         Decode,
         Encode,
         Evaluate);
   end Actions;

   --  Smaz implementation used for the run, stored in Handler.Algorithm.
   --  The main dispatch sends Base_256, Base_4096 and Base_64 to the Process
   --  procedure of Dict_256, Dict_4096 and Dict_64 respectively, while
   --  Base_256_Retired first converts its inputs to
   --  Natools.Smaz.Tools.String_Lists (the retired base-256 implementation).
   package Algorithms is
      type Enum is
        (Base_256,
         Base_4096,
         Base_64,
         Base_256_Retired);
   end Algorithms;

   --  Kinds of dictionary source.
   --  NOTE(review): the use sites are not visible here; presumably each
   --  value names the input format from which the dictionary is built --
   --  confirm before relying on this description.
   package Dict_Sources is
      type Enum is
        (S_Expression,
         Text_List,
         Unoptimized_Text_List);
   end Dict_Sources;

   package Options is
      type Id is
        (Base_256,
         Base_4096,
         Base_64,
         Output_Ada_Dict,
         Check_Roundtrip,
         Dictionary_Input,
         Decode,
         Encode,
         Evaluate,
138
139
140
141
142
143
144


145
146
147
148
149
150
151
     (Handler  : in out Callback;
      Argument : in String)
     is null;


   function Activate_Dictionary (Dict : in Natools.Smaz_256.Dictionary)
     return Natools.Smaz_256.Dictionary;


   function Activate_Dictionary (Dict : in Natools.Smaz_64.Dictionary)
     return Natools.Smaz_64.Dictionary;
   function Activate_Dictionary (Dict : in Natools.Smaz.Dictionary)
     return Natools.Smaz.Dictionary;
      --  Update Dictionary.Hash so that it can be actually used.
      --  One overload per dictionary type. Dict is an "in" parameter, so the
      --  updated dictionary is the function result, not the argument.
      --  The Smaz_256 body installs Natools.Smaz_Tools.Trie_Search as Hash;
      --  NOTE(review): the other overloads presumably do the same, confirm.

   procedure Build_Perfect_Hash







>
>







143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
     (Handler  : in out Callback;
      Argument : in String)
     is null;


   function Activate_Dictionary (Dict : in Natools.Smaz_256.Dictionary)
     return Natools.Smaz_256.Dictionary;
   function Activate_Dictionary (Dict : in Natools.Smaz_4096.Dictionary)
     return Natools.Smaz_4096.Dictionary;
   function Activate_Dictionary (Dict : in Natools.Smaz_64.Dictionary)
     return Natools.Smaz_64.Dictionary;
   function Activate_Dictionary (Dict : in Natools.Smaz.Dictionary)
     return Natools.Smaz.Dictionary;
      --  Update Dictionary.Hash so that it can be actually used.
      --  One overload per dictionary type. Dict is an "in" parameter, so the
      --  updated dictionary is the function result, not the argument.
      --  The Smaz_256 and Smaz_4096 bodies install
      --  Natools.Smaz_Tools.Trie_Search as Hash;
      --  NOTE(review): the other overloads presumably do the same, confirm.

   procedure Build_Perfect_Hash
160
161
162
163
164
165
166



167
168
169
170
171
172
173
174
175
176
177
178
179




180
181
182
183
184
185
186

   function Getopt_Config return Getopt.Configuration;
      --  Build the configuration object

   --  Same-named accessor for every supported dictionary type. The result
   --  type follows the dictionary: Stream_Element for the byte-based
   --  variants, Base_64_Digit for the base-64 one.
   function Last_Code (Dict : in Natools.Smaz_256.Dictionary)
     return Ada.Streams.Stream_Element
     is (Dict.Last_Code);



   function Last_Code (Dict : in Natools.Smaz_64.Dictionary)
     return Natools.Smaz_Implementations.Base_64_Tools.Base_64_Digit
     is (Dict.Last_Code);
   --  The retired Natools.Smaz.Dictionary exposes the value as Dict_Last
   --  instead of Last_Code.
   function Last_Code (Dict : in Natools.Smaz.Dictionary)
     return Ada.Streams.Stream_Element
     is (Dict.Dict_Last);
      --  Return the last valid entry

   procedure Print_Dictionary
     (Output : in Ada.Text_IO.File_Type;
      Dictionary : in Natools.Smaz_256.Dictionary;
      Hash_Package_Name : in String := "");
   procedure Print_Dictionary




     (Output : in Ada.Text_IO.File_Type;
      Dictionary : in Natools.Smaz_64.Dictionary;
      Hash_Package_Name : in String := "");
   procedure Print_Dictionary
     (Output : in Ada.Text_IO.File_Type;
      Dictionary : in Natools.Smaz.Dictionary;
      Hash_Package_Name : in String := "");
      --  Write Dictionary to Output, one line at a time.
      --  The Smaz_256 overload goes through an instance of
      --  Tools_256.Print_Dictionary_In_Ada and, when Hash_Package_Name is
      --  not empty, passes Hash_Package_Name & ".Hash'Access" as Hash_Image.
      --  NOTE(review): the other overloads are presumably analogous, their
      --  bodies are not visible here.







>
>
>













>
>
>
>







167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200

   function Getopt_Config return Getopt.Configuration;
      --  Build the configuration object

   --  Same-named accessor for every supported dictionary type, so that it
   --  can be handed over under a single name (see "Last_Code => Last_Code"
   --  in the Dict_4096 instantiation). The result type follows the
   --  dictionary: Stream_Element for the byte-based variants, Base_64_Digit
   --  and Base_4096_Digit for the base-64 and base-4096 ones.
   function Last_Code (Dict : in Natools.Smaz_256.Dictionary)
     return Ada.Streams.Stream_Element
     is (Dict.Last_Code);
   function Last_Code (Dict : in Natools.Smaz_4096.Dictionary)
     return Natools.Smaz_Implementations.Base_4096.Base_4096_Digit
     is (Dict.Last_Code);
   function Last_Code (Dict : in Natools.Smaz_64.Dictionary)
     return Natools.Smaz_Implementations.Base_64_Tools.Base_64_Digit
     is (Dict.Last_Code);
   --  The retired Natools.Smaz.Dictionary exposes the value as Dict_Last
   --  instead of Last_Code.
   function Last_Code (Dict : in Natools.Smaz.Dictionary)
     return Ada.Streams.Stream_Element
     is (Dict.Dict_Last);
      --  Return the last valid entry

   procedure Print_Dictionary
     (Output : in Ada.Text_IO.File_Type;
      Dictionary : in Natools.Smaz_256.Dictionary;
      Hash_Package_Name : in String := "");
   procedure Print_Dictionary
     (Output : in Ada.Text_IO.File_Type;
      Dictionary : in Natools.Smaz_4096.Dictionary;
      Hash_Package_Name : in String := "");
   procedure Print_Dictionary
     (Output : in Ada.Text_IO.File_Type;
      Dictionary : in Natools.Smaz_64.Dictionary;
      Hash_Package_Name : in String := "");
   procedure Print_Dictionary
     (Output : in Ada.Text_IO.File_Type;
      Dictionary : in Natools.Smaz.Dictionary;
      Hash_Package_Name : in String := "");
      --  Write Dictionary to Output, one line at a time.
      --  The Smaz_256 and Smaz_4096 overloads go through an instance of
      --  Print_Dictionary_In_Ada from the matching Tools package and, when
      --  Hash_Package_Name is not empty, pass
      --  Hash_Package_Name & ".Hash'Access" as Hash_Image.
      --  NOTE(review): the other overloads are presumably analogous, their
      --  bodies are not visible here.
1022
1023
1024
1025
1026
1027
1028































1029
1030
1031
1032
1033
1034
1035
      Score_Frequency => Tools_256.Score_Frequency'Access,
      Score_Gain => Tools_256.Score_Gain'Access,
      Simple_Dictionary => Natools.Smaz_Tools.Simple_Dictionary,
      Simple_Dictionary_And_Pending
        => Natools.Smaz_Tools.Simple_Dictionary_And_Pending,
      To_Dictionary => Tools_256.To_Dictionary,
      Worst_Element => Tools_256.Worst_Index);
































   package Dict_64 is new Dictionary_Subprograms
     (Dictionary => Natools.Smaz_64.Dictionary,
      Dictionary_Entry
        => Natools.Smaz_Implementations.Base_64_Tools.Base_64_Digit,
      Methods => Natools.Smaz_Tools.Methods.Enum,
      Score_Value => Natools.Smaz_Tools.Score_Value,







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
      Score_Frequency => Tools_256.Score_Frequency'Access,
      Score_Gain => Tools_256.Score_Gain'Access,
      Simple_Dictionary => Natools.Smaz_Tools.Simple_Dictionary,
      Simple_Dictionary_And_Pending
        => Natools.Smaz_Tools.Simple_Dictionary_And_Pending,
      To_Dictionary => Tools_256.To_Dictionary,
      Worst_Element => Tools_256.Worst_Index);

   --  Instance of Dictionary_Subprograms for the base-4096 variant.
   --  The actuals come from three places:
   --    * Natools.Smaz_4096 for the dictionary type and the Compress,
   --      Decompress and Dict_Entry primitives,
   --    * Tools_4096 for the helpers tied to that dictionary type,
   --    * Natools.Smaz_Tools (and its GNAT child) for everything that does
   --      not depend on the variant.
   --  Dict_4096.Process is what the main dispatch calls for
   --  Algorithms.Base_4096.
   package Dict_4096 is new Dictionary_Subprograms
     (Dictionary => Natools.Smaz_4096.Dictionary,
      Dictionary_Entry
        => Natools.Smaz_Implementations.Base_4096.Base_4096_Digit,
      Methods => Natools.Smaz_Tools.Methods.Enum,
      Score_Value => Natools.Smaz_Tools.Score_Value,
      String_Count => Natools.Smaz_Tools.String_Count,
      Word_Counter => Natools.Smaz_Tools.Word_Counter,
      Dictionary_Counts => Tools_4096.Dictionary_Counts,
      String_Lists => Natools.Smaz_Tools.String_Lists,
      Add_Substrings => Natools.Smaz_Tools.Add_Substrings,
      Add_Words => Natools.Smaz_Tools.Add_Words,
      Append_String => Tools_4096.Append_String,
      Build_Perfect_Hash => Natools.Smaz_Tools.GNAT.Build_Perfect_Hash,
      Compress => Natools.Smaz_4096.Compress,
      Decompress => Natools.Smaz_4096.Decompress,
      Dict_Entry => Natools.Smaz_4096.Dict_Entry,
      Evaluate_Dictionary => Tools_4096.Evaluate_Dictionary,
      Evaluate_Dictionary_Partial => Tools_4096.Evaluate_Dictionary_Partial,
      Filter_By_Count => Natools.Smaz_Tools.Filter_By_Count,
      --  Local overloaded name; presumably resolved to the overload taking
      --  a Natools.Smaz_4096.Dictionary through the formal's profile.
      Last_Code => Last_Code,
      Remove_Element => Tools_4096.Remove_Element,
      Score_Encoded => Tools_4096.Score_Encoded'Access,
      Score_Frequency => Tools_4096.Score_Frequency'Access,
      Score_Gain => Tools_4096.Score_Gain'Access,
      Simple_Dictionary => Natools.Smaz_Tools.Simple_Dictionary,
      Simple_Dictionary_And_Pending
        => Natools.Smaz_Tools.Simple_Dictionary_And_Pending,
      To_Dictionary => Tools_4096.To_Dictionary,
      Worst_Element => Tools_4096.Worst_Index);

   package Dict_64 is new Dictionary_Subprograms
     (Dictionary => Natools.Smaz_64.Dictionary,
      Dictionary_Entry
        => Natools.Smaz_Implementations.Base_64_Tools.Base_64_Digit,
      Methods => Natools.Smaz_Tools.Methods.Enum,
      Score_Value => Natools.Smaz_Tools.Score_Value,
1189
1190
1191
1192
1193
1194
1195



1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212















1213
1214
1215
1216
1217
1218
1219

         when Options.Base_256_Retired =>
            Handler.Algorithm := Algorithms.Base_256_Retired;

         when Options.Base_64 =>
            Handler.Algorithm := Algorithms.Base_64;




         when Options.Check_Roundtrip =>
            Handler.Check_Roundtrip := True;
      end case;
   end Option;


   --  Return a copy of Dict whose Hash component is
   --  Natools.Smaz_Tools.Trie_Search. Dict itself is left untouched.
   function Activate_Dictionary (Dict : in Natools.Smaz_256.Dictionary)
     return Natools.Smaz_256.Dictionary
   is
      Result : Natools.Smaz_256.Dictionary := Dict;
   begin
      --  Hand the entries of the dictionary over to the trie search.
      --  NOTE(review): this looks like state kept inside
      --  Natools.Smaz_Tools, in which case only the most recently activated
      --  dictionary is searchable -- confirm.
      Natools.Smaz_Tools.Set_Dictionary_For_Trie_Search
        (Tools_256.To_String_List (Result));
      Result.Hash := Natools.Smaz_Tools.Trie_Search'Access;

      --  Checked only once Hash is in place; presumably Is_Valid relies
      --  on it.
      pragma Assert (Natools.Smaz_256.Is_Valid (Result));
















      return Result;
   end Activate_Dictionary;


   function Activate_Dictionary (Dict : in Natools.Smaz_64.Dictionary)
     return Natools.Smaz_64.Dictionary
   is







>
>
>

















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282

         when Options.Base_256_Retired =>
            Handler.Algorithm := Algorithms.Base_256_Retired;

         when Options.Base_64 =>
            Handler.Algorithm := Algorithms.Base_64;

         when Options.Base_4096 =>
            Handler.Algorithm := Algorithms.Base_4096;

         when Options.Check_Roundtrip =>
            Handler.Check_Roundtrip := True;
      end case;
   end Option;


   --  Return a copy of Dict whose Hash component is
   --  Natools.Smaz_Tools.Trie_Search. Dict itself is left untouched.
   function Activate_Dictionary (Dict : in Natools.Smaz_256.Dictionary)
     return Natools.Smaz_256.Dictionary is
   begin
      return Active : Natools.Smaz_256.Dictionary := Dict do
         --  Hand the entries of the dictionary over to the trie search,
         --  then plug the trie search in as hash function.
         Natools.Smaz_Tools.Set_Dictionary_For_Trie_Search
           (Tools_256.To_String_List (Active));
         Active.Hash := Natools.Smaz_Tools.Trie_Search'Access;

         --  Checked only once Hash is in place
         pragma Assert (Natools.Smaz_256.Is_Valid (Active));
      end return;
   end Activate_Dictionary;


   --  Return a copy of Dict whose Hash component is
   --  Natools.Smaz_Tools.Trie_Search. Dict itself is left untouched.
   function Activate_Dictionary (Dict : in Natools.Smaz_4096.Dictionary)
     return Natools.Smaz_4096.Dictionary is
   begin
      return Active : Natools.Smaz_4096.Dictionary := Dict do
         --  Hand the entries of the dictionary over to the trie search,
         --  then plug the trie search in as hash function.
         Natools.Smaz_Tools.Set_Dictionary_For_Trie_Search
           (Tools_4096.To_String_List (Active));
         Active.Hash := Natools.Smaz_Tools.Trie_Search'Access;

         --  Checked only once Hash is in place
         pragma Assert (Natools.Smaz_4096.Is_Valid (Active));
      end return;
   end Activate_Dictionary;


   function Activate_Dictionary (Dict : in Natools.Smaz_64.Dictionary)
     return Natools.Smaz_64.Dictionary
   is
1284
1285
1286
1287
1288
1289
1290

1291
1292
1293
1294
1295
1296
1297

   function Getopt_Config return Getopt.Configuration is
      use Getopt;
      use Options;
      R : Getopt.Configuration;
   begin
      R.Add_Option ("base-256",      '2', No_Argument,       Base_256);

      R.Add_Option ("base-64",       '6', No_Argument,       Base_64);
      R.Add_Option ("ada-dict",      'A', Optional_Argument, Output_Ada_Dict);
      R.Add_Option ("check",         'C', No_Argument,       Check_Roundtrip);
      R.Add_Option ("decode",        'd', No_Argument,       Decode);
      R.Add_Option ("dict",          'D', No_Argument,       Dictionary_Input);
      R.Add_Option ("encode",        'e', No_Argument,       Encode);
      R.Add_Option ("evaluate",      'E', No_Argument,       Evaluate);







>







1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361

   function Getopt_Config return Getopt.Configuration is
      use Getopt;
      use Options;
      R : Getopt.Configuration;
   begin
      R.Add_Option ("base-256",      '2', No_Argument,       Base_256);
      R.Add_Option ("base-4096",     '4', No_Argument,       Base_4096);
      R.Add_Option ("base-64",       '6', No_Argument,       Base_64);
      R.Add_Option ("ada-dict",      'A', Optional_Argument, Output_Ada_Dict);
      R.Add_Option ("check",         'C', No_Argument,       Check_Roundtrip);
      R.Add_Option ("decode",        'd', No_Argument,       Decode);
      R.Add_Option ("dict",          'D', No_Argument,       Dictionary_Input);
      R.Add_Option ("encode",        'e', No_Argument,       Encode);
      R.Add_Option ("evaluate",      'E', No_Argument,       Evaluate);
1331
1332
1333
1334
1335
1336
1337

























1338
1339
1340
1341
1342
1343
1344
      begin
         Ada.Text_IO.Put_Line (Output, Line);
      end Put_Line;

      procedure Print_Dictionary_In_Ada is
        new Tools_256.Print_Dictionary_In_Ada (Put_Line);
   begin

























      if Hash_Package_Name'Length > 0 then
         Print_Dictionary_In_Ada
           (Dictionary,
            Hash_Image => Hash_Package_Name & ".Hash'Access");
      else
         Print_Dictionary_In_Ada (Dictionary);
      end if;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
      begin
         Ada.Text_IO.Put_Line (Output, Line);
      end Put_Line;

      procedure Print_Dictionary_In_Ada is
        new Tools_256.Print_Dictionary_In_Ada (Put_Line);
   begin
      if Hash_Package_Name'Length > 0 then
         Print_Dictionary_In_Ada
           (Dictionary,
            Hash_Image => Hash_Package_Name & ".Hash'Access");
      else
         Print_Dictionary_In_Ada (Dictionary);
      end if;
   end Print_Dictionary;


   procedure Print_Dictionary
     (Output : in Ada.Text_IO.File_Type;
      Dictionary : in Natools.Smaz_4096.Dictionary;
      Hash_Package_Name : in String := "")
   is
      procedure Put_Line (Line : in String);

      procedure Put_Line (Line : in String) is
      begin
         Ada.Text_IO.Put_Line (Output, Line);
      end Put_Line;

      procedure Print_Dictionary_In_Ada is
        new Tools_4096.Print_Dictionary_In_Ada (Put_Line);
   begin
      if Hash_Package_Name'Length > 0 then
         Print_Dictionary_In_Ada
           (Dictionary,
            Hash_Image => Hash_Package_Name & ".Hash'Access");
      else
         Print_Dictionary_In_Ada (Dictionary);
      end if;
1549
1550
1551
1552
1553
1554
1555





1556
1557
1558
1559
1560
1561
1562
                 & "Use retired base-256 implementation");

            when Options.Base_64 =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Use base-64 implementation");






            when Options.Check_Roundtrip =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Check roundtrip of compression or decompression");
         end case;
      end loop;
   end Print_Help;







>
>
>
>
>







1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
                 & "Use retired base-256 implementation");

            when Options.Base_64 =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Use base-64 implementation");

            when Options.Base_4096 =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Use base-4096 implementation");

            when Options.Check_Roundtrip =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Check roundtrip of compression or decompression");
         end case;
      end loop;
   end Print_Help;
1608
1609
1610
1611
1612
1613
1614



1615
1616
1617
1618
1619
1620
1621

   case Handler.Algorithm is
      when Algorithms.Base_256 =>
         Dict_256.Process
           (Handler, Input_List, Input_Data, Handler.Score_Method);
      when Algorithms.Base_64 =>
         Dict_64.Process



           (Handler, Input_List, Input_Data, Handler.Score_Method);
      when Algorithms.Base_256_Retired =>
         declare
            Converted_Input_List : Natools.Smaz.Tools.String_Lists.List;
            Converted_Input_Data : Natools.Smaz.Tools.String_Lists.List;
         begin
            Convert (Input_List, Converted_Input_List);







>
>
>







1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718

   case Handler.Algorithm is
      when Algorithms.Base_256 =>
         Dict_256.Process
           (Handler, Input_List, Input_Data, Handler.Score_Method);
      when Algorithms.Base_64 =>
         Dict_64.Process
           (Handler, Input_List, Input_Data, Handler.Score_Method);
      when Algorithms.Base_4096 =>
         Dict_4096.Process
           (Handler, Input_List, Input_Data, Handler.Score_Method);
      when Algorithms.Base_256_Retired =>
         declare
            Converted_Input_List : Natools.Smaz.Tools.String_Lists.List;
            Converted_Input_Data : Natools.Smaz.Tools.String_Lists.List;
         begin
            Convert (Input_List, Converted_Input_List);