Natools

Check-in [f44feb5e3e]
Login
Overview
Comment:tools/smaz: add a roundtrip check option to help debug new code
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: f44feb5e3ebeedfd55804b2b729c6d7e4e29569b
User & Date: nat on 2016-12-17 22:48:31
Other Links: manifest | tags
Context
2016-12-18
21:16
smaz_implementations-base_64_tools: new package for paddingless base-64 check-in: fd2ccb00b9 user: nat tags: trunk
2016-12-17
22:48
tools/smaz: add a roundtrip check option to help debug new code check-in: f44feb5e3e user: nat tags: trunk
2016-12-16
20:55
smaz_generic: optimize compression

For some reason it seems even with -O3, calling Dict_Entry involves a string copy, which makes `memcpy` the largest time consumer of the compression algorithm. Inlining it manually improves performance a lot. check-in: 674fadc74b user: nat tags: trunk

Changes

Modified tools/smaz.adb from [cd9e4a4669] to [f7952f4102].

67
68
69
70
71
72
73

74
75
76
77
78
79
80
         Unoptimized_Text_List);
   end Dict_Sources;

   package Options is
      type Id is
        (Base_256,
         Output_Ada_Dict,

         Dictionary_Input,
         Decode,
         Encode,
         Evaluate,
         Filter_Threshold,
         Output_Hash,
         Job_Count,







>







67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
         Unoptimized_Text_List);
   end Dict_Sources;

   package Options is
      type Id is
        (Base_256,
         Output_Ada_Dict,
         Check_Roundtrip,
         Dictionary_Input,
         Decode,
         Encode,
         Evaluate,
         Filter_Threshold,
         Output_Hash,
         Job_Count,
116
117
118
119
120
121
122

123
124
125
126
127
128
129
      Job_Count : Natural := 0;
      Filter_Threshold : Natools.Smaz_Tools.String_Count := 0;
      Score_Method : Methods.Enum := Methods.Encoded;
      Action : Actions.Enum := Actions.Nothing;
      Ada_Dictionary : Ada.Strings.Unbounded.Unbounded_String;
      Hash_Package : Ada.Strings.Unbounded.Unbounded_String;
      Dict_Source : Dict_Sources.Enum := Dict_Sources.S_Expression;

   end record;

   overriding procedure Option
     (Handler  : in out Callback;
      Id       : in Options.Id;
      Argument : in String);








>







117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
      Job_Count : Natural := 0;
      Filter_Threshold : Natools.Smaz_Tools.String_Count := 0;
      Score_Method : Methods.Enum := Methods.Encoded;
      Action : Actions.Enum := Actions.Nothing;
      Ada_Dictionary : Ada.Strings.Unbounded.Unbounded_String;
      Hash_Package : Ada.Strings.Unbounded.Unbounded_String;
      Dict_Source : Dict_Sources.Enum := Dict_Sources.S_Expression;
      Check_Roundtrip : Boolean := False;
   end record;

   overriding procedure Option
     (Handler  : in out Callback;
      Id       : in Options.Id;
      Argument : in String);

641
642
643
644
645
646
647
























648
649
650
651
652
653
654
               if Handler.Sx_Output then
                  Sx_Output.Open_List;
                  for S of Data_List loop
                     Sx_Output.Append_String (Decompress (Dict, To_SEA (S)));
                  end loop;
                  Sx_Output.Close_List;
               end if;

























               if Handler.Stat_Output then
                  declare
                     procedure Print_Line (Original, Output : Natural);

                     procedure Print_Line (Original, Output : Natural) is
                     begin







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
               if Handler.Sx_Output then
                  Sx_Output.Open_List;
                  for S of Data_List loop
                     Sx_Output.Append_String (Decompress (Dict, To_SEA (S)));
                  end loop;
                  Sx_Output.Close_List;
               end if;

               if Handler.Check_Roundtrip then
                  for S of Data_List loop
                     declare
                        use type Ada.Streams.Stream_Element_Array;
                        Input : constant Ada.Streams.Stream_Element_Array
                          := To_SEA (S);
                        Processed : constant String
                          := Decompress (Dict, Input);
                        Roundtrip : constant Ada.Streams.Stream_Element_Array
                          := Compress (Dict, Processed);
                     begin
                        if Input /= Roundtrip then
                           Sx_Output.Open_List;
                           Sx_Output.Append_String
                             ("decompress-roundtrip-failed");
                           Sx_Output.Append_Atom (Input);
                           Sx_Output.Append_String (Processed);
                           Sx_Output.Append_Atom (Roundtrip);
                           Sx_Output.Close_List;
                        end if;
                     end;
                  end loop;
               end if;

               if Handler.Stat_Output then
                  declare
                     procedure Print_Line (Original, Output : Natural);

                     procedure Print_Line (Original, Output : Natural) is
                     begin
682
683
684
685
686
687
688





















689
690
691
692
693
694
695
               if Handler.Sx_Output then
                  Sx_Output.Open_List;
                  for S of Data_List loop
                     Sx_Output.Append_Atom (Compress (Dict, S));
                  end loop;
                  Sx_Output.Close_List;
               end if;






















               if Handler.Stat_Output then
                  declare
                     procedure Print_Line (Original, Output, Base64 : Natural);

                     procedure Print_Line
                       (Original, Output, Base64 : in Natural) is







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
               if Handler.Sx_Output then
                  Sx_Output.Open_List;
                  for S of Data_List loop
                     Sx_Output.Append_Atom (Compress (Dict, S));
                  end loop;
                  Sx_Output.Close_List;
               end if;

               if Handler.Check_Roundtrip then
                  for S of Data_List loop
                     declare
                        Processed : constant Ada.Streams.Stream_Element_Array
                          := Compress (Dict, S);
                        Roundtrip : constant String
                          := Decompress (Dict, Processed);
                     begin
                        if S /= Roundtrip then
                           Sx_Output.Open_List;
                           Sx_Output.Append_String
                             ("compress-roundtrip-failed");
                           Sx_Output.Append_String (S);
                           Sx_Output.Append_Atom (Processed);
                           Sx_Output.Append_String (Roundtrip);
                           Sx_Output.Close_List;
                        end if;
                     end;
                  end loop;
               end if;

               if Handler.Stat_Output then
                  declare
                     procedure Print_Line (Original, Output, Base64 : Natural);

                     procedure Print_Line
                       (Original, Output, Base64 : in Natural) is
1093
1094
1095
1096
1097
1098
1099



1100
1101
1102
1103
1104
1105
1106
            Handler.Vlen_Verbatim := False;

         when Options.Base_256 =>
            Handler.Algorithm := Algorithms.Base_256;

         when Options.Base_256_Retired =>
            Handler.Algorithm := Algorithms.Base_256_Retired;



      end case;
   end Option;


   function Activate_Dictionary (Dict : in Natools.Smaz_256.Dictionary)
     return Natools.Smaz_256.Dictionary
   is







>
>
>







1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
            Handler.Vlen_Verbatim := False;

         when Options.Base_256 =>
            Handler.Algorithm := Algorithms.Base_256;

         when Options.Base_256_Retired =>
            Handler.Algorithm := Algorithms.Base_256_Retired;

         when Options.Check_Roundtrip =>
            Handler.Check_Roundtrip := True;
      end case;
   end Option;


   function Activate_Dictionary (Dict : in Natools.Smaz_256.Dictionary)
     return Natools.Smaz_256.Dictionary
   is
1172
1173
1174
1175
1176
1177
1178

1179
1180
1181
1182
1183
1184
1185
   function Getopt_Config return Getopt.Configuration is
      use Getopt;
      use Options;
      R : Getopt.Configuration;
   begin
      R.Add_Option ("base-256",      '2', No_Argument,       Base_256);
      R.Add_Option ("ada-dict",      'A', Optional_Argument, Output_Ada_Dict);

      R.Add_Option ("decode",        'd', No_Argument,       Decode);
      R.Add_Option ("dict",          'D', No_Argument,       Dictionary_Input);
      R.Add_Option ("encode",        'e', No_Argument,       Encode);
      R.Add_Option ("evaluate",      'E', No_Argument,       Evaluate);
      R.Add_Option ("filter",        'F', Required_Argument, Filter_Threshold);
      R.Add_Option ("help",          'h', No_Argument,       Help);
      R.Add_Option ("hash-pkg",      'H', Required_Argument, Output_Hash);







>







1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
   function Getopt_Config return Getopt.Configuration is
      use Getopt;
      use Options;
      R : Getopt.Configuration;
   begin
      R.Add_Option ("base-256",      '2', No_Argument,       Base_256);
      R.Add_Option ("ada-dict",      'A', Optional_Argument, Output_Ada_Dict);
      R.Add_Option ("check",         'C', No_Argument,       Check_Roundtrip);
      R.Add_Option ("decode",        'd', No_Argument,       Decode);
      R.Add_Option ("dict",          'D', No_Argument,       Dictionary_Input);
      R.Add_Option ("encode",        'e', No_Argument,       Encode);
      R.Add_Option ("evaluate",      'E', No_Argument,       Evaluate);
      R.Add_Option ("filter",        'F', Required_Argument, Filter_Threshold);
      R.Add_Option ("help",          'h', No_Argument,       Help);
      R.Add_Option ("hash-pkg",      'H', Required_Argument, Output_Hash);
1403
1404
1405
1406
1407
1408
1409





1410
1411
1412
1413
1414
1415
1416
               Put_Line (Output, Indent & Indent
                 & "Use base-256 implementation (default)");

            when Options.Base_256_Retired =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Use retired base-256 implementation");





         end case;
      end loop;
   end Print_Help;


   Opt_Config : constant Getopt.Configuration := Getopt_Config;
   Handler : Callback;







>
>
>
>
>







1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
               Put_Line (Output, Indent & Indent
                 & "Use base-256 implementation (default)");

            when Options.Base_256_Retired =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Use retired base-256 implementation");

            when Options.Check_Roundtrip =>
               New_Line (Output);
               Put_Line (Output, Indent & Indent
                 & "Check roundtrip of compression or decompression");
         end case;
      end loop;
   end Print_Help;


   Opt_Config : constant Getopt.Configuration := Getopt_Config;
   Handler : Callback;
1430
1431
1432
1433
1434
1435
1436
1437

1438
1439
1440
1441
1442
1443
1444
      Print_Help (Opt_Config, Ada.Text_IO.Current_Output);
   end if;

   if not Handler.Need_Dictionary then
      return;
   end if;

   if not (Handler.Stat_Output or Handler.Sx_Output) then

      Handler.Sx_Output := True;
   end if;

   Read_Input_List :
   declare
      use type Actions.Enum;








|
>







1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
      Print_Help (Opt_Config, Ada.Text_IO.Current_Output);
   end if;

   if not Handler.Need_Dictionary then
      return;
   end if;

   if not (Handler.Stat_Output or Handler.Sx_Output or Handler.Check_Roundtrip)
   then
      Handler.Sx_Output := True;
   end if;

   Read_Input_List :
   declare
      use type Actions.Enum;