Overview
Comment: | tools/smaz: add a roundtrip check option to help debug new code |
---|---|
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
f44feb5e3ebeedfd55804b2b729c6d7e |
User & Date: | nat on 2016-12-17 22:48:31 |
Other Links: | manifest | tags |
Context
2016-12-18
| ||
21:16 | smaz_implementations-base_64_tools: new package for paddingless base-64 check-in: fd2ccb00b9 user: nat tags: trunk | |
2016-12-17
| ||
22:48 | tools/smaz: add a roundtrip check option to help debug new code check-in: f44feb5e3e user: nat tags: trunk | |
2016-12-16
| ||
20:55 |
smaz_generic: optimize compression
For some reason it seems even with -O3, calling Dict_Entry involves a string copy, which makes `memcpy` the largest time consumer of the compression algorithm. Inlining it manually improves performance a lot. check-in: 674fadc74b user: nat tags: trunk | |
Changes
Modified tools/smaz.adb from [cd9e4a4669] to [f7952f4102].
︙ | ︙ | |||
67 68 69 70 71 72 73 74 75 76 77 78 79 80 | Unoptimized_Text_List); end Dict_Sources; package Options is type Id is (Base_256, Output_Ada_Dict, Dictionary_Input, Decode, Encode, Evaluate, Filter_Threshold, Output_Hash, Job_Count, | > | 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | Unoptimized_Text_List); end Dict_Sources; package Options is type Id is (Base_256, Output_Ada_Dict, Check_Roundtrip, Dictionary_Input, Decode, Encode, Evaluate, Filter_Threshold, Output_Hash, Job_Count, |
︙ | ︙ | |||
116 117 118 119 120 121 122 123 124 125 126 127 128 129 | Job_Count : Natural := 0; Filter_Threshold : Natools.Smaz_Tools.String_Count := 0; Score_Method : Methods.Enum := Methods.Encoded; Action : Actions.Enum := Actions.Nothing; Ada_Dictionary : Ada.Strings.Unbounded.Unbounded_String; Hash_Package : Ada.Strings.Unbounded.Unbounded_String; Dict_Source : Dict_Sources.Enum := Dict_Sources.S_Expression; end record; overriding procedure Option (Handler : in out Callback; Id : in Options.Id; Argument : in String); | > | 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | Job_Count : Natural := 0; Filter_Threshold : Natools.Smaz_Tools.String_Count := 0; Score_Method : Methods.Enum := Methods.Encoded; Action : Actions.Enum := Actions.Nothing; Ada_Dictionary : Ada.Strings.Unbounded.Unbounded_String; Hash_Package : Ada.Strings.Unbounded.Unbounded_String; Dict_Source : Dict_Sources.Enum := Dict_Sources.S_Expression; Check_Roundtrip : Boolean := False; end record; overriding procedure Option (Handler : in out Callback; Id : in Options.Id; Argument : in String); |
︙ | ︙ | |||
641 642 643 644 645 646 647 648 649 650 651 652 653 654 | if Handler.Sx_Output then Sx_Output.Open_List; for S of Data_List loop Sx_Output.Append_String (Decompress (Dict, To_SEA (S))); end loop; Sx_Output.Close_List; end if; if Handler.Stat_Output then declare procedure Print_Line (Original, Output : Natural); procedure Print_Line (Original, Output : Natural) is begin | > > > > > > > > > > > > > > > > > > > > > > > > | 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 | if Handler.Sx_Output then Sx_Output.Open_List; for S of Data_List loop Sx_Output.Append_String (Decompress (Dict, To_SEA (S))); end loop; Sx_Output.Close_List; end if; if Handler.Check_Roundtrip then for S of Data_List loop declare use type Ada.Streams.Stream_Element_Array; Input : constant Ada.Streams.Stream_Element_Array := To_SEA (S); Processed : constant String := Decompress (Dict, Input); Roundtrip : constant Ada.Streams.Stream_Element_Array := Compress (Dict, Processed); begin if Input /= Roundtrip then Sx_Output.Open_List; Sx_Output.Append_String ("decompress-roundtrip-failed"); Sx_Output.Append_Atom (Input); Sx_Output.Append_String (Processed); Sx_Output.Append_Atom (Roundtrip); Sx_Output.Close_List; end if; end; end loop; end if; if Handler.Stat_Output then declare procedure Print_Line (Original, Output : Natural); procedure Print_Line (Original, Output : Natural) is begin |
︙ | ︙ | |||
682 683 684 685 686 687 688 689 690 691 692 693 694 695 | if Handler.Sx_Output then Sx_Output.Open_List; for S of Data_List loop Sx_Output.Append_Atom (Compress (Dict, S)); end loop; Sx_Output.Close_List; end if; if Handler.Stat_Output then declare procedure Print_Line (Original, Output, Base64 : Natural); procedure Print_Line (Original, Output, Base64 : in Natural) is | > > > > > > > > > > > > > > > > > > > > > | 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 | if Handler.Sx_Output then Sx_Output.Open_List; for S of Data_List loop Sx_Output.Append_Atom (Compress (Dict, S)); end loop; Sx_Output.Close_List; end if; if Handler.Check_Roundtrip then for S of Data_List loop declare Processed : constant Ada.Streams.Stream_Element_Array := Compress (Dict, S); Roundtrip : constant String := Decompress (Dict, Processed); begin if S /= Roundtrip then Sx_Output.Open_List; Sx_Output.Append_String ("compress-roundtrip-failed"); Sx_Output.Append_String (S); Sx_Output.Append_Atom (Processed); Sx_Output.Append_String (Roundtrip); Sx_Output.Close_List; end if; end; end loop; end if; if Handler.Stat_Output then declare procedure Print_Line (Original, Output, Base64 : Natural); procedure Print_Line (Original, Output, Base64 : in Natural) is |
︙ | ︙ | |||
1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 | Handler.Vlen_Verbatim := False; when Options.Base_256 => Handler.Algorithm := Algorithms.Base_256; when Options.Base_256_Retired => Handler.Algorithm := Algorithms.Base_256_Retired; end case; end Option; function Activate_Dictionary (Dict : in Natools.Smaz_256.Dictionary) return Natools.Smaz_256.Dictionary is | > > > | 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 | Handler.Vlen_Verbatim := False; when Options.Base_256 => Handler.Algorithm := Algorithms.Base_256; when Options.Base_256_Retired => Handler.Algorithm := Algorithms.Base_256_Retired; when Options.Check_Roundtrip => Handler.Check_Roundtrip := True; end case; end Option; function Activate_Dictionary (Dict : in Natools.Smaz_256.Dictionary) return Natools.Smaz_256.Dictionary is |
︙ | ︙ | |||
1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 | function Getopt_Config return Getopt.Configuration is use Getopt; use Options; R : Getopt.Configuration; begin R.Add_Option ("base-256", '2', No_Argument, Base_256); R.Add_Option ("ada-dict", 'A', Optional_Argument, Output_Ada_Dict); R.Add_Option ("decode", 'd', No_Argument, Decode); R.Add_Option ("dict", 'D', No_Argument, Dictionary_Input); R.Add_Option ("encode", 'e', No_Argument, Encode); R.Add_Option ("evaluate", 'E', No_Argument, Evaluate); R.Add_Option ("filter", 'F', Required_Argument, Filter_Threshold); R.Add_Option ("help", 'h', No_Argument, Help); R.Add_Option ("hash-pkg", 'H', Required_Argument, Output_Hash); | > | 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 | function Getopt_Config return Getopt.Configuration is use Getopt; use Options; R : Getopt.Configuration; begin R.Add_Option ("base-256", '2', No_Argument, Base_256); R.Add_Option ("ada-dict", 'A', Optional_Argument, Output_Ada_Dict); R.Add_Option ("check", 'C', No_Argument, Check_Roundtrip); R.Add_Option ("decode", 'd', No_Argument, Decode); R.Add_Option ("dict", 'D', No_Argument, Dictionary_Input); R.Add_Option ("encode", 'e', No_Argument, Encode); R.Add_Option ("evaluate", 'E', No_Argument, Evaluate); R.Add_Option ("filter", 'F', Required_Argument, Filter_Threshold); R.Add_Option ("help", 'h', No_Argument, Help); R.Add_Option ("hash-pkg", 'H', Required_Argument, Output_Hash); |
︙ | ︙ | |||
1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 | Put_Line (Output, Indent & Indent & "Use base-256 implementation (default)"); when Options.Base_256_Retired => New_Line (Output); Put_Line (Output, Indent & Indent & "Use retired base-256 implementation"); end case; end loop; end Print_Help; Opt_Config : constant Getopt.Configuration := Getopt_Config; Handler : Callback; | > > > > > | 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 | Put_Line (Output, Indent & Indent & "Use base-256 implementation (default)"); when Options.Base_256_Retired => New_Line (Output); Put_Line (Output, Indent & Indent & "Use retired base-256 implementation"); when Options.Check_Roundtrip => New_Line (Output); Put_Line (Output, Indent & Indent & "Check roundtrip of compression or decompression"); end case; end loop; end Print_Help; Opt_Config : constant Getopt.Configuration := Getopt_Config; Handler : Callback; |
︙ | ︙ | |||
1430 1431 1432 1433 1434 1435 1436 | Print_Help (Opt_Config, Ada.Text_IO.Current_Output); end if; if not Handler.Need_Dictionary then return; end if; | | > | 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 | Print_Help (Opt_Config, Ada.Text_IO.Current_Output); end if; if not Handler.Need_Dictionary then return; end if; if not (Handler.Stat_Output or Handler.Sx_Output or Handler.Check_Roundtrip) then Handler.Sx_Output := True; end if; Read_Input_List : declare use type Actions.Enum; |
︙ | ︙ |