Overview
Comment: | tools/smaz: refactor dictionary processing in a dedicated procedure |
---|---|
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | ef7006737f2005f3e52f1e30da1d717a |
User & Date: | nat on 2016-12-01 20:10:43 |
Other Links: | manifest | tags |
Context
2016-12-02
| ||
21:12 | tools/smaz: genericize Parallel_Evaluate_Dictionary check-in: 79a36ec957 user: nat tags: trunk | |
2016-12-01
| ||
20:10 | tools/smaz: refactor dictionary processing in a dedicated procedure check-in: ef7006737f user: nat tags: trunk | |
2016-11-30
| ||
22:30 | natools.gpr: add a profiling mode to the build check-in: 2a24860505 user: nat tags: trunk | |
Changes
Modified tools/smaz.adb from [b5bd7e07f7] to [d614874f0e].
︙ | ︙ | |||
178 179 180 181 182 183 184 185 186 187 188 189 190 191 | -- print the given dictionary in the given file procedure Print_Help (Opt : in Getopt.Configuration; Output : in Ada.Text_IO.File_Type); -- Print the help text to the given file function To_Dictionary (Handler : in Callback'Class; Input : in Natools.Smaz_Tools.String_Lists.List) return Natools.Smaz_256.Dictionary; -- Convert the input into a dictionary given the option in Handler | > > > > > > | 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 | -- print the given dictionary in the given file procedure Print_Help (Opt : in Getopt.Configuration; Output : in Ada.Text_IO.File_Type); -- Print the help text to the given file procedure Process (Handler : in Callback'Class; Word_List : in Natools.Smaz_Tools.String_Lists.List; Data_List : in Natools.Smaz_Tools.String_Lists.List); -- Perform the requested operations function To_Dictionary (Handler : in Callback'Class; Input : in Natools.Smaz_Tools.String_Lists.List) return Natools.Smaz_256.Dictionary; -- Convert the input into a dictionary given the option in Handler |
︙ | ︙ | |||
737 738 739 740 741 742 743 | Put_Line (Output, Indent & Indent & "Disable variable-length verbatim in built dictionary"); end case; end loop; end Print_Help; | | | | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | | | | | 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 | Put_Line (Output, Indent & Indent & "Disable variable-length verbatim in built dictionary"); end case; end loop; end Print_Help; procedure Process (Handler : in Callback'Class; Word_List : in Natools.Smaz_Tools.String_Lists.List; Data_List : in Natools.Smaz_Tools.String_Lists.List) is Dictionary : Natools.Smaz_256.Dictionary := To_Dictionary (Handler, Word_List); Sx_Output : Natools.S_Expressions.Printers.Canonical (Ada.Text_IO.Text_Streams.Stream (Ada.Text_IO.Current_Output)); Ada_Dictionary : constant String := Ada.Strings.Unbounded.To_String (Handler.Ada_Dictionary); Hash_Package : constant String := Ada.Strings.Unbounded.To_String (Handler.Hash_Package); begin Dictionary.Hash := Natools.Smaz_Tools.Linear_Search'Access; Natools.Smaz_Tools.List_For_Linear_Search := Word_List; if Ada_Dictionary'Length > 0 then Print_Dictionary (Ada_Dictionary, Dictionary, Hash_Package); end if; if Hash_Package'Length > 0 then Natools.Smaz_Tools.GNAT.Build_Perfect_Hash (Word_List, Hash_Package); end if; if Handler.Sx_Dict_Output then Sx_Output.Open_List; for I in Dictionary.Offsets'Range loop Sx_Output.Append_String (Natools.Smaz_256.Dict_Entry (Dictionary, I)); end loop; Sx_Output.Close_List; end if; case Handler.Action is when Actions.Nothing => null; when Actions.Decode => if Handler.Sx_Output then Sx_Output.Open_List; for S of Data_List loop Sx_Output.Append_String 
(Natools.Smaz_256.Decompress (Dictionary, To_SEA (S))); end loop; Sx_Output.Close_List; end if; if Handler.Stat_Output then |
︙ | ︙ | |||
904 905 906 907 908 909 910 | & Natural'Image (Output) & Ada.Characters.Latin_1.HT & Float'Image (Float (Original) / Float (Output))); end Print_Line; Original_Total : Natural := 0; Output_Total : Natural := 0; begin | | | | 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 | & Natural'Image (Output) & Ada.Characters.Latin_1.HT & Float'Image (Float (Original) / Float (Output))); end Print_Line; Original_Total : Natural := 0; Output_Total : Natural := 0; begin for S of Data_List loop declare Original_Size : constant Natural := S'Length; Output_Size : constant Natural := Natools.Smaz_256.Decompress (Dictionary, To_SEA (S))'Length; begin Print_Line (Original_Size, Output_Size); Original_Total := Original_Total + Original_Size; Output_Total := Output_Total + Output_Size; end; end loop; Print_Line (Original_Total, Output_Total); end; end if; when Actions.Encode => if Handler.Sx_Output then Sx_Output.Open_List; for S of Data_List loop Sx_Output.Append_Atom (Natools.Smaz_256.Compress (Dictionary, S)); end loop; Sx_Output.Close_List; end if; if Handler.Stat_Output then |
︙ | ︙ | |||
952 953 954 955 956 957 958 | & Ada.Characters.Latin_1.HT & Float'Image (Float (Base64) / Float (Original))); end Print_Line; Original_Total : Natural := 0; Output_Total : Natural := 0; Base64_Total : Natural := 0; begin | | | 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 | & Ada.Characters.Latin_1.HT & Float'Image (Float (Base64) / Float (Original))); end Print_Line; Original_Total : Natural := 0; Output_Total : Natural := 0; Base64_Total : Natural := 0; begin for S of Data_List loop declare Original_Size : constant Natural := S'Length; Output_Size : constant Natural := Natools.Smaz_256.Compress (Dictionary, S)'Length; Base64_Size : constant Natural := ((Output_Size + 2) / 3) * 4; begin |
︙ | ︙ | |||
977 978 979 980 981 982 983 | when Actions.Evaluate => declare Total_Size : Ada.Streams.Stream_Element_Count; Counts : Tools_256.Dictionary_Counts; begin Evaluate_Dictionary (Handler.Job_Count, | | | 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 | when Actions.Evaluate => declare Total_Size : Ada.Streams.Stream_Element_Count; Counts : Tools_256.Dictionary_Counts; begin Evaluate_Dictionary (Handler.Job_Count, Dictionary, Data_List, Total_Size, Counts); if Handler.Sx_Output then Sx_Output.Open_List; Sx_Output.Append_String (Ada.Strings.Fixed.Trim (Ada.Streams.Stream_Element_Count'Image (Total_Size), Ada.Strings.Both)); |
︙ | ︙ | |||
1114 1115 1116 1117 1118 1119 1120 | Tools_256.Score_Frequency'Access); Print_Min_Max ("gain", Tools_256.Score_Gain'Access); end; end if; end; end case; | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 | Tools_256.Score_Frequency'Access); Print_Min_Max ("gain", Tools_256.Score_Gain'Access); end; end if; end; end case; end Process; function To_Dictionary (Handler : in Callback'Class; Input : in Natools.Smaz_Tools.String_Lists.List) return Natools.Smaz_256.Dictionary is use type Natools.Smaz_Tools.String_Count; use type Dict_Sources.Enum; begin case Handler.Dict_Source is when Dict_Sources.S_Expression => return Tools_256.To_Dictionary (Input, Handler.Vlen_Verbatim); when Dict_Sources.Text_List | Dict_Sources.Unoptimized_Text_List => declare Counter : Natools.Smaz_Tools.Word_Counter; begin for S of Input loop Natools.Smaz_Tools.Add_Substrings (Counter, S, Handler.Min_Sub_Size, Handler.Max_Sub_Size); if Handler.Max_Word_Size > Handler.Max_Sub_Size then Natools.Smaz_Tools.Add_Words (Counter, S, Handler.Max_Sub_Size + 1, Handler.Max_Word_Size); end if; end loop; if Handler.Filter_Threshold > 0 then Natools.Smaz_Tools.Filter_By_Count (Counter, Handler.Filter_Threshold); end if; if Handler.Dict_Source = 
Dict_Sources.Text_List then declare Selected, Pending : Natools.Smaz_Tools.String_Lists.List; begin Natools.Smaz_Tools.Simple_Dictionary_And_Pending (Counter, Handler.Dict_Size, Selected, Pending, Handler.Score_Method, Handler.Max_Pending); return Optimize_Dictionary (Tools_256.To_Dictionary (Selected, Handler.Vlen_Verbatim), Pending, Input, Handler.Job_Count, Handler.Score_Method); end; else return Tools_256.To_Dictionary (Natools.Smaz_Tools.Simple_Dictionary (Counter, Handler.Dict_Size, Handler.Score_Method), Handler.Vlen_Verbatim); end if; end; end case; end To_Dictionary; Opt_Config : constant Getopt.Configuration := Getopt_Config; Handler : Callback; Input_List, Input_Data : Natools.Smaz_Tools.String_Lists.List; begin Process_Command_Line : begin Opt_Config.Process (Handler); exception when Getopt.Option_Error => Print_Help (Opt_Config, Ada.Text_IO.Current_Error); Ada.Command_Line.Set_Exit_Status (Ada.Command_Line.Failure); return; end Process_Command_Line; if Handler.Display_Help then Print_Help (Opt_Config, Ada.Text_IO.Current_Output); end if; if not Handler.Need_Dictionary then return; end if; if not (Handler.Stat_Output or Handler.Sx_Output) then Handler.Sx_Output := True; end if; Read_Input_List : declare use type Actions.Enum; Input : constant access Ada.Streams.Root_Stream_Type'Class := Ada.Text_IO.Text_Streams.Stream (Ada.Text_IO.Current_Input); Parser : Natools.S_Expressions.Parsers.Stream_Parser (Input); begin Parser.Next; Natools.Smaz_Tools.Read_List (Input_List, Parser); if Handler.Action /= Actions.Nothing then Parser.Next; Natools.Smaz_Tools.Read_List (Input_Data, Parser); end if; end Read_Input_List; Process (Handler, Input_List, Input_Data); end Smaz; |