Index: tools/smaz.adb ================================================================== --- tools/smaz.adb +++ tools/smaz.adb @@ -103,10 +103,12 @@ Fast_Text_Input, Max_Word_Size, Sx_Output, No_Sx_Output, Force_Word, + Max_Dict_Size, + Min_Dict_Size, No_Vlen_Verbatim, Score_Method, Vlen_Verbatim); end Options; @@ -120,11 +122,12 @@ Sx_Output : Boolean := False; Sx_Dict_Output : Boolean := False; Min_Sub_Size : Positive := 1; Max_Sub_Size : Positive := 3; Max_Word_Size : Positive := 10; - Dict_Size : Positive := 254; + Max_Dict_Size : Positive := 254; + Min_Dict_Size : Positive := 254; Vlen_Verbatim : Boolean := True; Max_Pending : Ada.Containers.Count_Type := Ada.Containers.Count_Type'Last; Job_Count : Natural := 0; Filter_Threshold : Natools.Smaz_Tools.String_Count := 0; @@ -372,10 +375,12 @@ First : in Dictionary_Entry; Pending_Words : in out String_Lists.List; Input_Texts : in String_Lists.List; Job_Count : in Natural; Method : in Methods; + Min_Dict_Size : in Positive; + Max_Dict_Size : in Positive; Updated : out Boolean); -- Try to improve on Dict by replacing a single entry from it with -- one of the substring in Pending_Words. function Optimize_Dictionary @@ -382,11 +387,13 @@ (Base : in Dictionary; First : in Dictionary_Entry; Pending_Words : in String_Lists.List; Input_Texts : in String_Lists.List; Job_Count : in Natural; - Method : in Methods) + Method : in Methods; + Min_Dict_Size : in Positive; + Max_Dict_Size : in Positive) return Dictionary; -- Optimize the dictionary on Input_Texts, starting with Base and -- adding substrings from Pending_Words. Operates only on words -- at First and beyond. @@ -562,12 +569,16 @@ First : in Dictionary_Entry; Pending_Words : in out String_Lists.List; Input_Texts : in String_Lists.List; Job_Count : in Natural; Method : in Methods; + Min_Dict_Size : in Positive; + Max_Dict_Size : in Positive; Updated : out Boolean) is + pragma Unreferenced (Min_Dict_Size); + pragma Unreferenced (Max_Dict_Size); use type Ada.Streams.Stream_Element_Offset; New_Value : Ada.Strings.Unbounded.Unbounded_String; New_Position : String_Lists.Cursor; Worst_Index : constant Dictionary_Entry @@ -630,11 +641,13 @@ (Base : in Dictionary; First : in Dictionary_Entry; Pending_Words : in String_Lists.List; Input_Texts : in String_Lists.List; Job_Count : in Natural; - Method : in Methods) + Method : in Methods; + Min_Dict_Size : in Positive; + Max_Dict_Size : in Positive) return Dictionary is Holder : Holders.Holder := Holders.To_Holder (Base); Pending : String_Lists.List := Pending_Words; Score : Ada.Streams.Stream_Element_Count; @@ -652,10 +665,12 @@ First, Pending, Input_Texts, Job_Count, Method, + Min_Dict_Size, + Max_Dict_Size, Running); end loop; return Holder.Element; end Optimize_Dictionary; @@ -1097,23 +1112,22 @@ Method); when Dict_Sources.Text_List => declare Needed : constant Integer - := Handler.Dict_Size + := Handler.Max_Dict_Size - Natural (Handler.Forced_Words.Length); Selected, Pending : String_Lists.List; First : Dictionary_Entry := Dictionary_Entry'First; begin if Needed <= 0 then for Word of reverse Handler.Forced_Words loop Selected.Prepend (Word); - if Positive (Selected.Length) = Handler.Dict_Size then - return To_Dictionary - (Selected, Handler.Vlen_Verbatim); - end if; + exit when Positive (Selected.Length) + = Handler.Max_Dict_Size; end loop; + return To_Dictionary (Selected, Handler.Vlen_Verbatim); end if; Simple_Dictionary_And_Pending (Make_Word_Counter (Handler, Input), Needed, @@ -1131,17 +1145,19 @@ (To_Dictionary (Selected, Handler.Vlen_Verbatim), First, Pending, Input, Handler.Job_Count, - Method); + Method, + Handler.Min_Dict_Size, + Handler.Max_Dict_Size); end; when Dict_Sources.Unoptimized_Text_List => declare Needed : constant Integer - := Handler.Dict_Size + := Handler.Max_Dict_Size - Natural (Handler.Forced_Words.Length); All_Words : String_Lists.List; begin if Needed > 0 then All_Words := Simple_Dictionary @@ -1152,11 +1168,11 @@ end loop; else for Word of reverse Handler.Forced_Words loop All_Words.Prepend (Word); exit when Positive (All_Words.Length) - >= Handler.Dict_Size; + >= Handler.Max_Dict_Size; end loop; end if; return To_Dictionary (All_Words, Handler.Vlen_Verbatim); end; @@ -1379,11 +1395,12 @@ when Options.Max_Pending => Handler.Max_Pending := Ada.Containers.Count_Type'Value (Argument); when Options.Dict_Size => - Handler.Dict_Size := Positive'Value (Argument); + Handler.Min_Dict_Size := Positive'Value (Argument); + Handler.Max_Dict_Size := Positive'Value (Argument); when Options.Vlen_Verbatim => Handler.Vlen_Verbatim := True; when Options.No_Vlen_Verbatim => @@ -1411,10 +1428,16 @@ if Handler.Action in Actions.Nothing then Handler.Action := Actions.Adjust_Dictionary; end if; end if; + + when Options.Max_Dict_Size => + Handler.Max_Dict_Size := Positive'Value (Argument); + + when Options.Min_Dict_Size => + Handler.Min_Dict_Size := Positive'Value (Argument); end case; end Option; function Activate_Dictionary (Dict : in Natools.Smaz_256.Dictionary) @@ -1545,10 +1568,12 @@ R.Add_Option ("fast-text-list", 'T', No_Argument, Fast_Text_Input); R.Add_Option ("max-word-len", 'W', Required_Argument, Max_Word_Size); R.Add_Option ("s-expr", 'x', No_Argument, Sx_Output); R.Add_Option ("no-s-expr", 'X', No_Argument, No_Sx_Output); R.Add_Option ("force-word", Required_Argument, Force_Word); + R.Add_Option ("max-dict-size", Required_Argument, Max_Dict_Size); + R.Add_Option ("min-dict-size", Required_Argument, Min_Dict_Size); R.Add_Option ("no-vlen-verbatim", No_Argument, No_Vlen_Verbatim); R.Add_Option ("score-method", Required_Argument, Score_Method); R.Add_Option ("vlen-verbatim", No_Argument, Vlen_Verbatim); return R; @@ -1828,10 +1853,20 @@ Put_Line (Output, Indent & Indent & "Force into the dictionary," & " replacing the worst entry"); Put_Line (Output, Indent & Indent & "Can be specified multiple times to force many words."); + + when Options.Max_Dict_Size => + Put_Line (Output, " "); + Put_Line (Output, Indent & Indent + & "Maximum number of words in the dictionary to build"); + + when Options.Min_Dict_Size => + Put_Line (Output, " "); + Put_Line (Output, Indent & Indent + & "Minimum number of words in the dictionary to build"); end case; end loop; end Print_Help;