Index: tools/smaz.adb ================================================================== --- tools/smaz.adb +++ tools/smaz.adb @@ -55,10 +55,11 @@ (Output_Ada_Dict, Dictionary_Input, Decode, Encode, Evaluate, + Filter_Threshold, Output_Hash, Job_Count, Help, Sx_Dict_Output, Min_Sub_Size, @@ -81,10 +82,11 @@ Sx_Dict_Output : Boolean := False; Min_Sub_Size : Positive := 1; Max_Sub_Size : Positive := 3; Max_Word_Size : Positive := 10; Job_Count : Natural := 0; + Filter_Threshold : Natools.Smaz.Tools.String_Count := 0; Action : Actions.Enum := Actions.Nothing; Ada_Dictionary : Ada.Strings.Unbounded.Unbounded_String; Hash_Package : Ada.Strings.Unbounded.Unbounded_String; Dict_Source : Dict_Sources.Enum := Dict_Sources.S_Expression; end record; @@ -202,10 +204,14 @@ when Options.Max_Word_Size => Handler.Max_Word_Size := Positive'Value (Argument); when Options.Job_Count => Handler.Job_Count := Natural'Value (Argument); + + when Options.Filter_Threshold => + Handler.Filter_Threshold + := Natools.Smaz.Tools.String_Count'Value (Argument); end case; end Option; function Getopt_Config return Getopt.Configuration is @@ -216,10 +222,11 @@ R.Add_Option ("ada-dict", 'A', Optional_Argument, Output_Ada_Dict); R.Add_Option ("decode", 'd', No_Argument, Decode); R.Add_Option ("dict", 'D', No_Argument, Dictionary_Input); R.Add_Option ("encode", 'e', No_Argument, Encode); R.Add_Option ("evaluate", 'E', No_Argument, Evaluate); + R.Add_Option ("filter", 'F', Required_Argument, Filter_Threshold); R.Add_Option ("help", 'h', No_Argument, Help); R.Add_Option ("hash-pkg", 'H', Required_Argument, Output_Hash); R.Add_Option ("jobs", 'j', Required_Argument, Job_Count); R.Add_Option ("sx-dict", 'L', No_Argument, Sx_Dict_Output); R.Add_Option ("min-substring", 'm', Required_Argument, Min_Sub_Size); @@ -469,18 +476,27 @@ when Options.Job_Count => New_Line (Output); Put_Line (Output, Indent & Indent & "Number of parallel jobs in long calculations"); + + when Options.Filter_Threshold => + Put_Line (Output, " "); + Put_Line (Output, Indent & Indent + & "Before building a dictionary from substrings, remove"); + Put_Line (Output, Indent & Indent + & "substrings whose count is below the threshold."); end case; end loop; end Print_Help; function To_Dictionary (Handler : in Callback'Class; Input : in Natools.Smaz.Tools.String_Lists.List) - return Natools.Smaz.Dictionary is + return Natools.Smaz.Dictionary + is + use type Natools.Smaz.Tools.String_Count; begin case Handler.Dict_Source is when Dict_Sources.S_Expression => return Natools.Smaz.Tools.To_Dictionary (Input, True); @@ -496,10 +512,15 @@ Natools.Smaz.Tools.Add_Words (Counter, S, Handler.Max_Sub_Size + 1, Handler.Max_Word_Size); end if; end loop; + + if Handler.Filter_Threshold > 0 then + Natools.Smaz.Tools.Filter_By_Count + (Counter, Handler.Filter_Threshold); + end if; return Natools.Smaz.Tools.To_Dictionary (Natools.Smaz.Tools.Simple_Dictionary (Counter, 254), True); end;