Index: tools/smaz.adb ================================================================== --- tools/smaz.adb +++ tools/smaz.adb @@ -23,10 +23,11 @@ with Ada.Streams; with Ada.Strings.Fixed; with Ada.Strings.Unbounded; with Ada.Text_IO.Text_Streams; with Natools.Getopt_Long; +with Natools.Parallelism; with Natools.S_Expressions.Parsers; with Natools.S_Expressions.Printers; with Natools.Smaz.Tools; with Natools.Smaz.Tools.GNAT; with Natools.String_Escapes; @@ -55,10 +56,11 @@ Dictionary_Input, Decode, Encode, Evaluate, Output_Hash, + Job_Count, Help, Sx_Dict_Output, Min_Sub_Size, Max_Sub_Size, Stat_Output, @@ -78,10 +80,11 @@ Sx_Output : Boolean := False; Sx_Dict_Output : Boolean := False; Min_Sub_Size : Positive := 1; Max_Sub_Size : Positive := 3; Max_Word_Size : Positive := 10; + Job_Count : Natural := 0; Action : Actions.Enum := Actions.Nothing; Ada_Dictionary : Ada.Strings.Unbounded.Unbounded_String; Hash_Package : Ada.Strings.Unbounded.Unbounded_String; Dict_Source : Dict_Sources.Enum := Dict_Sources.S_Expression; end record; @@ -98,10 +101,19 @@ function Getopt_Config return Getopt.Configuration; -- Build the configuration object + procedure Parallel_Evaluate_Dictionary + (Job_Count : in Positive; + Dict : in Natools.Smaz.Dictionary; + Corpus : in Natools.Smaz.Tools.String_Lists.List; + Compressed_Size : out Ada.Streams.Stream_Element_Count; + Counts : out Natools.Smaz.Tools.Dictionary_Counts); + -- Return the same results as Natools.Smaz.Tools.Evaluate_Dictionary, + -- but hopefully more quickly, using Job_Count tasks. + procedure Print_Dictionary (Filename : in String; Dictionary : in Natools.Smaz.Dictionary; Hash_Package_Name : in String := ""); procedure Print_Dictionary @@ -187,10 +199,13 @@ when Options.Max_Sub_Size => Handler.Max_Sub_Size := Positive'Value (Argument); when Options.Max_Word_Size => Handler.Max_Word_Size := Positive'Value (Argument); + + when Options.Job_Count => + Handler.Job_Count := Natural'Value (Argument); end case; end Option; function Getopt_Config return Getopt.Configuration is @@ -203,10 +218,11 @@ R.Add_Option ("dict", 'D', No_Argument, Dictionary_Input); R.Add_Option ("encode", 'e', No_Argument, Encode); R.Add_Option ("evaluate", 'E', No_Argument, Evaluate); R.Add_Option ("help", 'h', No_Argument, Help); R.Add_Option ("hash-pkg", 'H', Required_Argument, Output_Hash); + R.Add_Option ("jobs", 'j', Required_Argument, Job_Count); R.Add_Option ("sx-dict", 'L', No_Argument, Sx_Dict_Output); R.Add_Option ("min-substring", 'm', Required_Argument, Min_Sub_Size); R.Add_Option ("max-substring", 'M', Required_Argument, Max_Sub_Size); R.Add_Option ("stats", 's', No_Argument, Stat_Output); R.Add_Option ("no-stats", 'S', No_Argument, No_Stat_Output); @@ -216,10 +232,92 @@ R.Add_Option ("no-s-expr", 'X', No_Argument, No_Sx_Output); return R; end Getopt_Config; + + procedure Parallel_Evaluate_Dictionary + (Job_Count : in Positive; + Dict : in Natools.Smaz.Dictionary; + Corpus : in Natools.Smaz.Tools.String_Lists.List; + Compressed_Size : out Ada.Streams.Stream_Element_Count; + Counts : out Natools.Smaz.Tools.Dictionary_Counts) + is + package String_Lists renames Natools.Smaz.Tools.String_Lists; + + type State is record + Position : String_Lists.Cursor; + Compressed_Size : Ada.Streams.Stream_Element_Count; + Counts : Natools.Smaz.Tools.Dictionary_Counts; + end record; + + procedure Initialize_Job + (Global : in out String_Lists.Cursor; + Job : out State); + + procedure Do_Job (Job : in out State); + + procedure Gather_Result + (Global : in out String_Lists.Cursor; + Job : in State); + + function Is_Finished (Global : in String_Lists.Cursor) return Boolean; + + + procedure Initialize_Job + (Global : in out String_Lists.Cursor; + Job : out State) is + begin + Job := (Position => Global, + Compressed_Size => 0, + Counts => (others => 0)); + String_Lists.Next (Global); + end Initialize_Job; + + + procedure Do_Job (Job : in out State) is + begin + Natools.Smaz.Tools.Evaluate_Dictionary_Partial + (Dict, + String_Lists.Element (Job.Position), + Job.Compressed_Size, + Job.Counts); + end Do_Job; + + + procedure Gather_Result + (Global : in out String_Lists.Cursor; + Job : in State) + is + pragma Unreferenced (Global); + use type Ada.Streams.Stream_Element_Count; + use type Natools.Smaz.Tools.String_Count; + begin + Compressed_Size := Compressed_Size + Job.Compressed_Size; + + for I in Counts'Range loop + Counts (I) := Counts (I) + Job.Counts (I); + end loop; + end Gather_Result; + + + function Is_Finished (Global : in String_Lists.Cursor) return Boolean is + begin + return not String_Lists.Has_Element (Global); + end Is_Finished; + + + procedure Parallel_Run is new Natools.Parallelism.Single_Accumulator_Run + (String_Lists.Cursor, State); + + Cursor : String_Lists.Cursor := String_Lists.First (Corpus); + begin + Compressed_Size := 0; + Counts := (others => 0); + Parallel_Run (Cursor, Job_Count); + end Parallel_Evaluate_Dictionary; + procedure Print_Dictionary (Filename : in String; Dictionary : in Natools.Smaz.Dictionary; Hash_Package_Name : in String := "") is @@ -358,10 +456,15 @@ when Options.Evaluate => New_Line (Output); Put_Line (Output, Indent & Indent & "Evaluate the dictionary on the input given corpus"); + + when Options.Job_Count => + New_Line (Output); + Put_Line (Output, Indent & Indent + & "Number of parallel jobs in long calculations"); end case; end loop; end Print_Help; function To_Dictionary @@ -569,12 +672,17 @@ when Actions.Evaluate => declare Total_Size : Ada.Streams.Stream_Element_Count; Counts : Natools.Smaz.Tools.Dictionary_Counts; begin - Natools.Smaz.Tools.Evaluate_Dictionary - (Dictionary, Input_Data, Total_Size, Counts); + if Handler.Job_Count > 0 then + Parallel_Evaluate_Dictionary (Handler.Job_Count, + Dictionary, Input_Data, Total_Size, Counts); + else + Natools.Smaz.Tools.Evaluate_Dictionary + (Dictionary, Input_Data, Total_Size, Counts); + end if; if Handler.Sx_Output then Sx_Output.Open_List; Sx_Output.Append_String (Ada.Strings.Fixed.Trim (Ada.Streams.Stream_Element_Count'Image (Total_Size),