Overview
Comment: | tools/smaz: add support for dictionary generation from a word list |
---|---|
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | 88e525eaf4f3f4096d79011c8edeea9c |
User & Date: | nat on 2016-09-29 21:58:48 |
Other Links: | manifest | tags |
Context
2016-09-30
| ||
20:26 | smaz: make Dict_Entry accessor publicly visible check-in: 48bf48d1c0 user: nat tags: trunk | |
2016-09-29
| ||
21:58 | tools/smaz: add support for dictionary generation from a word list check-in: 88e525eaf4 user: nat tags: trunk | |
2016-09-28
| ||
21:25 | smaz-tools: add the simplest dictionary constructor from word counts check-in: fb05dda137 user: nat tags: trunk | |
Changes
Modified tools/smaz.adb from [fcd1ed8809] to [acb9db3271].
︙ | ︙ | |||
35 36 37 38 39 40 41 42 43 44 | package Actions is type Enum is (Nothing, Decode, Encode); end Actions; package Options is type Id is | > > > > > > | > > > | 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | package Actions is type Enum is (Nothing, Decode, Encode); end Actions; package Dict_Sources is type Enum is (S_Expression, Word_List); end Dict_Sources; package Options is type Id is (Output_Ada_Dict, Dictionary_Input, Decode, Encode, Output_Hash, Help, Stat_Output, No_Stat_Output, Word_List_Input, Sx_Output, No_Sx_Output); end Options; package Getopt is new Natools.Getopt_Long (Options.Id); type Callback is new Getopt.Handlers.Callback with record Display_Help : Boolean := False; Need_Dictionary : Boolean := False; Stat_Output : Boolean := False; Sx_Output : Boolean := False; Action : Actions.Enum := Actions.Nothing; Ada_Dictionary : Ada.Strings.Unbounded.Unbounded_String; Hash_Package : Ada.Strings.Unbounded.Unbounded_String; Dict_Source : Dict_Sources.Enum := Dict_Sources.S_Expression; end record; overriding procedure Option (Handler : in out Callback; Id : in Options.Id; Argument : in String); |
︙ | ︙ | |||
90 91 92 93 94 95 96 97 98 99 100 101 102 103 | -- print the given dictionary in the given file procedure Print_Help (Opt : in Getopt.Configuration; Output : in Ada.Text_IO.File_Type); -- Print the help text to the given file overriding procedure Option (Handler : in out Callback; Id : in Options.Id; Argument : in String) is begin case Id is | > > > > > > | 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 | -- print the given dictionary in the given file procedure Print_Help (Opt : in Getopt.Configuration; Output : in Ada.Text_IO.File_Type); -- Print the help text to the given file function To_Dictionary (Handler : in Callback'Class; Input : in Natools.Smaz.Tools.String_Lists.List) return Natools.Smaz.Dictionary; -- Convert the input into a dictionary given the option in Handler overriding procedure Option (Handler : in out Callback; Id : in Options.Id; Argument : in String) is begin case Id is |
︙ | ︙ | |||
114 115 116 117 118 119 120 | when Options.No_Stat_Output => Handler.Stat_Output := False; when Options.No_Sx_Output => Handler.Sx_Output := False; | | > > > > > > | | > | | | | | > | | 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 | when Options.No_Stat_Output => Handler.Stat_Output := False; when Options.No_Sx_Output => Handler.Sx_Output := False; when Options.Output_Ada_Dict => Handler.Need_Dictionary := True; if Argument'Length > 0 then Handler.Ada_Dictionary := Ada.Strings.Unbounded.To_Unbounded_String (Argument); else Handler.Ada_Dictionary := Ada.Strings.Unbounded.To_Unbounded_String ("-"); end if; when Options.Output_Hash => Handler.Need_Dictionary := True; Handler.Hash_Package := Ada.Strings.Unbounded.To_Unbounded_String (Argument); when Options.Stat_Output => Handler.Stat_Output := True; when Options.Sx_Output => Handler.Sx_Output := True; when Options.Dictionary_Input => Handler.Dict_Source := Dict_Sources.S_Expression; when Options.Word_List_Input => Handler.Dict_Source := Dict_Sources.Word_List; end case; end Option; function Getopt_Config return Getopt.Configuration is use Getopt; use Options; R : Getopt.Configuration; begin R.Add_Option ("ada-dict", 'A', Optional_Argument, Output_Ada_Dict); R.Add_Option ("decode", 'd', No_Argument, Decode); R.Add_Option ("dict", 'D', No_Argument, Dictionary_Input); R.Add_Option ("encode", 'e', No_Argument, Encode); R.Add_Option ("help", 'h', No_Argument, Help); R.Add_Option ("hash-pkg", 'H', Required_Argument, Output_Hash); R.Add_Option ("stats", 's', No_Argument, Stat_Output); R.Add_Option ("no-stats", 'S', No_Argument, No_Stat_Output); R.Add_Option ("word-list", 'w', No_Argument, Word_List_Input); R.Add_Option ("s-expr", 'x', No_Argument, Sx_Output); R.Add_Option ("no-s-expr", 'X', No_Argument, No_Sx_Output); return R; end 
Getopt_Config; procedure Print_Dictionary |
︙ | ︙ | |||
241 242 243 244 245 246 247 | & "Do not output filter statistics"); when Options.No_Sx_Output => New_Line (Output); Put_Line (Output, Indent & Indent & "Do not output filtered results in a S-expression"); | | | 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 | & "Do not output filter statistics"); when Options.No_Sx_Output => New_Line (Output); Put_Line (Output, Indent & Indent & "Do not output filtered results in a S-expression"); when Options.Output_Ada_Dict => Put_Line (Output, "=[filename]"); Put_Line (Output, Indent & Indent & "Output the current dictionary as Ada code in the given"); Put_Line (Output, Indent & Indent & "file, or standard output if filename is ""-"""); when Options.Output_Hash => |
︙ | ︙ | |||
264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 | Put_Line (Output, Indent & Indent & "Output filter statistics"); when Options.Sx_Output => New_Line (Output); Put_Line (Output, Indent & Indent & "Output filtered results in a S-expression"); end case; end loop; end Print_Help; Opt_Config : constant Getopt.Configuration := Getopt_Config; Handler : Callback; Input_List, Input_Data : Natools.Smaz.Tools.String_Lists.List; begin Process_Command_Line : begin | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 | Put_Line (Output, Indent & Indent & "Output filter statistics"); when Options.Sx_Output => New_Line (Output); Put_Line (Output, Indent & Indent & "Output filtered results in a S-expression"); when Options.Dictionary_Input => New_Line (Output); Put_Line (Output, Indent & Indent & "Read dictionary directly in input S-expression (default)"); when Options.Word_List_Input => New_Line (Output); Put_Line (Output, Indent & Indent & "Compute dictionary from word list in input S-expression"); end case; end loop; end Print_Help; function To_Dictionary (Handler : in Callback'Class; Input : in Natools.Smaz.Tools.String_Lists.List) return Natools.Smaz.Dictionary is begin case Handler.Dict_Source is when Dict_Sources.S_Expression => return Natools.Smaz.Tools.To_Dictionary (Input, True); when Dict_Sources.Word_List => declare Counter : Natools.Smaz.Tools.Word_Counter; begin for S of Input loop Natools.Smaz.Tools.Add_Substrings (Counter, S, 1, 3); end loop; return Natools.Smaz.Tools.To_Dictionary (Natools.Smaz.Tools.Most_Common_Words (Counter, 254), True); end; end case; end To_Dictionary; Opt_Config : constant Getopt.Configuration := Getopt_Config; Handler : Callback; Input_List, Input_Data : Natools.Smaz.Tools.String_Lists.List; begin 
Process_Command_Line : begin |
︙ | ︙ | |||
315 316 317 318 319 320 321 | end if; end Read_Input_List; Build_Dictionary : declare Dictionary : Natools.Smaz.Dictionary | | | 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 | end if; end Read_Input_List; Build_Dictionary : declare Dictionary : Natools.Smaz.Dictionary := To_Dictionary (Handler, Input_List); Sx_Output : Natools.S_Expressions.Printers.Canonical (Ada.Text_IO.Text_Streams.Stream (Ada.Text_IO.Current_Output)); Ada_Dictionary : constant String := Ada.Strings.Unbounded.To_String (Handler.Ada_Dictionary); Hash_Package : constant String := Ada.Strings.Unbounded.To_String (Handler.Hash_Package); begin |
︙ | ︙ |