Changeset 90 for branches/rakefile-work
- Timestamp:
- 07/09/08 16:02:53 (4 months ago)
- Location:
- branches/rakefile-work
- Files:
-
- 6 modified
-
. (modified) (1 prop)
-
Rakefile (modified) (4 diffs)
-
convertdb.rb (modified) (3 diffs)
-
lib/wordnet.rb (modified) (1 diff)
-
lib/wordnet/constants.rb (modified) (4 diffs)
-
lib/wordnet/synset.rb (modified) (12 diffs)
Legend:
- Unmodified
- Added
- Removed
-
branches/rakefile-work
- Property svn:externals
-
old new 1 rake svn://deveiate.org/rake-tasklibs/trunk s1 rake svn://deveiate.org/rake-tasklibs/trunk
-
- Property svn:externals
-
branches/rakefile-work/Rakefile
r87 r90 1 1 #!rake 2 2 # 3 # Ruby-WordNet Rakefile 4 # 5 # Copyright (c) 2008, The FaerieMUD Consortium 3 # WordNet rakefile 4 # 5 # Based on various other Rakefiles, especially one by Ben Bleything 6 # 7 # Copyright (c) 2008 The FaerieMUD Consortium 6 8 # 7 9 # Authors: … … 12 14 require 'pathname' 13 15 basedir = Pathname.new( __FILE__ ).dirname 14 libdir = basedir + 'lib' 16 17 libdir = basedir + "lib" 18 extdir = basedir + "ext" 15 19 16 20 $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s ) 21 $LOAD_PATH.unshift( extdir.to_s ) unless $LOAD_PATH.include?( extdir.to_s ) 17 22 } 18 23 19 24 25 require 'rbconfig' 20 26 require 'rubygems' 21 27 require 'rake' 22 28 require 'rake/rdoctask' 29 require 'rake/testtask' 23 30 require 'rake/packagetask' 24 require 'rake/gempackagetask' 25 require 'pathname' 31 require 'rake/clean' 26 32 27 33 $dryrun = false 28 34 29 # Pathnameconstants35 ### Config constants 30 36 BASEDIR = Pathname.new( __FILE__ ).dirname.relative_path_from( Pathname.getwd ) 31 37 LIBDIR = BASEDIR + 'lib' 38 EXTDIR = BASEDIR + 'ext' 32 39 DOCSDIR = BASEDIR + 'docs' 33 RAKEDIR = BASEDIR + 'rake'34 RDOCDIR = BASEDIR + 'docs'35 40 PKGDIR = BASEDIR + 'pkg' 36 37 41 RAKE_TASKDIR = BASEDIR + 'rake' 38 ARTIFACTS_DIR = Pathname.new( ENV['CC_BUILD_ARTIFACTS'] || '' ) 39 40 TEXT_FILES = %w( Rakefile ChangeLog INSTALL README LICENSE ). 41 collect {|filename| BASEDIR + filename } 42 43 BUILD_FILES = %w( convertdb.rb utils.rb ) 42 43 PKG_NAME = 'wordnet' 44 PKG_SUMMARY = '' 45 VERSION_FILE = LIBDIR + 'wordnet.rb' 46 PKG_VERSION = VERSION_FILE.read[ /VERSION = '(\d+\.\d+\.\d+)'/, 1 ] 47 PKG_FILE_NAME = "#{PKG_NAME.downcase}-#{PKG_VERSION}" 48 GEM_FILE_NAME = "#{PKG_FILE_NAME}.gem" 49 50 RELEASE_NAME = "RELEASE_#{PKG_VERSION.gsub(/\./, '_')}" 51 52 ARTIFACTS_DIR = Pathname.new( ENV['CC_BUILD_ARTIFACTS'] || 'artifacts' ) 53 54 TEXT_FILES = %w( Rakefile ChangeLog README LICENSE ).collect {|filename| BASEDIR + filename } 55 LIB_FILES = Pathname.glob( LIBDIR + '**/*.rb' ).delete_if {|item| item =~ /\.svn/ } 56 EXT_FILES = Pathname.glob( EXTDIR + '**/*.{c,h,rb}' ).delete_if {|item| item =~ /\.svn/ } 44 57 45 58 SPECDIR = BASEDIR + 'spec' 46 SPEC_FILES = Pathname.glob( (SPECDIR + '**/*_spec.rb').to_s ). 47 delete_if {|item| item =~ /\.svn/ } 48 # Ideally, this should be automatically generated. 59 SPEC_FILES = Pathname.glob( SPECDIR + '**/*_spec.rb' ).delete_if {|item| item =~ /\.svn/ } 49 60 SPEC_EXCLUDES = 'spec,/Library/Ruby,/var/lib,/usr/local/lib' 50 61 51 LIB_FILES = Pathname.glob( LIBDIR + '**/*.rb'). 52 delete_if {|item| item =~ /\.svn/ } 53 54 RELEASE_FILES = TEXT_FILES + LIB_FILES + SPEC_FILES 55 62 63 RELEASE_FILES = FileList[ TEXT_FILES + SPEC_FILES + LIB_FILES + EXT_FILES ] 64 65 66 RCOV_OPTS = [ 67 '--exclude', SPEC_EXCLUDES, 68 '--xrefs', 69 '--save', 70 '--callsites' 71 ] 72 73 74 ### Load some task libraries that need to be loaded early 56 75 require RAKE_TASKDIR + 'helpers.rb' 57 58 ### Package constants59 PKG_NAME = 'wordnet'60 PKG_VERSION = find_pattern_in_file( /VERSION = '(\d+\.\d+\.\d+)'/, LIBDIR + 'wordnet.rb' ).first61 PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"62 63 RELEASE_NAME = "REL #{PKG_VERSION}"64 65 ### Load task libraries66 76 require RAKE_TASKDIR + 'svn.rb' 67 77 require RAKE_TASKDIR + 'verifytask.rb' 78 79 # Define some constants that depend on the 'svn' tasklib 80 PKG_BUILD = get_svn_rev( BASEDIR ) || 0 81 SNAPSHOT_PKG_NAME = "#{PKG_FILE_NAME}.#{PKG_BUILD}" 82 SNAPSHOT_GEM_NAME = "#{SNAPSHOT_PKG_NAME}.gem" 83 84 # Documentation constants 85 RDOC_OPTIONS = [ 86 '-w', '4', 87 '-SHN', 88 '-i', '.', 89 '-m', 'README', 90 '-W', 'http://deveiate.org/projects/Ruby-WordNet/browser/trunk/' 91 ] 92 93 # Release constants 94 SMTP_HOST = 'mail.faeriemud.org' 95 SMTP_PORT = 465 # SMTP + SSL 96 97 # Project constants 98 PROJECT_HOST = 'deveiate.org' 99 PROJECT_PUBDIR = "/usr/local/www/public/code" 100 PROJECT_DOCDIR = "#{PROJECT_PUBDIR}/#{PKG_NAME}" 101 PROJECT_SCPURL = "#{PROJECT_HOST}:#{PROJECT_DOCDIR}" 102 103 # Gem dependencies: gemname => version 104 DEPENDENCIES = { 105 # 'mongrel' => '', 106 } 107 108 # Non-gem requirements: packagename => version 109 REQUIREMENTS = { 110 # 'Apache' => '>= 2.2.6', 111 } 112 113 # RubyGem specification 114 GEMSPEC = Gem::Specification.new do |gem| 115 gem.name = PKG_NAME.downcase 116 gem.version = PKG_VERSION 117 118 gem.summary = PKG_SUMMARY 119 gem.description = <<-EOD 120 121 A Ruby implementation of the WordNet lexical dictionary 122 123 EOD 124 125 gem.authors = 'Michael Granger' 126 gem.email = 'ged@FaerieMUD.org' 127 gem.homepage = 'http://deveiate.org/projects/Ruby-WordNet' 128 gem.rubyforge_project = 'deveiate' 129 130 gem.has_rdoc = true 131 gem.rdoc_options = RDOC_OPTIONS 132 133 gem.files = RELEASE_FILES. 134 collect {|f| f.relative_path_from(BASEDIR).to_s } 135 gem.test_files = SPEC_FILES. 136 collect {|f| f.relative_path_from(BASEDIR).to_s } 137 138 DEPENDENCIES.each do |name, version| 139 version = '>= 0' if version.length.zero? 140 gem.add_dependency( name, version ) 141 end 142 143 REQUIREMENTS.each do |name, version| 144 gem.requirements << [ name, version ].compact.join(' ') 145 end 146 end 147 148 149 # Load any remaining task libraries 68 150 Pathname.glob( RAKE_TASKDIR + '*.rb' ).each do |tasklib| 151 RELEASE_FILES.include( tasklib ) 152 69 153 next if tasklib =~ %r{/(helpers|svn|verifytask)\.rb$} 70 154 begin … … 81 165 end 82 166 83 84 if Rake.application.options.trace 85 $trace = true 86 log "$trace is enabled" 87 end 88 89 if Rake.application.options.dryrun 90 $dryrun = true 91 log "$dryrun is enabled" 92 Rake.application.options.dryrun = false 93 end 167 $trace = Rake.application.options.trace ? true : false 168 $dryrun = Rake.application.options.dryrun ? true : false 169 170 # Load any project-specific rules defined in 'Rakefile.local' if it exists 171 LOCAL_RAKEFILE = BASEDIR + 'Rakefile.local' 172 if LOCAL_RAKEFILE.exist? 173 import LOCAL_RAKEFILE 174 RELEASE_FILES.include( LOCAL_RAKEFILE.to_s ) 175 end 176 177 178 ##################################################################### 179 ### T A S K S 180 ##################################################################### 94 181 95 182 ### Default task 96 task :default => [:clean, :spec, : verify, :package]183 task :default => [:clean, :spec, :rdoc, :package] 97 184 98 185 99 186 ### Task: clean 100 desc "Clean pkg, coverage, and rdoc; remove .bak files" 101 task :clean => [ :clobber_rdoc, :clobber_package, :clobber_coverage, :clobber_manual ] do 102 files = FileList['**/*.bak'] 103 files.clear_exclude 104 File.rm( files ) unless files.empty? 105 FileUtils.rm_rf( 'artifacts' ) 106 end 107 108 109 ### Task: docs -- Convenience task for rebuilding dynamic docs, including coverage, api 110 ### docs, and manual 111 task :docs => [ :manual, :coverage, :rdoc ] 112 113 114 ### Task: rdoc 115 Rake::RDocTask.new do |rdoc| 116 rdoc.rdoc_dir = 'docs' 117 rdoc.title = "Ruby WordNet" 118 119 rdoc.options += [ 120 '-w', '4', 121 '-SHN', 122 '-i', 'docs', 123 '-f', 'darkfish', 124 '-m', 'README', 125 '-W', 'http://deveiate.org/projects/Ruby-WordNet/browser/trunk/' 126 ] 127 128 rdoc.rdoc_files.include 'README' 129 rdoc.rdoc_files.include LIB_FILES.collect {|f| f.relative_path_from(BASEDIR).to_s } 130 end 131 132 133 ### Task: gem 134 gemspec = Gem::Specification.new do |gem| 135 pkg_build = get_svn_rev( BASEDIR ) || 0 136 137 gem.name = PKG_NAME 138 gem.version = "%s.%s" % [ PKG_VERSION, pkg_build ] 139 140 gem.summary = "Ruby-WordNet is a Ruby interface to the WordNet® Lexical Database" 141 gem.description = <<-EOD 142 Ruby-WordNet is a Ruby interface to the WordNet® Lexical Database. WordNet 143 is an online lexical reference system whose design is inspired by current 144 psycholinguistic theories of human lexical memory. English nouns, verbs, 145 adjectives and adverbs are organized into synonym sets, each representing 146 one underlying lexical concept. Different relations link the synonym sets. 147 EOD 148 149 gem.authors = "Michael Granger" 150 gem.homepage = "http://deveiate.org/projects/Ruby-WordNet/" 151 152 gem.has_rdoc = true 153 154 gem.files = RELEASE_FILES. 155 collect {|f| f.relative_path_from(BASEDIR).to_s } 156 gem.test_files = SPEC_FILES. 157 collect {|f| f.relative_path_from(BASEDIR).to_s } 158 159 gem.add_dependency( 'sequel' ) 160 end 161 Rake::GemPackageTask.new( gemspec ) do |task| 162 task.gem_spec = gemspec 163 task.need_tar = false 164 task.need_tar_gz = true 165 task.need_tar_bz2 = true 166 task.need_zip = true 167 end 168 169 170 ### Task: install 171 task :install_gem => [:package] do 172 $stderr.puts 173 installer = Gem::Installer.new( %{pkg/#{PKG_FILE_NAME}.gem} ) 174 installer.install 175 end 176 177 ### Task: uninstall 178 task :uninstall_gem => [:clean] do 179 uninstaller = Gem::Uninstaller.new( PKG_FILE_NAME ) 180 uninstaller.uninstall 181 end 182 187 CLEAN.include 'coverage' 188 CLOBBER.include 'artifacts', 'coverage.info', PKGDIR 183 189 184 190 185 191 ### Cruisecontrol task 186 192 desc "Cruisecontrol build" 187 task :cruise => [:clean, :coverage, :package] do |task|193 task :cruise => [:clean, 'coverage:verify', :package] do |task| 188 194 raise "Artifacts dir not set." if ARTIFACTS_DIR.to_s.empty? 189 195 artifact_dir = ARTIFACTS_DIR.cleanpath … … 198 204 199 205 200 ### RSpec tasks201 begin202 gem 'rspec', '>= 1.1.3'203 require 'spec/rake/spectask'204 205 COMMON_SPEC_OPTS = ['-c', '-f', 's']206 207 ### Task: spec208 Spec::Rake::SpecTask.new( :spec ) do |task|209 task.spec_files = SPEC_FILES210 task.libs += [LIBDIR]211 task.spec_opts = COMMON_SPEC_OPTS212 end213 task :test => [:spec]214 215 216 namespace :spec do217 desc "Run rspec every time there's a change to one of the files"218 task :autotest do219 require 'autotest/rspec'220 221 autotester = Autotest::Rspec.new222 autotester.exceptions = %r{\.svn|\.skel}223 autotester.run224 end225 226 227 desc "Generate HTML output for a spec run"228 Spec::Rake::SpecTask.new( :html ) do |task|229 task.spec_files = SPEC_FILES230 task.spec_opts = ['-f','h', '-D']231 end232 233 desc "Generate plain-text output for a CruiseControl.rb build"234 Spec::Rake::SpecTask.new( :text ) do |task|235 task.spec_files = SPEC_FILES236 task.spec_opts = ['-f','p']237 end238 end239 rescue LoadError => err240 task :no_rspec do241 $stderr.puts "Testing tasks not defined: RSpec rake tasklib not available: %s" %242 [ err.message ]243 end244 245 task :spec => :no_rspec246 namespace :spec do247 task :autotest => :no_rspec248 task :html => :no_rspec249 task :text => :no_rspec250 end251 end252 253 254 ### RCov (via RSpec) tasks255 begin256 gem 'rcov'257 gem 'rspec', '>= 1.1.3'258 259 RCOV_OPTS = ['--exclude', SPEC_EXCLUDES, '--xrefs', '--save']260 261 ### Task: coverage (via RCov)262 ### Task: spec263 desc "Build test coverage reports"264 Spec::Rake::SpecTask.new( :coverage ) do |task|265 task.spec_files = SPEC_FILES266 task.libs += [LIBDIR]267 task.spec_opts = ['-f', 'p', '-b']268 task.rcov_opts = RCOV_OPTS269 task.rcov = true270 end271 272 task :rcov => [:coverage] do; end273 274 ### Other coverage tasks275 namespace :coverage do276 desc "Generate a detailed text coverage report"277 Spec::Rake::SpecTask.new( :text ) do |task|278 task.spec_files = SPEC_FILES279 task.rcov_opts = RCOV_OPTS + ['--text-report']280 task.rcov = true281 end282 283 desc "Show differences in coverage from last run"284 Spec::Rake::SpecTask.new( :diff ) do |task|285 task.spec_files = SPEC_FILES286 task.rcov_opts = ['--text-coverage-diff']287 task.rcov = true288 end289 290 ### Task: verify coverage291 desc "Build coverage statistics"292 VerifyTask.new( :verify => :rcov ) do |task|293 task.threshold = 85.0294 end295 296 desc "Run RCov in 'spec-only' mode to check coverage from specs"297 Spec::Rake::SpecTask.new( :speconly ) do |task|298 task.spec_files = SPEC_FILES299 task.rcov_opts = ['--exclude', SPEC_EXCLUDES, '--text-report', '--save']300 task.rcov = true301 end302 end303 304 rescue LoadError => err305 task :no_rcov do306 $stderr.puts "Coverage tasks not defined: RSpec+RCov tasklib not available: %s" %307 [ err.message ]308 end309 310 task :coverage => :no_rcov311 task :clobber_coverage312 task :rcov => :no_rcov313 namespace :coverage do314 task :text => :no_rcov315 task :diff => :no_rcov316 end317 task :verify => :no_rcov318 end319 320 321 322 ### Coding style checks and fixes323 namespace :style do324 325 BLANK_LINE = /^\s*$/326 GOOD_INDENT = /^(\t\s*)?\S/327 328 # A list of the files that have legitimate leading whitespace, etc.329 PROBLEM_FILES = []330 331 desc "Check source files for inconsistent indent and fix them"332 task :fix_indent do333 files = LIB_FILES + SPEC_FILES334 335 badfiles = Hash.new {|h,k| h[k] = [] }336 337 trace "Checking files for indentation"338 files.each do |file|339 if PROBLEM_FILES.include?( file )340 trace " skipping problem file #{file}..."341 next342 end343 344 trace " #{file}"345 linecount = 0346 file.each_line do |line|347 linecount += 1348 349 # Skip blank lines350 next if line =~ BLANK_LINE351 352 # If there's a line with incorrect indent, note it and skip to the353 # next file354 if line !~ GOOD_INDENT355 trace " Bad line %d: %p" % [ linecount, line ]356 badfiles[file] << [ linecount, line ]357 end358 end359 end360 361 if badfiles.empty?362 log "No indentation problems found."363 else364 log "Found incorrect indent in #{badfiles.length} files:\n "365 badfiles.each do |file, badlines|366 log " #{file}:\n" +367 " " + badlines.collect {|badline| "%5d: %p" % badline }.join( "\n " )368 end369 end370 end371 372 end373 374 -
branches/rakefile-work/convertdb.rb
r87 r90 50 50 $scanner = StringScanner::new( "" ) 51 51 52 # Source WordNet files 53 IndexFiles = %w[ index.noun index.verb index.adj index.adv ] 54 MorphFiles = { 55 'adj.exc' => WordNet::Adjective, 56 'adv.exc' => WordNet::Adverb, 57 'noun.exc' => WordNet::Noun, 58 'verb.exc' => WordNet::Verb, 59 'cousin.exc' => '', 60 } 61 DataFiles = { 62 'data.adj' => WordNet::Adjective, 63 'data.adv' => WordNet::Adverb, 64 'data.noun' => WordNet::Noun, 65 'data.verb' => WordNet::Verb, 66 } 67 68 # Struct which represents a list of files, a database, and a processor function 69 # for moving records from each of the files into the database. 70 Fileset = Struct::new( "WordNetFileset", :files, :name, :db, :processor ) 71 72 # How many records to insert between commits 73 CommitThreshold = 2000 74 75 # Temporary location for the lexicon data files 76 BuildDir = Pathname.new( __FILE__ ).expand_path.dirname + 77 Pathname.new( WordNet::Lexicon::DEFAULT_DB_ENV ).basename 78 79 80 81 ##################################################################### 82 ### M A I N P R O G R A M 83 ##################################################################### 84 def convertdb( errorLimit=0 ) 85 $stderr.sync = $stdout.sync = true 86 header "WordNet Lexicon Converter" 87 88 # Make sure the user knows what they're in for 89 message "This program will convert WordNet data files into databases\n"\ 90 "used by Ruby-WordNet. This will not affect existing WordNet files,\n"\ 91 "but will require up to 40Mb of disk space.\n" 92 exit unless /^y/i =~ prompt_with_default("Continue?", "y") 93 94 # Open the database and check to be sure it's empty. Confirm overwrite if 95 # not. Checkpoint and set up logging proc if debugging. 96 if BuildDir.exist? 97 message ">>> Warning: Existing data in the Ruby-WordNet databases\n"\ 98 "will be overwritten.\n" 99 abort( "user cancelled." ) unless 100 /^y/i =~ prompt_with_default( "Continue?", "n" ) 101 BuildDir.rmtree 52 class WordNetConverter 53 54 # Source WordNet files 55 IndexFiles = %w[ index.noun index.verb index.adj index.adv ] 56 MorphFiles = { 57 'adj.exc' => WordNet::Adjective, 58 'adv.exc' => WordNet::Adverb, 59 'noun.exc' => WordNet::Noun, 60 'verb.exc' => WordNet::Verb, 61 'cousin.exc' => '', 62 } 63 DataFiles = { 64 'data.adj' => WordNet::Adjective, 65 'data.adv' => WordNet::Adverb, 66 'data.noun' => WordNet::Noun, 67 'data.verb' => WordNet::Verb, 68 } 69 70 # Struct which represents a list of files, a database, and a processor function 71 # for moving records from each of the files into the database. 72 Fileset = Struct::new( "WordNetFileset", :files, :name, :db, :processor ) 73 74 # How many records to insert between commits 75 CommitThreshold = 2000 76 77 # Temporary location for the lexicon data files 78 BuildDir = Pathname.new( __FILE__ ).expand_path.dirname + 79 Pathname.new( WordNet::Lexicon::DEFAULT_DB_ENV ).basename 80 81 82 ### Create a new converter that will dump WordNet dictionary files into a BerkeleyDB 83 ### in the given +builddir+ 84 def initialize( builddir=BuildDir ) 85 @builddir = Pathname.new( builddir ) 102 86 end 103 104 # Find the source data files 105 if ARGV.empty? 87 88 89 ### Convert the various dict files from the WordNet project into a BerkeleyDB database 90 def convertdb( errorLimit=0 ) 91 $stderr.sync = $stdout.sync = true 92 header "WordNet Lexicon Converter" 93 94 # Make sure the user knows what they're in for 95 message "This program will convert WordNet data files into databases\n"\ 96 "used by Ruby-WordNet. This will not affect existing WordNet files,\n"\ 97 "but will require up to 40Mb of disk space.\n" 98 exit unless /^y/i =~ prompt_with_default("Continue?", "y") 99 100 # Open the database and check to be sure it's empty. Confirm overwrite if 101 # not. Checkpoint and set up logging proc if debugging. 102 if @builddir.exist? && @builddir.entries.include?( 'data' ) 103 message ">>> Warning: Existing data in the Ruby-WordNet databases\n"\ 104 "will be overwritten.\n" 105 abort( "user cancelled." ) unless 106 /^y/i =~ prompt_with_default( "Continue?", "n" ) 107 @builddir.rmtree 108 end 109 110 # Find the source data files 106 111 default = nil 107 108 if wndirs = Pathname.glob( Pathname.getwd + 'WordNet-*' )112 wndirs = Pathname.glob( Pathname.getwd + 'WordNet-*' ) 113 unless wndirs.empty? 109 114 default = wndirs.first 110 115 else … … 114 119 # :TODO: Do some more intelligent searching here 115 120 message "Where can I find the WordNet data files?\n" 116 datadir = prompt_with_default( "Data directory", default + "dict" ) 117 else 118 datadir = ARGV.shift 119 end 120 datadir = Pathname.new( datadir ) 121 122 abort( "Directory '#{datadir}' does not exist" ) unless datadir.exist? 123 abort( "'#{datadir}' is not a directory" ) unless datadir.directory? 124 testfile = datadir + "data.noun" 125 abort( "'#{datadir}' doesn't seem to contain the necessary files.") unless testfile.exist? 126 127 # Open the lexicon readwrite into the temporary datadir 128 BuildDir.mkpath 129 lexicon = WordNet::Lexicon::new( BuildDir.to_s, 0666 ) 130 131 # Process each fileset 132 [ # Fileset, name, database handle, processor 133 Fileset::new( IndexFiles, "index", lexicon.index_db, method(:parse_index_line) ), 134 Fileset::new( MorphFiles, "morph", lexicon.morph_db, method(:parse_morph_line) ), 135 Fileset::new( DataFiles, "data", lexicon.data_db, method(:parse_synset_line) ), 136 ].each do |set| 137 message "Converting %s files...\n" % set.name 138 set.db.truncate 139 140 # Process each file in the set with the appropriate processor method and 141 # insert results into the corresponding table. 142 set.files.each do |file,pos| 143 message " #{file}..." 144 145 filepath = File::join( datadir, file ) 146 if !File::exists?( filepath ) 147 message "missing: skipped\n" 148 next 149 end 150 151 txn, dbh = lexicon.env.txn_begin( 0, set.db ) 152 entries = lineNumber = errors = 0 153 File::readlines( filepath ).each do |line| 154 lineNumber += 1 155 next if /^\s/ =~ line 156 157 key, value = set.processor.call( line.chomp, lineNumber, pos ) 158 unless key 159 errors += 1 160 if errorLimit.nonzero? && errors >= errorLimit 161 abort( "Too many errors" ) 121 datadir = prompt_with_default( "Data directory", default + "/dict" ) 122 datadir = Pathname.new( datadir ) 123 124 abort( "Directory '#{datadir}' does not exist" ) unless datadir.exist? 125 abort( "'#{datadir}' is not a directory" ) unless datadir.directory? 126 testfile = datadir + "data.noun" 127 abort( "'#{datadir}' doesn't seem to contain the necessary files.") unless testfile.exist? 128 129 # Open the lexicon readwrite into the temporary datadir 130 @builddir.mkpath 131 lexicon = WordNet::Lexicon::new( @builddir.to_s, 0666 ) 132 133 # Process each fileset 134 [ # Fileset, name, database handle, processor 135 Fileset::new( IndexFiles, "index", lexicon.index_db, method(:parse_index_line) ), 136 Fileset::new( MorphFiles, "morph", lexicon.morph_db, method(:parse_morph_line) ), 137 Fileset::new( DataFiles, "data", lexicon.data_db, method(:parse_synset_line) ), 138 ].each do |set| 139 message "Converting %s files...\n" % set.name 140 set.db.truncate 141 142 # Process each file in the set with the appropriate processor method and 143 # insert results into the corresponding table. 144 set.files.each do |file,pos| 145 message " #{file}..." 146 147 filepath = File::join( datadir, file ) 148 if !File::exists?( filepath ) 149 message "missing: skipped\n" 150 next 151 end 152 153 txn, dbh = lexicon.env.txn_begin( 0, set.db ) 154 entries = lineNumber = errors = 0 155 File::readlines( filepath ).each do |line| 156 lineNumber += 1 157 next if /^\s/ =~ line 158 159 key, value = set.processor.call( line.chomp, lineNumber, pos ) 160 unless key 161 errors += 1 162 if errorLimit.nonzero? && errors >= errorLimit 163 abort( "Too many errors" ) 164 end 165 end 166 167 dbh[ key ] = value 168 entries += 1 169 print "%d%s" % [ entries, "\x08" * entries.to_s.length ] 170 171 # Commit and start a new transaction every 1000 recor
