Changeset 90

Show
Ignore:
Timestamp:
07/09/08 16:02:53 (3 months ago)
Author:
deveiant
Message:

Checkpoint -- conversion of the database files and the basic rake infrastructure from the
rake-tasklibs external are now working.

Location:
branches/rakefile-work
Files:
6 modified

Legend:

Unmodified
Added
Removed
  • branches/rakefile-work

    • Property svn:externals
      •  

        old new  
        1 rake svn://deveiate.org/rake-tasklibs/trunks 
         1rake svn://deveiate.org/rake-tasklibs/trunk 
  • branches/rakefile-work/Rakefile

    r87 r90  
    11#!rake 
    22# 
    3 # Ruby-WordNet Rakefile 
    4 # 
    5 # Copyright (c) 2008, The FaerieMUD Consortium 
     3# WordNet rakefile 
     4# 
     5# Based on various other Rakefiles, especially one by Ben Bleything 
     6# 
     7# Copyright (c) 2008 The FaerieMUD Consortium 
    68# 
    79# Authors: 
     
    1214    require 'pathname' 
    1315    basedir = Pathname.new( __FILE__ ).dirname 
    14     libdir = basedir + 'lib' 
     16 
     17    libdir = basedir + "lib" 
     18    extdir = basedir + "ext" 
    1519 
    1620    $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s ) 
     21    $LOAD_PATH.unshift( extdir.to_s ) unless $LOAD_PATH.include?( extdir.to_s ) 
    1722} 
    1823 
    1924 
     25require 'rbconfig' 
    2026require 'rubygems' 
    2127require 'rake' 
    2228require 'rake/rdoctask' 
     29require 'rake/testtask' 
    2330require 'rake/packagetask' 
    24 require 'rake/gempackagetask' 
    25 require 'pathname' 
     31require 'rake/clean' 
    2632 
    2733$dryrun = false 
    2834 
    29 # Pathname constants 
     35### Config constants 
    3036BASEDIR       = Pathname.new( __FILE__ ).dirname.relative_path_from( Pathname.getwd ) 
    3137LIBDIR        = BASEDIR + 'lib' 
     38EXTDIR        = BASEDIR + 'ext' 
    3239DOCSDIR       = BASEDIR + 'docs' 
    33 RAKEDIR       = BASEDIR + 'rake' 
    34 RDOCDIR       = BASEDIR + 'docs' 
    3540PKGDIR        = BASEDIR + 'pkg' 
    36  
    3741RAKE_TASKDIR  = BASEDIR + 'rake' 
    38 ARTIFACTS_DIR = Pathname.new( ENV['CC_BUILD_ARTIFACTS'] || '' ) 
    39  
    40 TEXT_FILES    = %w( Rakefile ChangeLog INSTALL README LICENSE ). 
    41     collect {|filename| BASEDIR + filename } 
    42  
    43 BUILD_FILES   = %w( convertdb.rb utils.rb ) 
     42 
     43PKG_NAME      = 'wordnet' 
     44PKG_SUMMARY   = '' 
     45VERSION_FILE  = LIBDIR + 'wordnet.rb' 
     46PKG_VERSION   = VERSION_FILE.read[ /VERSION = '(\d+\.\d+\.\d+)'/, 1 ] 
     47PKG_FILE_NAME = "#{PKG_NAME.downcase}-#{PKG_VERSION}" 
     48GEM_FILE_NAME = "#{PKG_FILE_NAME}.gem" 
     49 
     50RELEASE_NAME  = "RELEASE_#{PKG_VERSION.gsub(/\./, '_')}" 
     51 
     52ARTIFACTS_DIR = Pathname.new( ENV['CC_BUILD_ARTIFACTS'] || 'artifacts' ) 
     53 
     54TEXT_FILES    = %w( Rakefile ChangeLog README LICENSE ).collect {|filename| BASEDIR + filename } 
     55LIB_FILES     = Pathname.glob( LIBDIR + '**/*.rb' ).delete_if {|item| item =~ /\.svn/ } 
     56EXT_FILES     = Pathname.glob( EXTDIR + '**/*.{c,h,rb}' ).delete_if {|item| item =~ /\.svn/ } 
    4457 
    4558SPECDIR       = BASEDIR + 'spec' 
    46 SPEC_FILES    = Pathname.glob( (SPECDIR + '**/*_spec.rb').to_s ). 
    47     delete_if {|item| item =~ /\.svn/ } 
    48 # Ideally, this should be automatically generated. 
     59SPEC_FILES    = Pathname.glob( SPECDIR + '**/*_spec.rb' ).delete_if {|item| item =~ /\.svn/ } 
    4960SPEC_EXCLUDES = 'spec,/Library/Ruby,/var/lib,/usr/local/lib' 
    5061 
    51 LIB_FILES     = Pathname.glob( LIBDIR + '**/*.rb'). 
    52     delete_if {|item| item =~ /\.svn/ } 
    53  
    54 RELEASE_FILES = TEXT_FILES + LIB_FILES + SPEC_FILES 
    55  
     62 
     63RELEASE_FILES = FileList[ TEXT_FILES + SPEC_FILES + LIB_FILES + EXT_FILES ] 
     64 
     65 
     66RCOV_OPTS = [ 
     67    '--exclude', SPEC_EXCLUDES, 
     68    '--xrefs', 
     69    '--save', 
     70    '--callsites' 
     71  ] 
     72 
     73 
     74### Load some task libraries that need to be loaded early 
    5675require RAKE_TASKDIR + 'helpers.rb' 
    57  
    58 ### Package constants 
    59 PKG_NAME      = 'wordnet' 
    60 PKG_VERSION   = find_pattern_in_file( /VERSION = '(\d+\.\d+\.\d+)'/, LIBDIR + 'wordnet.rb' ).first 
    61 PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}" 
    62  
    63 RELEASE_NAME  = "REL #{PKG_VERSION}" 
    64  
    65 ### Load task libraries 
    6676require RAKE_TASKDIR + 'svn.rb' 
    6777require RAKE_TASKDIR + 'verifytask.rb' 
     78 
     79# Define some constants that depend on the 'svn' tasklib 
     80PKG_BUILD = get_svn_rev( BASEDIR ) || 0 
     81SNAPSHOT_PKG_NAME = "#{PKG_FILE_NAME}.#{PKG_BUILD}" 
     82SNAPSHOT_GEM_NAME = "#{SNAPSHOT_PKG_NAME}.gem" 
     83 
     84# Documentation constants 
     85RDOC_OPTIONS = [ 
     86    '-w', '4', 
     87    '-SHN', 
     88    '-i', '.', 
     89    '-m', 'README', 
     90    '-W', 'http://deveiate.org/projects/Ruby-WordNet/browser/trunk/' 
     91  ] 
     92 
     93# Release constants 
     94SMTP_HOST = 'mail.faeriemud.org' 
     95SMTP_PORT = 465 # SMTP + SSL 
     96 
     97# Project constants 
     98PROJECT_HOST = 'deveiate.org' 
     99PROJECT_PUBDIR = "/usr/local/www/public/code" 
     100PROJECT_DOCDIR = "#{PROJECT_PUBDIR}/#{PKG_NAME}" 
     101PROJECT_SCPURL = "#{PROJECT_HOST}:#{PROJECT_DOCDIR}" 
     102 
     103# Gem dependencies: gemname => version 
     104DEPENDENCIES = { 
     105#   'mongrel'       => '', 
     106} 
     107 
     108# Non-gem requirements: packagename => version 
     109REQUIREMENTS = { 
     110#   'Apache'  => '>= 2.2.6', 
     111} 
     112 
     113# RubyGem specification 
     114GEMSPEC   = Gem::Specification.new do |gem| 
     115    gem.name              = PKG_NAME.downcase 
     116    gem.version           = PKG_VERSION 
     117 
     118    gem.summary           = PKG_SUMMARY 
     119    gem.description       = <<-EOD 
     120 
     121    A Ruby implementation of the WordNet lexical dictionary 
     122 
     123    EOD 
     124 
     125    gem.authors           = 'Michael Granger' 
     126    gem.email             = 'ged@FaerieMUD.org' 
     127    gem.homepage          = 'http://deveiate.org/projects/Ruby-WordNet' 
     128    gem.rubyforge_project = 'deveiate' 
     129 
     130    gem.has_rdoc          = true 
     131    gem.rdoc_options      = RDOC_OPTIONS 
     132 
     133    gem.files             = RELEASE_FILES. 
     134        collect {|f| f.relative_path_from(BASEDIR).to_s } 
     135    gem.test_files        = SPEC_FILES. 
     136        collect {|f| f.relative_path_from(BASEDIR).to_s } 
     137         
     138    DEPENDENCIES.each do |name, version| 
     139        version = '>= 0' if version.length.zero? 
     140        gem.add_dependency( name, version ) 
     141    end 
     142     
     143    REQUIREMENTS.each do |name, version| 
     144        gem.requirements << [ name, version ].compact.join(' ') 
     145    end 
     146end 
     147 
     148 
     149# Load any remaining task libraries 
    68150Pathname.glob( RAKE_TASKDIR + '*.rb' ).each do |tasklib| 
     151    RELEASE_FILES.include( tasklib ) 
     152 
    69153    next if tasklib =~ %r{/(helpers|svn|verifytask)\.rb$} 
    70154    begin 
     
    81165end 
    82166 
    83  
    84 if Rake.application.options.trace 
    85     $trace = true 
    86     log "$trace is enabled" 
    87 end 
    88  
    89 if Rake.application.options.dryrun 
    90     $dryrun = true 
    91     log "$dryrun is enabled" 
    92     Rake.application.options.dryrun = false 
    93 end 
     167$trace = Rake.application.options.trace ? true : false 
     168$dryrun = Rake.application.options.dryrun ? true : false 
     169 
     170# Load any project-specific rules defined in 'Rakefile.local' if it exists 
     171LOCAL_RAKEFILE = BASEDIR + 'Rakefile.local' 
     172if LOCAL_RAKEFILE.exist? 
     173    import LOCAL_RAKEFILE  
     174    RELEASE_FILES.include( LOCAL_RAKEFILE.to_s ) 
     175end 
     176 
     177 
     178##################################################################### 
     179### T A S K S    
     180##################################################################### 
    94181 
    95182### Default task 
    96 task :default  => [:clean, :spec, :verify, :package] 
     183task :default  => [:clean, :spec, :rdoc, :package] 
    97184 
    98185 
    99186### Task: clean 
    100 desc "Clean pkg, coverage, and rdoc; remove .bak files" 
    101 task :clean => [ :clobber_rdoc, :clobber_package, :clobber_coverage, :clobber_manual ] do 
    102     files = FileList['**/*.bak'] 
    103     files.clear_exclude 
    104     File.rm( files ) unless files.empty? 
    105     FileUtils.rm_rf( 'artifacts' ) 
    106 end 
    107  
    108  
    109 ### Task: docs -- Convenience task for rebuilding dynamic docs, including coverage, api  
    110 ### docs, and manual 
    111 task :docs => [ :manual, :coverage, :rdoc ] 
    112  
    113  
    114 ### Task: rdoc 
    115 Rake::RDocTask.new do |rdoc| 
    116     rdoc.rdoc_dir = 'docs' 
    117     rdoc.title    = "Ruby WordNet" 
    118  
    119     rdoc.options += [ 
    120         '-w', '4', 
    121         '-SHN', 
    122         '-i', 'docs', 
    123         '-f', 'darkfish', 
    124         '-m', 'README', 
    125         '-W', 'http://deveiate.org/projects/Ruby-WordNet/browser/trunk/' 
    126       ] 
    127      
    128     rdoc.rdoc_files.include 'README' 
    129     rdoc.rdoc_files.include LIB_FILES.collect {|f| f.relative_path_from(BASEDIR).to_s } 
    130 end 
    131  
    132  
    133 ### Task: gem 
    134 gemspec = Gem::Specification.new do |gem| 
    135     pkg_build = get_svn_rev( BASEDIR ) || 0 
    136      
    137     gem.name        = PKG_NAME 
    138     gem.version     = "%s.%s" % [ PKG_VERSION, pkg_build ] 
    139  
    140     gem.summary     = "Ruby-WordNet is a Ruby interface to the WordNet® Lexical Database" 
    141     gem.description = <<-EOD 
    142     Ruby-WordNet is a Ruby interface to the WordNet® Lexical Database. WordNet  
    143     is an online lexical reference system whose design is inspired by current  
    144     psycholinguistic theories of human lexical memory. English nouns, verbs,  
    145     adjectives and adverbs are organized into synonym sets, each representing  
    146     one underlying lexical concept. Different relations link the synonym sets. 
    147     EOD 
    148  
    149     gem.authors     = "Michael Granger" 
    150     gem.homepage    = "http://deveiate.org/projects/Ruby-WordNet/" 
    151  
    152     gem.has_rdoc    = true 
    153  
    154     gem.files       = RELEASE_FILES. 
    155         collect {|f| f.relative_path_from(BASEDIR).to_s } 
    156     gem.test_files  = SPEC_FILES. 
    157         collect {|f| f.relative_path_from(BASEDIR).to_s } 
    158  
    159     gem.add_dependency( 'sequel' ) 
    160 end 
    161 Rake::GemPackageTask.new( gemspec ) do |task| 
    162     task.gem_spec = gemspec 
    163     task.need_tar = false 
    164     task.need_tar_gz = true 
    165     task.need_tar_bz2 = true 
    166     task.need_zip = true 
    167 end 
    168  
    169  
    170 ### Task: install 
    171 task :install_gem => [:package] do 
    172     $stderr.puts  
    173     installer = Gem::Installer.new( %{pkg/#{PKG_FILE_NAME}.gem} ) 
    174     installer.install 
    175 end 
    176  
    177 ### Task: uninstall 
    178 task :uninstall_gem => [:clean] do 
    179     uninstaller = Gem::Uninstaller.new( PKG_FILE_NAME ) 
    180     uninstaller.uninstall 
    181 end 
    182  
     187CLEAN.include 'coverage' 
     188CLOBBER.include 'artifacts', 'coverage.info', PKGDIR 
    183189 
    184190 
    185191### Cruisecontrol task 
    186192desc "Cruisecontrol build" 
    187 task :cruise => [:clean, :coverage, :package] do |task| 
     193task :cruise => [:clean, 'coverage:verify', :package] do |task| 
    188194    raise "Artifacts dir not set." if ARTIFACTS_DIR.to_s.empty? 
    189195    artifact_dir = ARTIFACTS_DIR.cleanpath 
     
    198204 
    199205 
    200 ### RSpec tasks 
    201 begin 
    202     gem 'rspec', '>= 1.1.3' 
    203     require 'spec/rake/spectask' 
    204  
    205     COMMON_SPEC_OPTS = ['-c', '-f', 's'] 
    206  
    207     ### Task: spec 
    208     Spec::Rake::SpecTask.new( :spec ) do |task| 
    209         task.spec_files = SPEC_FILES 
    210         task.libs += [LIBDIR] 
    211         task.spec_opts = COMMON_SPEC_OPTS 
    212     end 
    213     task :test => [:spec] 
    214  
    215  
    216     namespace :spec do 
    217         desc "Run rspec every time there's a change to one of the files" 
    218         task :autotest do 
    219             require 'autotest/rspec' 
    220  
    221             autotester = Autotest::Rspec.new 
    222             autotester.exceptions = %r{\.svn|\.skel} 
    223             autotester.run 
    224         end 
    225  
    226      
    227         desc "Generate HTML output for a spec run" 
    228         Spec::Rake::SpecTask.new( :html ) do |task| 
    229             task.spec_files = SPEC_FILES 
    230             task.spec_opts = ['-f','h', '-D'] 
    231         end 
    232  
    233         desc "Generate plain-text output for a CruiseControl.rb build" 
    234         Spec::Rake::SpecTask.new( :text ) do |task| 
    235             task.spec_files = SPEC_FILES 
    236             task.spec_opts = ['-f','p'] 
    237         end 
    238     end 
    239 rescue LoadError => err 
    240     task :no_rspec do 
    241         $stderr.puts "Testing tasks not defined: RSpec rake tasklib not available: %s" % 
    242             [ err.message ] 
    243     end 
    244      
    245     task :spec => :no_rspec 
    246     namespace :spec do 
    247         task :autotest => :no_rspec 
    248         task :html => :no_rspec 
    249         task :text => :no_rspec 
    250     end 
    251 end 
    252  
    253  
    254 ### RCov (via RSpec) tasks 
    255 begin 
    256     gem 'rcov' 
    257     gem 'rspec', '>= 1.1.3' 
    258  
    259     RCOV_OPTS = ['--exclude', SPEC_EXCLUDES, '--xrefs', '--save'] 
    260  
    261     ### Task: coverage (via RCov) 
    262     ### Task: spec 
    263     desc "Build test coverage reports" 
    264     Spec::Rake::SpecTask.new( :coverage ) do |task| 
    265         task.spec_files = SPEC_FILES 
    266         task.libs += [LIBDIR] 
    267         task.spec_opts = ['-f', 'p', '-b'] 
    268         task.rcov_opts = RCOV_OPTS 
    269         task.rcov = true 
    270     end 
    271      
    272     task :rcov => [:coverage] do; end 
    273      
    274     ### Other coverage tasks 
    275     namespace :coverage do 
    276         desc "Generate a detailed text coverage report" 
    277         Spec::Rake::SpecTask.new( :text ) do |task| 
    278             task.spec_files = SPEC_FILES 
    279             task.rcov_opts = RCOV_OPTS + ['--text-report'] 
    280             task.rcov = true 
    281         end 
    282  
    283         desc "Show differences in coverage from last run" 
    284         Spec::Rake::SpecTask.new( :diff ) do |task| 
    285             task.spec_files = SPEC_FILES 
    286             task.rcov_opts = ['--text-coverage-diff'] 
    287             task.rcov = true 
    288         end 
    289  
    290         ### Task: verify coverage 
    291         desc "Build coverage statistics" 
    292         VerifyTask.new( :verify => :rcov ) do |task| 
    293             task.threshold = 85.0 
    294         end 
    295          
    296         desc "Run RCov in 'spec-only' mode to check coverage from specs" 
    297         Spec::Rake::SpecTask.new( :speconly ) do |task| 
    298             task.spec_files = SPEC_FILES 
    299             task.rcov_opts = ['--exclude', SPEC_EXCLUDES, '--text-report', '--save'] 
    300             task.rcov = true 
    301         end 
    302     end 
    303  
    304 rescue LoadError => err 
    305     task :no_rcov do 
    306         $stderr.puts "Coverage tasks not defined: RSpec+RCov tasklib not available: %s" % 
    307             [ err.message ] 
    308     end 
    309  
    310     task :coverage => :no_rcov 
    311     task :clobber_coverage 
    312     task :rcov => :no_rcov 
    313     namespace :coverage do 
    314         task :text => :no_rcov 
    315         task :diff => :no_rcov 
    316     end 
    317     task :verify => :no_rcov 
    318 end 
    319  
    320  
    321  
    322 ### Coding style checks and fixes 
    323 namespace :style do 
    324      
    325     BLANK_LINE = /^\s*$/ 
    326     GOOD_INDENT = /^(\t\s*)?\S/ 
    327  
    328     # A list of the files that have legitimate leading whitespace, etc. 
    329     PROBLEM_FILES = [] 
    330      
    331     desc "Check source files for inconsistent indent and fix them" 
    332     task :fix_indent do 
    333         files = LIB_FILES + SPEC_FILES 
    334  
    335         badfiles = Hash.new {|h,k| h[k] = [] } 
    336          
    337         trace "Checking files for indentation" 
    338         files.each do |file| 
    339             if PROBLEM_FILES.include?( file ) 
    340                 trace "  skipping problem file #{file}..." 
    341                 next 
    342             end 
    343              
    344             trace "  #{file}" 
    345             linecount = 0 
    346             file.each_line do |line| 
    347                 linecount += 1 
    348                  
    349                 # Skip blank lines 
    350                 next if line =~ BLANK_LINE 
    351                  
    352                 # If there's a line with incorrect indent, note it and skip to the  
    353                 # next file 
    354                 if line !~ GOOD_INDENT 
    355                     trace "    Bad line %d: %p" % [ linecount, line ] 
    356                     badfiles[file] << [ linecount, line ] 
    357                 end 
    358             end 
    359         end 
    360  
    361         if badfiles.empty? 
    362             log "No indentation problems found." 
    363         else 
    364             log "Found incorrect indent in #{badfiles.length} files:\n  " 
    365             badfiles.each do |file, badlines| 
    366                 log "  #{file}:\n" + 
    367                     "    " + badlines.collect {|badline| "%5d: %p" % badline }.join( "\n    " ) 
    368             end 
    369         end 
    370     end 
    371  
    372 end 
    373  
    374  
  • branches/rakefile-work/convertdb.rb

    r87 r90  
    5050$scanner = StringScanner::new( "" ) 
    5151 
    52 # Source WordNet files 
    53 IndexFiles = %w[ index.noun index.verb index.adj index.adv ] 
    54 MorphFiles = { 
    55     'adj.exc'       => WordNet::Adjective, 
    56     'adv.exc'       => WordNet::Adverb, 
    57     'noun.exc'      => WordNet::Noun, 
    58     'verb.exc'      => WordNet::Verb, 
    59     'cousin.exc'    => '', 
    60 } 
    61 DataFiles =  { 
    62     'data.adj'      => WordNet::Adjective, 
    63     'data.adv'      => WordNet::Adverb, 
    64     'data.noun'     => WordNet::Noun, 
    65     'data.verb'     => WordNet::Verb, 
    66 } 
    67  
    68 # Struct which represents a list of files, a database, and a processor function 
    69 # for moving records from each of the files into the database. 
    70 Fileset = Struct::new( "WordNetFileset", :files, :name, :db, :processor ) 
    71  
    72 # How many records to insert between commits 
    73 CommitThreshold = 2000 
    74  
    75 # Temporary location for the lexicon data files 
    76 BuildDir = Pathname.new( __FILE__ ).expand_path.dirname +  
    77            Pathname.new( WordNet::Lexicon::DEFAULT_DB_ENV ).basename 
    78  
    79  
    80  
    81 ##################################################################### 
    82 ### M A I N   P R O G R A M 
    83 ##################################################################### 
    84 def convertdb( errorLimit=0 ) 
    85     $stderr.sync = $stdout.sync = true 
    86     header "WordNet Lexicon Converter" 
    87  
    88     # Make sure the user knows what they're in for 
    89     message "This program will convert WordNet data files into databases\n"\ 
    90         "used by Ruby-WordNet. This will not affect existing WordNet files,\n"\ 
    91         "but will require up to 40Mb of disk space.\n" 
    92     exit unless /^y/i =~ prompt_with_default("Continue?", "y") 
    93  
    94     # Open the database and check to be sure it's empty. Confirm overwrite if 
    95     # not. Checkpoint and set up logging proc if debugging. 
    96     if BuildDir.exist? 
    97         message ">>> Warning: Existing data in the Ruby-WordNet databases\n"\ 
    98             "will be overwritten.\n" 
    99         abort( "user cancelled." ) unless  
    100             /^y/i =~ prompt_with_default( "Continue?", "n" ) 
    101         BuildDir.rmtree 
     52class WordNetConverter 
     53 
     54    # Source WordNet files 
     55    IndexFiles = %w[ index.noun index.verb index.adj index.adv ] 
     56    MorphFiles = { 
     57        'adj.exc'       => WordNet::Adjective, 
     58        'adv.exc'       => WordNet::Adverb, 
     59        'noun.exc'      => WordNet::Noun, 
     60        'verb.exc'      => WordNet::Verb, 
     61        'cousin.exc'    => '', 
     62    } 
     63    DataFiles =  { 
     64        'data.adj'      => WordNet::Adjective, 
     65        'data.adv'      => WordNet::Adverb, 
     66        'data.noun'     => WordNet::Noun, 
     67        'data.verb'     => WordNet::Verb, 
     68    } 
     69 
     70    # Struct which represents a list of files, a database, and a processor function 
     71    # for moving records from each of the files into the database. 
     72    Fileset = Struct::new( "WordNetFileset", :files, :name, :db, :processor ) 
     73 
     74    # How many records to insert between commits 
     75    CommitThreshold = 2000 
     76 
     77    # Temporary location for the lexicon data files 
     78    BuildDir = Pathname.new( __FILE__ ).expand_path.dirname +  
     79               Pathname.new( WordNet::Lexicon::DEFAULT_DB_ENV ).basename 
     80 
     81 
     82    ### Create a new converter that will dump WordNet dictionary files into a BerkeleyDB  
     83    ### in the given +builddir+  
     84    def initialize( builddir=BuildDir ) 
     85        @builddir = Pathname.new( builddir ) 
    10286    end 
    103  
    104     # Find the source data files 
    105     if ARGV.empty? 
     87     
     88 
     89    ### Convert the various dict files from the WordNet project into a BerkeleyDB database 
     90    def convertdb( errorLimit=0 ) 
     91        $stderr.sync = $stdout.sync = true 
     92        header "WordNet Lexicon Converter" 
     93 
     94        # Make sure the user knows what they're in for 
     95        message "This program will convert WordNet data files into databases\n"\ 
     96            "used by Ruby-WordNet. This will not affect existing WordNet files,\n"\ 
     97            "but will require up to 40Mb of disk space.\n" 
     98        exit unless /^y/i =~ prompt_with_default("Continue?", "y") 
     99 
     100        # Open the database and check to be sure it's empty. Confirm overwrite if 
     101        # not. Checkpoint and set up logging proc if debugging. 
     102        if @builddir.exist? && @builddir.entries.include?( 'data' ) 
     103            message ">>> Warning: Existing data in the Ruby-WordNet databases\n"\ 
     104                "will be overwritten.\n" 
     105            abort( "user cancelled." ) unless  
     106                /^y/i =~ prompt_with_default( "Continue?", "n" ) 
     107            @builddir.rmtree 
     108        end 
     109 
     110        # Find the source data files 
    106111        default = nil 
    107          
    108         if wndirs = Pathname.glob( Pathname.getwd + 'WordNet-*' ) 
     112        wndirs = Pathname.glob( Pathname.getwd + 'WordNet-*' ) 
     113        unless wndirs.empty? 
    109114            default = wndirs.first 
    110115        else 
     
    114119        # :TODO: Do some more intelligent searching here 
    115120        message "Where can I find the WordNet data files?\n" 
    116         datadir = prompt_with_default( "Data directory", default + "dict" ) 
    117     else 
    118         datadir = ARGV.shift 
    119     end 
    120     datadir = Pathname.new( datadir ) 
    121  
    122     abort( "Directory '#{datadir}' does not exist" ) unless datadir.exist? 
    123     abort( "'#{datadir}' is not a directory" ) unless datadir.directory? 
    124     testfile = datadir + "data.noun" 
    125     abort( "'#{datadir}' doesn't seem to contain the necessary files.") unless testfile.exist? 
    126  
    127     # Open the lexicon readwrite into the temporary datadir 
    128     BuildDir.mkpath 
    129     lexicon = WordNet::Lexicon::new( BuildDir.to_s, 0666 ) 
    130  
    131     # Process each fileset 
    132     [     # Fileset,  name,    database handle, processor 
    133         Fileset::new( IndexFiles, "index", lexicon.index_db, method(:parse_index_line) ), 
    134         Fileset::new( MorphFiles, "morph", lexicon.morph_db, method(:parse_morph_line) ), 
    135         Fileset::new( DataFiles,  "data",  lexicon.data_db,  method(:parse_synset_line) ), 
    136     ].each do |set| 
    137         message "Converting %s files...\n" % set.name 
    138         set.db.truncate 
    139  
    140         # Process each file in the set with the appropriate processor method and 
    141         # insert results into the corresponding table. 
    142         set.files.each do |file,pos| 
    143             message "    #{file}..." 
    144  
    145             filepath = File::join( datadir, file ) 
    146             if !File::exists?( filepath ) 
    147                 message "missing: skipped\n" 
    148                 next 
    149             end 
    150  
    151             txn, dbh = lexicon.env.txn_begin( 0, set.db ) 
    152             entries = lineNumber = errors = 0 
    153             File::readlines( filepath ).each do |line| 
    154                 lineNumber += 1 
    155                 next if /^\s/ =~ line 
    156  
    157                 key, value = set.processor.call( line.chomp, lineNumber, pos ) 
    158                 unless key 
    159                     errors += 1 
    160                     if errorLimit.nonzero? && errors >= errorLimit 
    161                         abort( "Too many errors" ) 
     121        datadir = prompt_with_default( "Data directory", default + "/dict" ) 
     122        datadir = Pathname.new( datadir ) 
     123 
     124        abort( "Directory '#{datadir}' does not exist" ) unless datadir.exist? 
     125        abort( "'#{datadir}' is not a directory" ) unless datadir.directory? 
     126        testfile = datadir + "data.noun" 
     127        abort( "'#{datadir}' doesn't seem to contain the necessary files.") unless testfile.exist? 
     128 
     129        # Open the lexicon readwrite into the temporary datadir 
     130        @builddir.mkpath 
     131        lexicon = WordNet::Lexicon::new( @builddir.to_s, 0666 ) 
     132 
     133        # Process each fileset 
     134        [     # Fileset,  name,    database handle, processor 
     135            Fileset::new( IndexFiles, "index", lexicon.index_db, method(:parse_index_line) ), 
     136            Fileset::new( MorphFiles, "morph", lexicon.morph_db, method(:parse_morph_line) ), 
     137            Fileset::new( DataFiles,  "data",  lexicon.data_db,  method(:parse_synset_line) ), 
     138        ].each do |set| 
     139            message "Converting %s files...\n" % set.name 
     140            set.db.truncate 
     141 
     142            # Process each file in the set with the appropriate processor method and 
     143            # insert results into the corresponding table. 
     144            set.files.each do |file,pos| 
     145                message "    #{file}..." 
     146 
     147                filepath = File::join( datadir, file ) 
     148                if !File::exists?( filepath ) 
     149                    message "missing: skipped\n" 
     150                    next 
     151                end 
     152 
     153                txn, dbh = lexicon.env.txn_begin( 0, set.db ) 
     154                entries = lineNumber = errors = 0 
     155                File::readlines( filepath ).each do |line| 
     156                    lineNumber += 1 
     157                    next if /^\s/ =~ line 
     158 
     159                    key, value = set.processor.call( line.chomp, lineNumber, pos ) 
     160                    unless key 
     161                        errors += 1 
     162                        if errorLimit.nonzero? && errors >= errorLimit 
     163                            abort( "Too many errors" ) 
     164                        end 
     165                    end 
     166 
     167                    dbh[ key ] = value 
     168                    entries += 1 
     169                    print "%d%s" % [ entries, "\x08" * entries.to_s.length ] 
     170 
     171                    # Commit and start a new transaction every CommitThreshold records 
     172                    if (entries % CommitThreshold).zero?