Changeset 85 for trunk/convertdb.rb

Show
Ignore:
Timestamp: 07/07/08 20:39:04 (5 months ago)
Author: deveiant
Message: Checkpoint commit

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • trunk/convertdb.rb

    r79 r85  
     39  39  end
     40  40
         41  require 'pathname'
     41  42  require 'strscan'
     42  43  require 'wordnet'
     
     73  74
     74  75  # Temporary location for the lexicon data files
     75      BuildDir = File::join( File::dirname(__FILE__), File::basename(WordNet::Lexicon::DefaultDbEnv) )
         76  BuildDir = Pathname.new( __FILE__ ).expand_path.dirname +
         77             Pathname.new( WordNet::Lexicon::DefaultDbEnv ).basename
     76  78
     77  79
     
     92  94      # Open the database and check to be sure it's empty. Confirm overwrite if
     93  95      # not. Checkpoint and set up logging proc if debugging.
     94          if File::exists?( BuildDir )
         96      if BuildDir.exist?
     95  97          message ">>> Warning: Existing data in the Ruby-WordNet databases\n"\
     96  98              "will be overwritten.\n"
     97  99          abort( "user cancelled." ) unless
     98 100              /^y/i =~ promptWithDefault( "Continue?", "n" )
     99              FileUtils::rm_rf( BuildDir )
        101          BuildDir.rmtree
    100 102      end
    101 103
     
    117 119
    118 120      # Open the lexicon readwrite into the temporary datadir
    119          FileUtils::mkdir( BuildDir )
    120          lexicon = WordNet::Lexicon::new( BuildDir, 0666 )
        121      BuildDir.mkpath
        122      lexicon = WordNet::Lexicon::new( BuildDir.to_s, 0666 )
    121 123
    122 124      # Process each fileset
    123 125      [     # Fileset,  name,    database handle, processor
    124              Fileset::new( IndexFiles, "index", lexicon.index_db, method(:parseIndexLine) ),
    125              Fileset::new( MorphFiles, "morph", lexicon.morph_db, method(:parseMorphLine) ),
    126              Fileset::new( DataFiles,  "data",  lexicon.data_db,  method(:parseSynsetLine) ),
    127          ].each {|set|
        126          Fileset::new( IndexFiles, "index", lexicon.index_db, method(:parse_index_line) ),
        127          Fileset::new( MorphFiles, "morph", lexicon.morph_db, method(:parse_morph_line) ),
        128          Fileset::new( DataFiles,  "data",  lexicon.data_db,  method(:parse_synset_line) ),
        129      ].each do |set|
    128 130          message "Converting %s files...\n" % set.name
    129 131          set.db.truncate
     
    131 133          # Process each file in the set with the appropriate processor method and
    132 134          # insert results into the corresponding table.
    133              set.files.each {|file,pos|
        135          set.files.each do |file,pos|
    134 136              message "    #{file}..."
    135 137
     
    142 144              txn, dbh = lexicon.env.txn_begin( 0, set.db )
    143 145              entries = lineNumber = errors = 0
    144                  File::readlines( filepath ).each {|line|
        146              File::readlines( filepath ).each do |line|
    145 147                  lineNumber += 1
    146 148                  next if /^\s/ =~ line
     
    164 166                      txn, dbh = lexicon.env.txn_begin( 0, set.db )
    165 167                  end
    166                  }
        168              end
        169
    167 170              message "committing..."
    168 171              txn.commit( BDB::TXN_SYNC )
    169 172              message "done (%d entries, %d errors).\n" %
    170 173                  [ entries, errors ]
    171              }
        174          end
        175
        176          lock_stats = lexicon.env.lock_stat
        177          message "Lock statistics:\n"
        178          puts "  Lock objects: #{lock_stats['st_nobjects']}/#{lock_stats['st_maxnobjects']}",
        179               "  Locks: #{lock_stats['st_nlocks']}/#{lock_stats['st_maxnlocks']}",
        180               "  Lockers: #{lock_stats['st_nlockers']}/#{lock_stats['st_maxnlockers']}"
        181
    172 182
    173 183          message "Checkpointing DB and cleaning logs..."
     
    175 185          lexicon.clean_logs
    176 186          puts "done."
    177          }
        187      end
    178 188
    179 189      message "done.\n\n"
     
    191 201  ### +pos+ argument is not used -- it's just to make the interface between all
    192 202  ### three processor methods the same.
    193      def parseIndexLine( string, lineNumber, pos=nil )
        203  def parse_index_line( string, lineNumber, pos=nil )
    194 204      $scanner.string = string
    195 205      synsets = []
     
    233 243
    234 244  ### "Parse" a morph line and return it as a key and value.
    235      def parseMorphLine( string, lineNumber, pos )
        245  def parse_morph_line( string, lineNumber, pos )
    236 246      key, value = string.split
    237 247      return "#{key}%#{pos}", value
     
    259 269  ### Parse an entry from a data file and return the key and data. Returns +nil+
    260 270  ### if any part of the entry isn't able to be parsed.
    261      def parseSynsetLine( string, lineNumber, pos )
        271  def parse_synset_line( string, lineNumber, pos )
    262 272      $scanner.string = string
    263 273