Changeset 85
- Timestamp:
- 07/07/08 20:39:04 (3 months ago)
- Location:
- trunk
- Files:
-
- 7 modified
-
WordNet Project.tmproj (modified) (5 diffs)
-
convertdb.rb (modified) (11 diffs)
-
install.rb (modified) (1 diff)
-
lib/wordnet/constants.rb (modified) (3 diffs)
-
lib/wordnet/synset.rb (modified) (2 diffs)
-
test.rb (modified) (3 diffs)
-
utils.rb (modified) (34 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/WordNet Project.tmproj
r83 r85 1 1 <?xml version="1.0" encoding="UTF-8"?> 2 <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">2 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> 3 3 <plist version="1.0"> 4 4 <dict> 5 5 <key>currentDocument</key> 6 <string> INSTALL</string>6 <string>install.rb</string> 7 7 <key>documents</key> 8 8 <array> … … 14 14 <key>regexFolderFilter</key> 15 15 <string>!.*/(\.[^/]*|CVS|_darcs|_MTN|\{arch\}|blib|.*~\.nib|.*\.(framework|app|pbproj|pbxproj|xcode(proj)?|bundle))$</string> 16 <key>selected</key> 17 <true/> 16 18 <key>sourceDirectory</key> 17 19 <string></string> … … 55 57 <dict> 56 58 <key>column</key> 57 <integer>37</integer> 58 <key>line</key> 59 <integer>198</integer> 59 <integer>0</integer> 60 <key>line</key> 61 <integer>121</integer> 62 </dict> 63 <key>firstVisibleColumn</key> 64 <integer>0</integer> 65 <key>firstVisibleLine</key> 66 <integer>66</integer> 67 </dict> 68 <key>install.rb</key> 69 <dict> 70 <key>caret</key> 71 <dict> 72 <key>column</key> 73 <integer>0</integer> 74 <key>line</key> 75 <integer>49</integer> 76 </dict> 77 <key>firstVisibleColumn</key> 78 <integer>0</integer> 79 <key>firstVisibleLine</key> 80 <integer>0</integer> 81 </dict> 82 <key>lib/wordnet.rb</key> 83 <dict> 84 <key>caret</key> 85 <dict> 86 <key>column</key> 87 <integer>0</integer> 88 <key>line</key> 89 <integer>0</integer> 90 </dict> 91 <key>firstVisibleColumn</key> 92 <integer>0</integer> 93 <key>firstVisibleLine</key> 94 <integer>2</integer> 95 </dict> 96 <key>lib/wordnet/constants.rb</key> 97 <dict> 98 <key>caret</key> 99 <dict> 100 <key>column</key> 101 <integer>0</integer> 102 <key>line</key> 103 <integer>0</integer> 104 </dict> 105 <key>firstVisibleColumn</key> 106 <integer>0</integer> 107 <key>firstVisibleLine</key> 108 <integer>20</integer> 109 </dict> 110 <key>lib/wordnet/lexicon.rb</key> 111 <dict> 112 <key>caret</key> 113 <dict> 114 <key>column</key> 115 <integer>19</integer> 116 <key>line</key> 117 <integer>201</integer> 60 118 </dict> 61 119 <key>columnSelection</key> … … 64 122 <integer>0</integer> 65 123 <key>firstVisibleLine</key> 66 <integer> 0</integer>124 <integer>160</integer> 67 125 <key>selectFrom</key> 68 126 <dict> 69 127 <key>column</key> 70 <integer> 30</integer>71 <key>line</key> 72 <integer> 198</integer>128 <integer>15</integer> 129 <key>line</key> 130 <integer>201</integer> 73 131 </dict> 74 132 <key>selectTo</key> 75 133 <dict> 76 134 <key>column</key> 77 <integer>37</integer> 78 <key>line</key> 79 <integer>198</integer> 80 </dict> 81 </dict> 82 <key>install.rb</key> 83 <dict> 84 <key>caret</key> 85 <dict> 86 <key>column</key> 87 <integer>0</integer> 88 <key>line</key> 89 <integer>0</integer> 90 </dict> 91 <key>firstVisibleColumn</key> 92 <integer>0</integer> 93 <key>firstVisibleLine</key> 94 <integer>0</integer> 95 </dict> 96 <key>lib/wordnet/lexicon.rb</key> 97 <dict> 98 <key>caret</key> 99 <dict> 100 <key>column</key> 101 <integer>0</integer> 102 <key>line</key> 103 <integer>0</integer> 104 </dict> 105 <key>firstVisibleColumn</key> 106 <integer>0</integer> 107 <key>firstVisibleLine</key> 108 <integer>62</integer> 135 <integer>19</integer> 136 <key>line</key> 137 <integer>201</integer> 138 </dict> 139 </dict> 140 <key>lib/wordnet/synset.rb</key> 141 <dict> 142 <key>caret</key> 143 <dict> 144 <key>column</key> 145 <integer>11</integer> 146 <key>line</key> 147 <integer>135</integer> 148 </dict> 149 <key>columnSelection</key> 150 <false/> 151 <key>firstVisibleColumn</key> 152 <integer>0</integer> 153 <key>firstVisibleLine</key> 154 <integer>571</integer> 155 <key>selectFrom</key> 156 <dict> 157 <key>column</key> 158 <integer>6</integer> 159 <key>line</key> 160 <integer>135</integer> 161 </dict> 162 <key>selectTo</key> 163 <dict> 164 <key>column</key> 165 <integer>11</integer> 166 <key>line</key> 167 <integer>135</integer> 168 </dict> 169 </dict> 170 <key>utils.rb</key> 171 <dict> 172 <key>caret</key> 173 <dict> 174 <key>column</key> 175 <integer>0</integer> 176 <key>line</key> 177 <integer>35</integer> 178 </dict> 179 <key>columnSelection</key> 180 <false/> 181 <key>firstVisibleColumn</key> 182 <integer>0</integer> 183 <key>firstVisibleLine</key> 184 <integer>0</integer> 185 <key>selectFrom</key> 186 <dict> 187 <key>column</key> 188 <integer>0</integer> 189 <key>line</key> 190 <integer>4</integer> 191 </dict> 192 <key>selectTo</key> 193 <dict> 194 <key>column</key> 195 <integer>0</integer> 196 <key>line</key> 197 <integer>35</integer> 198 </dict> 109 199 </dict> 110 200 </dict> … … 113 203 <string>INSTALL</string> 114 204 <string>install.rb</string> 205 <string>utils.rb</string> 115 206 <string>convertdb.rb</string> 207 <string>lib/wordnet/constants.rb</string> 208 <string>lib/wordnet.rb</string> 209 <string>lib/wordnet/synset.rb</string> 116 210 <string>lib/wordnet/lexicon.rb</string> 117 211 <string>README</string> 118 212 </array> 119 213 <key>showFileHierarchyDrawer</key> 120 <true/> 214 <false/> 215 <key>treeState</key> 216 <dict> 217 <key>WordNet</key> 218 <dict> 219 <key>isExpanded</key> 220 <true/> 221 <key>subItems</key> 222 <dict> 223 <key>lib</key> 224 <dict> 225 <key>isExpanded</key> 226 <true/> 227 <key>subItems</key> 228 <dict> 229 <key>wordnet</key> 230 <dict> 231 <key>isExpanded</key> 232 <true/> 233 <key>subItems</key> 234 <dict/> 235 </dict> 236 </dict> 237 </dict> 238 </dict> 239 </dict> 240 </dict> 121 241 <key>windowFrame</key> 122 <string>{{ 395, 87}, {1010, 1491}}</string>242 <string>{{796, 51}, {1356, 1527}}</string> 123 243 </dict> 124 244 </plist> -
trunk/convertdb.rb
r79 r85 39 39 end 40 40 41 require 'pathname' 41 42 require 'strscan' 42 43 require 'wordnet' … … 73 74 74 75 # Temporary location for the lexicon data files 75 BuildDir = File::join( File::dirname(__FILE__), File::basename(WordNet::Lexicon::DefaultDbEnv) ) 76 BuildDir = Pathname.new( __FILE__ ).expand_path.dirname + 77 Pathname.new( WordNet::Lexicon::DefaultDbEnv ).basename 76 78 77 79 … … 92 94 # Open the database and check to be sure it's empty. Confirm overwrite if 93 95 # not. Checkpoint and set up logging proc if debugging. 94 if File::exists?( BuildDir )96 if BuildDir.exist? 95 97 message ">>> Warning: Existing data in the Ruby-WordNet databases\n"\ 96 98 "will be overwritten.\n" 97 99 abort( "user cancelled." ) unless 98 100 /^y/i =~ promptWithDefault( "Continue?", "n" ) 99 FileUtils::rm_rf( BuildDir )101 BuildDir.rmtree 100 102 end 101 103 … … 117 119 118 120 # Open the lexicon readwrite into the temporary datadir 119 FileUtils::mkdir( BuildDir )120 lexicon = WordNet::Lexicon::new( BuildDir , 0666 )121 BuildDir.mkpath 122 lexicon = WordNet::Lexicon::new( BuildDir.to_s, 0666 ) 121 123 122 124 # Process each fileset 123 125 [ # Fileset, name, database handle, processor 124 Fileset::new( IndexFiles, "index", lexicon.index_db, method(:parse IndexLine) ),125 Fileset::new( MorphFiles, "morph", lexicon.morph_db, method(:parse MorphLine) ),126 Fileset::new( DataFiles, "data", lexicon.data_db, method(:parse SynsetLine) ),127 ].each {|set|126 Fileset::new( IndexFiles, "index", lexicon.index_db, method(:parse_index_line) ), 127 Fileset::new( MorphFiles, "morph", lexicon.morph_db, method(:parse_morph_line) ), 128 Fileset::new( DataFiles, "data", lexicon.data_db, method(:parse_synset_line) ), 129 ].each do |set| 128 130 message "Converting %s files...\n" % set.name 129 131 set.db.truncate … … 131 133 # Process each file in the set with the appropriate processor method and 132 134 # insert results into the corresponding table. 133 set.files.each {|file,pos|135 set.files.each do |file,pos| 134 136 message " #{file}..." 135 137 … … 142 144 txn, dbh = lexicon.env.txn_begin( 0, set.db ) 143 145 entries = lineNumber = errors = 0 144 File::readlines( filepath ).each {|line|146 File::readlines( filepath ).each do |line| 145 147 lineNumber += 1 146 148 next if /^\s/ =~ line … … 164 166 txn, dbh = lexicon.env.txn_begin( 0, set.db ) 165 167 end 166 } 168 end 169 167 170 message "committing..." 168 171 txn.commit( BDB::TXN_SYNC ) 169 172 message "done (%d entries, %d errors).\n" % 170 173 [ entries, errors ] 171 } 174 end 175 176 lock_stats = lexicon.env.lock_stat 177 message "Lock statistics:\n" 178 puts " Lock objects: #{lock_stats['st_nobjects']}/#{lock_stats['st_maxnobjects']}", 179 " Locks: #{lock_stats['st_nlocks']}/#{lock_stats['st_maxnlocks']}", 180 " Lockers: #{lock_stats['st_nlockers']}/#{lock_stats['st_maxnlockers']}" 181 172 182 173 183 message "Checkpointing DB and cleaning logs..." … … 175 185 lexicon.clean_logs 176 186 puts "done." 177 }187 end 178 188 179 189 message "done.\n\n" … … 191 201 ### +pos+ argument is not used -- it's just to make the interface between all 192 202 ### three processor methods the same. 193 def parse IndexLine( string, lineNumber, pos=nil )203 def parse_index_line( string, lineNumber, pos=nil ) 194 204 $scanner.string = string 195 205 synsets = [] … … 233 243 234 244 ### "Parse" a morph line and return it as a key and value. 235 def parse MorphLine( string, lineNumber, pos )245 def parse_morph_line( string, lineNumber, pos ) 236 246 key, value = string.split 237 247 return "#{key}%#{pos}", value … … 259 269 ### Parse an entry from a data file and return the key and data. Returns +nil+ 260 270 ### if any part of the entry isn't able to be parsed. 261 def parse SynsetLine( string, lineNumber, pos )271 def parse_synset_line( string, lineNumber, pos ) 262 272 $scanner.string = string 263 273 -
trunk/install.rb
r75 r85 6 6 # Thanks to Masatoshi SEKI for ideas found in his install.rb. 7 7 # 8 # Copyright (c) 2001-2005 The FaerieMUD Consortium. 9 # 10 # This is free software. You may use, modify, and/or redistribute this 11 # software under the terms of the Perl Artistic License. (See 12 # http://language.perl.com/misc/Artistic.html) 13 # 8 # Copyright (c) 2001-2008, The FaerieMUD Consortium. 9 # 10 # All rights reserved. 11 # 12 # Redistribution and use in source and binary forms, with or without modification, are 13 # permitted provided that the following conditions are met: 14 # 15 # * Redistributions of source code must retain the above copyright notice, this 16 # list of conditions and the following disclaimer. 17 # 18 # * Redistributions in binary form must reproduce the above copyright notice, this 19 # list of conditions and the following disclaimer in the documentation and/or 20 # other materials provided with the distribution. 21 # 22 # * Neither the name of FaerieMUD, nor the names of its contributors may be used to 23 # endorse or promote products derived from this software without specific prior 24 # written permission. 25 # 26 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 29 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 30 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 31 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 32 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 33 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 34 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 35 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 36 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37 # 38 14 39 15 40 BEGIN { -
trunk/lib/wordnet/constants.rb
r79 r85 8 8 # E.g., 9 9 # 10 # WordNet::Adjective == WordNet::Constants::Adjective10 # WordNet::Adjective == WordNet::Constants::Adjective 11 11 # 12 12 # If you do: 13 # include WordNet::Constants13 # include WordNet::Constants 14 14 # 15 15 # then: 16 # Adjective == WordNet::Adjective16 # Adjective == WordNet::Adjective 17 17 # 18 18 # == Synopsis 19 19 # 20 20 # require 'wordnet' 21 # include WordNet::Constants22 # 23 # lex = WordNet::Lexicon::new24 # origins = lex.lookup_synsets( "shoe", Noun )21 # include WordNet::Constants 22 # 23 # lex = WordNet::Lexicon::new 24 # origins = lex.lookup_synsets( "shoe", Noun ) 25 25 # 26 26 # == Authors … … 44 44 module WordNet 45 45 46 ### Constant-container module 47 module Constants 48 49 # Synset syntactic-category names -> indicators 50 SyntacticCategories = { 51 :noun => "n", 52 :verb => "v", 53 :adjective => "a", 54 :adverb => "r", 55 :other => "s", 56 } 57 # Syntactic-category indicators -> names 58 SyntacticSymbols = SyntacticCategories.invert 59 60 # Map the categories into their own constants (eg., Noun) 61 SyntacticCategories.each {|sym,val| 62 cname = sym.to_s.capitalize 63 const_set( cname, val ) 64 } 46 ### Constant-container module 47 module Constants 48 49 # Synset syntactic-category names -> indicators 50 # From: senseidx(5WN) 51 SyntacticCategories = { 52 :noun => "n", 53 :verb => "v", 54 :adjective => "a", 55 :adverb => "r", 56 :adjective_satellite => "s", 57 } 58 # Syntactic-category indicators -> names 59 SyntacticSymbols = SyntacticCategories.invert 60 61 # Map the categories into their own constants (eg., Noun) 62 SyntacticCategories.each {|sym,val| 63 cname = sym.to_s.capitalize 64 const_set( cname, val ) 65 } 65 66 66 67 # Information about pointer types is contained in the wninput(5WN) 67 68 # manpage. 68 69 69 # Synset pointer typenames -> indicators70 PointerTypes = {71 :antonym=> '!',72 :hypernym=> '@',73 :entailment=> '*',74 :hyponym=> '~',75 :meronym=> '%',76 :holonym=> '#',77 :cause=> '>',78 :verb_group=> %{$},79 :similar_to=> '&',80 :participle=> '<',81 :pertainym=> '\\',82 :attribute=> '=',83 :derived_from=> '\\',84 :see_also=> '^',85 :derivation=> '+',86 :domain=> ';',87 :member=> '-',88 }89 90 # Synset pointer indicator -> typename91 PointerSymbols = PointerTypes.invert92 93 # Map the pointer types into their own symbols (eg., VerbGroup)94 PointerTypes.each {|sym,val|95 cname = sym.to_s[0,1].upcase + sym.to_s[1..-1]96 const_set( cname, val )97 }70 # Synset pointer typenames -> indicators 71 PointerTypes = { 72 :antonym => '!', 73 :hypernym => '@', 74 :entailment => '*', 75 :hyponym => '~', 76 :meronym => '%', 77 :holonym => '#', 78 :cause => '>', 79 :verb_group => %{$}, 80 :similar_to => '&', 81 :participle => '<', 82 :pertainym => '\\', 83 :attribute => '=', 84 :derived_from => '\\', 85 :see_also => '^', 86 :derivation => '+', 87 :domain => ';', 88 :member => '-', 89 } 90 91 # Synset pointer indicator -> typename 92 PointerSymbols = PointerTypes.invert 93 94 # Map the pointer types into their own symbols (eg., VerbGroup) 95 PointerTypes.each {|sym,val| 96 cname = sym.to_s[0,1].upcase + sym.to_s[1..-1] 97 const_set( cname, val ) 98 } 98 99 99 100 # Hypernym synset pointer types … … 115 116 HyponymSymbols = HyponymTypes.invert 116 117 117 # Meronym synset pointer types118 MeronymTypes = {119 :member=> '%m',120 :stuff=> '%s',121 :portion=> '%o',122 :component=> '%p',123 :feature=> '%f',124 :phase=> '%a',125 :place=> '%l',126 }127 128 # Meronym indicator -> type map129 MeronymSymbols = MeronymTypes.invert130 131 # Map the meronym types into their own constants (eg., MemberMeronym)132 MeronymTypes.each {|sym,val|133 cname = sym.to_s.capitalize + "Meronym"134 const_set( cname, val )135 }136 137 # Holonym synset pointer types138 HolonymTypes = {139 :member=> '#m',140 :stuff=> '#s',141 :portion=> '#o',142 :component=> '#p',143 :feature=> '#f',144 :phase=> '#a',145 :place=> '#l',146 }147 148 # Holonym indicator -> type map149 HolonymSymbols = HolonymTypes.invert150 151 # Map the holonym types into their own constants (eg., MemberHolonym)152 HolonymTypes.each {|sym,val|153 cname = sym.to_s.capitalize + "Holonym"154 const_set( cname, val )155 }156 157 # Domain synset pointer types158 DomainTypes = {159 :category=> ';c',160 :region=> ';r',161 :usage=> ';u',162 }163 164 # Domain indicator -> type map165 DomainSymbols = DomainTypes.invert166 167 # Map the domain types into their own constants (eg., CategoryDomain)168 DomainTypes.each {|sym,val|169 cname = sym.to_s.capitalize + "Domain"170 const_set( cname, val )171 }172 173 # Member synset pointer types174 MemberTypes = {175 :category=> '-c',176 :region=> '-r',177 :usage=> '-u',178 }179 180 # Member indicator -> type map181 MemberSymbols = MemberTypes.invert182 183
