| 1 | |
|---|
| 2 | = English Ruby Linguistics Module - Synopsis |
|---|
| 3 | |
|---|
| 4 | This is an overview of the functionality currently provided by the English |
|---|
| 5 | functions of the Ruby Linguistics module as of version 0.02: |
|---|
| 6 | |
|---|
| 7 | |
|---|
| 8 | == Pluralization |
|---|
| 9 | |
|---|
| 10 | require 'linguistics' |
|---|
| 11 | Linguistics::use( :en ) # extends Array, String, and Numeric |
|---|
| 12 | |
|---|
| 13 | "box".en.plural |
|---|
| 14 | # => "boxes" |
|---|
| 15 | |
|---|
| 16 | "mouse".en.plural |
|---|
| 17 | # => "mice" |
|---|
| 18 | |
|---|
| 19 | "ruby".en.plural |
|---|
| 20 | # => "rubies" |
|---|
| 21 | |
|---|
| 22 | |
|---|
| 23 | == Indefinite Articles |
|---|
| 24 | |
|---|
| 25 | "book".en.a |
|---|
| 26 | # => "a book" |
|---|
| 27 | |
|---|
| 28 | "article".en.a |
|---|
| 29 | # => "an article" |
|---|
| 30 | |
|---|
| 31 | |
|---|
| 32 | == Present Participles |
|---|
| 33 | |
|---|
| 34 | "runs".en.present_participle |
|---|
| 35 | # => "running" |
|---|
| 36 | |
|---|
| 37 | "eats".en.present_participle |
|---|
| 38 | # => "eating" |
|---|
| 39 | |
|---|
| 40 | "spies".en.present_participle |
|---|
| 41 | # => "spying" |
|---|
| 42 | |
|---|
| 43 | |
|---|
| 44 | == Ordinal Numbers |
|---|
| 45 | |
|---|
| 46 | 5.en.ordinal |
|---|
| 47 | # => "5th" |
|---|
| 48 | |
|---|
| 49 | 2004.en.ordinal |
|---|
| 50 | # => "2004th" |
|---|
| 51 | |
|---|
| 52 | |
|---|
| 53 | == Numbers to Words |
|---|
| 54 | |
|---|
| 55 | 5.en.numwords |
|---|
| 56 | # => "five" |
|---|
| 57 | |
|---|
| 58 | 2004.en.numwords |
|---|
| 59 | # => "two thousand and four" |
|---|
| 60 | |
|---|
| 61 | 2385762345876.en.numwords |
|---|
| 62 | # => "two trillion, three hundred and eighty-five billion, |
|---|
| 63 | seven hundred and sixty-two million, three hundred and |
|---|
| 64 | forty-five thousand, eight hundred and seventy-six" |
|---|
| 65 | |
|---|
| 66 | |
|---|
| 67 | == Quantification |
|---|
| 68 | |
|---|
| 69 | "cow".en.quantify( 5 ) |
|---|
| 70 | # => "several cows" |
|---|
| 71 | |
|---|
| 72 | "cow".en.quantify( 1005 ) |
|---|
| 73 | # => "thousands of cows" |
|---|
| 74 | |
|---|
| 75 | "cow".en.quantify( 20_432_123_000_000 ) |
|---|
| 76 | # => "tens of trillions of cows" |
|---|
| 77 | |
|---|
| 78 | |
|---|
| 79 | == Conjunctions |
|---|
| 80 | |
|---|
| 81 | animals = %w{dog cow ox chicken goose goat cow dog rooster llama |
|---|
| 82 | pig goat dog cat cat dog cow goat goose goose ox alpaca} |
|---|
| 83 | puts "The farm has: " + animals.en.conjunction |
|---|
| 84 | |
|---|
| 85 | # => The farm has: four dogs, three cows, three geese, three goats, |
|---|
| 86 | two oxen, two cats, a chicken, a rooster, a llama, a pig, |
|---|
| 87 | and an alpaca |
|---|
| 88 | |
|---|
| 89 | Note that 'goose' and 'ox' are both correctly pluralized, and the correct |
|---|
| 90 | indefinite article 'an' has been used for 'alpaca'. |
|---|
| 91 | |
|---|
| 92 | You can also use the generalization function of the #quantify method to give |
|---|
| 93 | general descriptions of object lists instead of literal counts: |
|---|
| 94 | |
|---|
| 95 | allobjs = [] |
|---|
| 96 | ObjectSpace::each_object {|obj| allobjs << obj.class.name} |
|---|
| 97 | |
|---|
| 98 | puts "The current Ruby objectspace contains: " + |
|---|
| 99 | allobjs.en.conjunction( :generalize => true ) |
|---|
| 100 | |
|---|
| 101 | which will print something like: |
|---|
| 102 | |
|---|
| 103 | The current Ruby objectspace contains: thousands of Strings, |
|---|
| 104 | thousands of Arrays, hundreds of Hashes, hundreds of |
|---|
| 105 | Classes, many Regexps, a number of Ranges, a number of |
|---|
| 106 | Modules, several Floats, several Procs, several MatchDatas, |
|---|
| 107 | several Objects, several IOS, several Files, a Binding, a |
|---|
| 108 | NoMemoryError, a SystemStackError, a fatal, a ThreadGroup, |
|---|
| 109 | and a Thread |
|---|
| 110 | |
|---|
| 111 | |
|---|
| 112 | == Infinitives |
|---|
| 113 | |
|---|
| 114 | New in version 0.02: |
|---|
| 115 | |
|---|
| 116 | "leaving".en.infinitive |
|---|
| 117 | # => "leave" |
|---|
| 118 | |
|---|
| 119 | "left".en.infinitive |
|---|
| 120 | # => "leave" |
|---|
| 121 | |
|---|
| 122 | "leaving".en.infinitive.suffix |
|---|
| 123 | # => "ing" |
|---|
| 124 | |
|---|
| 125 | |
|---|
| 126 | == WordNet® Integration |
|---|
| 127 | |
|---|
| 128 | Also new in version 0.02, if you have the Ruby-WordNet module installed, you can |
|---|
| 129 | look up WordNet synsets using the Linguistics interface: |
|---|
| 130 | |
|---|
| 131 | # Test to be sure the WordNet module loaded okay. |
|---|
| 132 | Linguistics::EN.has_wordnet? |
|---|
| 133 | # => true |
|---|
| 134 | |
|---|
| 135 | # Fetch the default synset for the word "balance" |
|---|
| 136 | "balance".en.synset |
|---|
| 137 | # => #<WordNet::Synset:0x40376844 balance (noun): "a state of equilibrium" |
|---|
| 138 | (derivations: 3, antonyms: 1, hypernyms: 1, hyponyms: 3)> |
|---|
| 139 | |
|---|
| 140 | # Fetch the synset for the first verb sense of "balance" |
|---|
| 141 | "balance".en.synset( :verb ) |
|---|
| 142 | # => #<WordNet::Synset:0x4033f448 balance, equilibrate, equilibrize, equilibrise |
|---|
| 143 | (verb): "bring into balance or equilibrium; "She has to balance work and her |
|---|
| 144 | domestic duties"; "balance the two weights"" (derivations: 7, antonyms: 1, |
|---|
| 145 | verbGroups: 2, hypernyms: 1, hyponyms: 5)> |
|---|
| 146 | |
|---|
| 147 | # Fetch the second noun sense |
|---|
| 148 | "balance".en.synset( 2, :noun ) |
|---|
| 149 | # => #<WordNet::Synset:0x404ebb24 balance (noun): "a scale for weighing; depends |
|---|
| 150 | on pull of gravity" (hypernyms: 1, hyponyms: 5)> |
|---|
| 151 | |
|---|
| 152 | # Fetch the second noun sense's hypernyms (more-general words, like a superclass) |
|---|
| 153 | "balance".en.synset( 2, :noun ).hypernyms |
|---|
| 154 | # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring |
|---|
| 155 | instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1, |
|---|
| 156 | hyponyms: 2)>] |
|---|
| 157 | |
|---|
| 158 | # A simpler way of doing the same thing: |
|---|
| 159 | "balance".en.hypernyms( 2, :noun ) |
|---|
| 160 | # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring |
|---|
| 161 | instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1, |
|---|
| 162 | hyponyms: 2)>] |
|---|
| 163 | |
|---|
| 164 | # Fetch the first hypernym's hypernyms |
|---|
| 165 | "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms |
|---|
| 166 | # => [#<WordNet::Synset:0x404c60b8 measuring instrument, measuring system, |
|---|
| 167 | measuring device (noun): "instrument that shows the extent or amount or quantity |
|---|
| 168 | or degree of something" (hypernyms: 1, hyponyms: 83)>] |
|---|
| 169 | |
|---|
| 170 | # Find the synset to which both the second noun sense of "balance" and the |
|---|
| 171 | # default sense of "shovel" belong. |
|---|
| 172 | ("balance".en.synset( 2, :noun ) | "shovel".en.synset) |
|---|
| 173 | # => #<WordNet::Synset:0x40473da4 instrumentality, instrumentation (noun): "an |
|---|
| 174 | artifact (or system of artifacts) that is instrumental in accomplishing some |
|---|
| 175 | end" (derivations: 1, hypernyms: 1, hyponyms: 13)> |
|---|
| 176 | |
|---|
| 177 | # Fetch just the words for the other kinds of "instruments" |
|---|
| 178 | "instrument".en.hyponyms.collect {|synset| synset.words}.flatten |
|---|
| 179 | # => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument", |
|---|
| 180 | "extractor", "instrument of execution", "instrument of punishment", "measuring |
|---|
| 181 | instrument", "measuring system", "measuring device", "medical instrument", |
|---|
| 182 | "navigational instrument", "optical instrument", "plotter", "scientific |
|---|
| 183 | instrument", "sonograph", "surveying instrument", "surveyor's instrument", |
|---|
| 184 | "tracer", "weapon", "arm", "weapon system", "whip"] |
|---|
| 185 | |
|---|
| 186 | There are many more WordNet methods supported – too many to list here. See the |
|---|
| 187 | documentation for the complete list. |
|---|
| 188 | |
|---|
| 189 | |
|---|
| 190 | == LinkParser Integration |
|---|
| 191 | |
|---|
| 192 | Another new feature in version 0.02 is integration with the Ruby version of the |
|---|
| 193 | CMU Link Grammar Parser by Martin Chase. If you have the LinkParser module |
|---|
| 194 | installed, you can create linkages from English sentences that let you query for |
|---|
| 195 | parts of speech: |
|---|
| 196 | |
|---|
| 197 | # Test to see whether or not the link parser is loaded. |
|---|
| 198 | Linguistics::EN.has_link_parser? |
|---|
| 199 | # => true |
|---|
| 200 | |
|---|
| 201 | # Diagram the first linkage for a test sentence |
|---|
| 202 | puts "he is a big dog".en.sentence.linkages.first.to_s |
|---|
| 203 | +---O*---+ |
|---|
| 204 | | +--Ds--+ |
|---|
| 205 | +Ss+ | +-A-+ |
|---|
| 206 | | | | | | |
|---|
| 207 | he is a big dog |
|---|
| 208 | |
|---|
| 209 | # Find the verb in the sentence |
|---|
| 210 | "he is a big dog".en.sentence.verb.to_s |
|---|
| 211 | # => "is" |
|---|
| 212 | |
|---|
| 213 | # Combined infinitive + LinkParser: Find the infinitive form of the verb of the |
|---|
| 214 | # given sentence. |
|---|
| 215 | "he is a big dog".en.sentence.verb.infinitive |
|---|
| 216 | # => "be" |
|---|
| 217 | |
|---|
| 218 | # Find the direct object of the sentence |
|---|
| 219 | "he is a big dog".en.sentence.object.to_s |
|---|
| 220 | # => "dog" |
|---|
| 221 | |
|---|
| 222 | # Look at the raw LinkParser::Word for the direct object of the sentence. |
|---|
| 223 | "he is a big dog".en.sentence.object |
|---|
| 224 | # => #<LinkParser::Word:0x403da0a0 @definition=[[{@A-}, Ds-, {@M+}, J-], [{@A-}, |
|---|
| 225 | Ds-, {@M+}, Os-], [{@A-}, Ds-, {@M+}, Ss+, {@CO-}, {C-}], [{@A-}, Ds-, {@M+}, |
|---|
| 226 | Ss+, R-], [{@A-}, Ds-, {@M+}, SIs-], [{@A-}, Ds-, {R+}, {Bs+}, J-], [{@A-}, Ds-, |
|---|
| 227 | {R+}, {Bs+}, Os-], [{@A-}, Ds-, {R+}, {Bs+}, Ss+, {@CO-}, {C-}], [{@A-}, Ds-, |
|---|
| 228 | {R+}, {Bs+}, Ss+, R-], [{@A-}, Ds-, {R+}, {Bs+}, SIs-]], @right=[], @suffix="", |
|---|
| 229 | @left=[#<LinkParser::Connection:0x403da028 @rword=#<LinkParser::Word:0x403da0a0 |
|---|
| 230 | ...>, @lword=#<LinkParser::Word:0x403da0b4 @definition=[[Ss-, O+, {@MV+}], [Ss-, |
|---|
| 231 | B-, {@MV+}], [Ss-, P+], [Ss-, AF-], [RS-, Bs-, O+, {@MV+}], [RS-, Bs-, B-, |
|---|
| 232 | {@MV+}], [RS-, Bs-, P+], [RS-, Bs-, AF-], [{Q-}, SIs+, O+, {@MV+}], [{Q-}, SIs+, |
|---|
| 233 | B-, {@MV+}], [{Q-}, SIs+, P+], [{Q-}, SIs+, AF-]], |
|---|
| 234 | @right=[#<LinkParser::Connection:0x403da028 ...>], @suffix="", @left=[], |
|---|
| 235 | @name="is", @position=1>, @subName="*", @name="O", @length=3>], @name="dog", |
|---|
| 236 | @position=4> |
|---|
| 237 | |
|---|
| 238 | # Combine WordNet + LinkParser to find the definition of the direct object of |
|---|
| 239 | # the sentence |
|---|
| 240 | "he is a big dog".en.sentence.object.gloss |
|---|
| 241 | # => "a member of the genus Canis (probably descended from the common wolf) that |
|---|
| 242 | has been domesticated by man since prehistoric times; occurs in many breeds; |
|---|
| 243 | \"the dog barked all night\"" |
|---|
| 244 | |
|---|