root/imbot/MontyLemmatiser.py

Revision 98, 65.6 kB (checked in by njohri2, 3 years ago)

Added MontyLemmatiser and required files. SABBrain now uses it.

Line 
1 from __future__ import nested_scopes
2 __author__="Hugo Liu <hugo@media.mit.edu>"
3 __version__="2.0"
4 import re
5 import MontyUtils
6
7 class MontyLemmatiser:
8     path_prefix=''
    # path_prefix is prepended to the verification corpus filename below.
    # xtag_morph_english_corpus is the file read by
    # make_verification_dictionary().
9     xtag_morph_english_corpus=path_prefix+'xtag_morph_english.txt'
    # exceptions_file is located through MontyUtils.find_file() and loaded in
    # __init__(); verify_lemmatiser() overwrites it.
10     exceptions_file="LEMMAEXCEPTIONS.MDF"
    # Rule tables. Rows are appended further down in the class body as
    # [pattern, strip_count, replacement, suffix_label]; __init__() replaces
    # each table on the instance with a copy whose pattern is compiled.
11     regular_any=[]
12     regular_verb=[]
13     regular_noun=[]
14     irregular_re_any=[]
15     irregular_re_verbs=[]
16     irregular_re_nouns=[]
    # Exact-match exception lists of (surface form, lemma, suffix label),
    # searched by check_dictionary_exceptions() before any regex rule.
17     irregular_verbs=[]
18     irregular_nouns=[]
    # NOTE(review): check_dictionary_exceptions() compares the lower-cased
    # word against the first field, so the mixed-case keys below ('Asia',
    # 'Media', 'California', 'Chianti') cannot match there -- confirm whether
    # they are still needed.
    # NOTE(review): 'alumnae' maps to 'alumnus' (not 'alumna') -- presumably
    # chosen to agree with the verification corpus; confirm.
19     irregular_nouns +=[
20 ('vegas','vegas',''),
21 ('tomatoes','tomato','s'),
22 ('potatoes','potato','s'),
23 ('Asia','Asia',''),
24 ('asia','asia',''),
25 ('media','media',''),
26 ('Media','Media',''),
27 ('California','California',''),
28 ('california','california',''),
29 ('leaves','leaf','s'),
30 ('acme','acme',''),
31 ('gloria','gloria',''),
32 ('mini','mini',''),
33 ('doggies','doggy','s'),
34 ('Chianti','Chianti',''),
35 ('briefs','brief','s'),
36 ('wives','wife','s'),
37 ('johannes','johannes',''),
38 ('tops','top','s'),
39 ('deadeyes','deadeye','s'),
40 ('eyes','eye','s'),
41 ('alumnae','alumnus','s'),
42 ('acropolis','acropolis',''),
43 ('metropolis','metropolis',''),
44 ('stamens','stamen','s'),
45 ]
    # NOTE(review): 'alibiing' maps to 'alibii' rather than 'alibi' --
    # looks like a typo unless the verification corpus expects it; confirm.
46     irregular_verbs +=[
47 ('leaves','leave',''),
48 ('does','do',''),
49 ('alibiing','alibii','ing'),
50 ('snorkeling','snorkel','ing'),
51 ('gaping','gape','ing'),
52 ('siting','site','ing'),
53 ('chequering','chequer','ing'),
54 ('restring','restring','')
55 ]
56
57     def __init__(self):
58         filename_str=[self.regular_any,self.regular_verb,self.regular_noun,self.irregular_re_any,self.irregular_re_verbs,self.irregular_re_nouns]
59         self.regular_any,self.regular_verb,self.regular_noun,self.irregular_re_any,self.irregular_re_verbs,self.irregular_re_nouns=map(lambda the_tokenizers:map(lambda the_tokenizer_str:[re.compile('^'+the_tokenizer_str[0].lower()+'$')]+the_tokenizer_str[1:],the_tokenizers),filename_str)
60         self.exceptions_db={}
61         hostname_cleaned=self.exceptions_db
62         c=self.setitem
63         buffers=MontyUtils.MontyUtils().find_file(self.exceptions_file)
64         pathname_str=open(buffers,'r')
65         cd_cleaned=map(lambda the_tokenizer_str:the_tokenizer_str.split(),pathname_str.read().split('\n'))
66         pathname_str.close()
67         map(lambda the_tokenizer_str:c(hostname_cleaned,the_tokenizer_str[0]+'/'+the_tokenizer_str[1],the_tokenizer_str[2]),cd_cleaned)
68         print "Lemmatiser OK!"
69         return
70
71     def lemmatise_untagged_sentence(self,untagged):
72         the_parser_dict=' '.join(map(lambda the_tokenizer_str:the_tokenizer_str+'/UNK',untagged.split()))
73         return self.lemmatise_tagged_sentence(the_parser_dict)
74
75     def lemmatise_tagged_sentence(self,tagged):
76         cmp=self.lemmatise_word
77         domain_str=map(lambda the_tokenizer_str:the_tokenizer_str.split('/'),tagged.split())
78
79         for popds in range(len(domain_str)):
80             j,cksum_str=domain_str[popds]
81             _hugo_cleaned=""
82
83             if cksum_str in['NN','NNS','NNP','NNPS']:
84                 _hugo_cleaned='noun'
85             elif cksum_str in['VB','VBD','VBZ','VBG','VBP','VBN']:
86                 _hugo_cleaned='verb'
87
88             if _hugo_cleaned=='':
89                 command1=j
90             else :
91                 command1=cmp(j,_hugo_cleaned)
92             domain_str[popds]=[j,cksum_str,command1]
93         domain_str=map(lambda the_tokenizer_str:'/'.join(the_tokenizer_str),domain_str)
94         cal=' '.join(domain_str)
95         return cal
96
97     def lemmatise_word(self,word,pos=""):
98         the_parser=self.fix_case
99
100         if word[-2:]=="'s":
101             word=word[:-2]
102         elif word[-2:]=="s'":
103             word=word[:-1]
104         cron_dictr=self.check_dictionary_exceptions(word,pos)
105
106         if cron_dictr!=None:
107             chown,cd_cleanedo=cron_dictr
108             chown=the_parser(word,chown)
109             return chown
110         cron_dictr=self.find_irregular_morph(word,pos)
111
112         if cron_dictr!=None:
113             chown,cd_cleanedo=cron_dictr
114             chown=the_parser(word,chown)
115             return chown
116         cron_dictr=self.find_regular_morph(word,pos)
117
118         if cron_dictr!=None:
119             chown,cd_cleanedo=cron_dictr
120             chown=the_parser(word,chown)
121             return chown
122         return word
123
124     def verify_lemmatiser(self):
125         arg_cleaned=[]
126         print "LOADING verification corpus"
127         dict=self.make_verification_dictionary()
128         print 'verifying against',len(dict),'entries'
129         built_in_dict=0
130
131         for chmod_pq in dict:
132             j,command1,chmod_cleaned=chmod_pq
133
134             if chmod_cleaned in['PropN']:
135                 chmod_cleaned='noun'
136             elif chmod_cleaned in['V']:
137                 chmod_cleaned='verb'
138             else :
139                 continue
140             file_cleaned=self.lemmatise_word(j,chmod_cleaned)
141
142             if file_cleaned!=command1:
143                 built_in_dict += 1
144                 print 'WRONG! WORD: '+j+' GUESSED: '+file_cleaned+' ACTUAL: '+command1
145                 arg_cleaned.append([j,chmod_cleaned,command1])
146
147                 if j!=j.lower():
148                     arg_cleaned.append([j.lower(),chmod_cleaned,command1.lower()])
149             else :
150                 pass
151         print 'Results: got',built_in_dict,'out of',len(dict),'wrong (',built_in_dict*1.0/len(dict),'% error)'
152         pathname_str=open(self.exceptions_file,'w')
153         pathname_str.write('\n'.join(map(lambda the_tokenizer_str:' '.join(the_tokenizer_str),arg_cleaned)))
154         pathname_str.close()
155         return arg_cleaned
156
157     def make_verification_dictionary(self):
158         pathname_str=open(self.xtag_morph_english_corpus,'r')
159         output=pathname_str.read()
160         factor=output.split('\n')
161         factor=filter(lambda the_tokenizer_str:the_tokenizer_str[:3]!=';;;',factor)
162         factor=map(lambda the_tokenizer_str:the_tokenizer_str.split(),factor)
163         factor=filter(lambda the_tokenizer_str:len(the_tokenizer_str)>=3,factor)
164         factor=map(lambda the_tokenizer_str:[the_tokenizer_str[0],the_tokenizer_str[1],the_tokenizer_str[2]],factor)
165         factor=filter(lambda the_tokenizer_str:the_tokenizer_str[1][-3:]!='ize' and the_tokenizer_str[1][-7:]!='ization' and the_tokenizer_str[1][4:]!='izer' and '-' not in the_tokenizer_str[1],factor)
166         return factor
167
168     def fix_case(self,word1,word2):
169
170         if word1.lower()==word1:
171             return word2.lower()
172         elif word1.capitalize()==word1:
173             return word2.capitalize()
174         elif word1.upper()==word1:
175             return word2.upper()
176         else :
177             return word2
178
179     def _re_match_helper(self,re_kb,word):
180
181         for popds in range(len(re_kb)):
182             buf1,file_str,buffer1,cd_cleanedo=re_kb[popds]
183             pairs_cleaned=buf1.search(word.lower())
184
185             if pairs_cleaned!=None:
186                 chown=word[:len(word)-file_str]+buffer1
187                 return[chown,cd_cleanedo]
188         return None
189
190     def find_irregular_morph(self,word,pos=""):
191         a1=self._re_match_helper
192         groupnames1=self.find_irregular_morph
193         cron_dictr=a1(self.irregular_re_any,word)
194
195         if cron_dictr!=None:
196             return cron_dictr
197
198         if pos=='verb':
199             cron_dictr=a1(self.irregular_re_verbs,word)
200
201             if cron_dictr!=None:
202                 return cron_dictr
203         elif pos=='noun':
204             cron_dictr=a1(self.irregular_re_nouns,word)
205
206             if cron_dictr!=None:
207                 return cron_dictr
208         else :
209             cron_dictr=groupnames1(word,'verb')
210
211             if cron_dictr!=None:
212                 return cron_dictr
213             cron_dictr=groupnames1(word,'noun')
214             return cron_dictr
215         return None
216
217     def find_regular_morph(self,word,pos=""):
218         a1=self._re_match_helper
219         history1=self.find_regular_morph
220         info_arr=word.lower()
221
222         if pos=='verb':
223             cron_dictr=a1(self.regular_verb,info_arr)
224
225             if cron_dictr!=None:
226                 return cron_dictr
227         elif pos=='noun':
228             cron_dictr=a1(self.regular_noun,info_arr)
229
230             if cron_dictr!=None:
231                 return cron_dictr
232         else :
233             cron_dictr=history1(word,'verb')
234
235             if cron_dictr!=None:
236                 return cron_dictr
237             cron_dictr=history1(word,'noun')
238             return cron_dictr
239         cron_dictr=a1(self.regular_any,info_arr)
240
241         if cron_dictr!=None:
242             return cron_dictr
243         return None
244
245     def check_dictionary_exceptions(self,word,pos=""):
246         hostname_cleaned=self.exceptions_db
247         contents_str=self.check_dictionary_exceptions
248         info_arr=word.lower()
249
250         if pos=="verb":
251             built_in_cleaned=map(lambda the_tokenizer_str:the_tokenizer_str[0],self.irregular_verbs)
252
253             if info_arr in built_in_cleaned:
254                 hostname_arr=built_in_cleaned.index(info_arr)
255                 chown_p=self.irregular_verbs[hostname_arr]
256                 return chown_p[1:3]
257             elif hostname_cleaned.get(word+'/verb',''):
258                 return[hostname_cleaned[word+'/verb'],'']
259             else :
260                 return None
261         elif pos=="noun":
262             pairs_dict=map(lambda the_tokenizer_str:the_tokenizer_str[0],self.irregular_nouns)
263
264             if info_arr in pairs_dict:
265                 hostname_arr=pairs_dict.index(info_arr)
266                 chown_p=self.irregular_nouns[hostname_arr]
267                 return chown_p[1:3]
268             elif hostname_cleaned.get(word+'/noun',''):
269                 return[hostname_cleaned[word+'/noun'],'']
270             else :
271                 return None
272         else :
273             cron_dictr=contents_str(word,'verb')
274
275             if cron_dictr==None:
276                 cron_dictr=contents_str(word,'noun')
277             return cron_dictr
278     V='[aeiou]'
    # Regex fragments used to build the rule patterns below:
    #   V / VI   vowel classes (VI also contains 'y')
    #   C / CX   consonant classes (CX omits 'y')
    #   CX2      a doubled consonant; CX2S only ff, ss or zz
    #   S        s, x, ch or sh
    #   A        any character except space, newline and underscore
    # SKIP, EDING and ESEDING are not referenced by the rules visible in
    # this part of the file.
279     VI='[aeiouy]'
280     C='[bcdfghjklmnpqrstvwxyz]'
281     CX='[bcdfghjklmnpqrstvwxz]'
282     CX2='(bb|cc|dd|ff|gg|hh|jj|kk|ll|mm|nn|pp|qq|rr|ss|tt|vv|ww|xx|zz)'
283     CX2S='(ff|ss|zz)'
284     S='([sx]|([cs]h))'
285     A='[^ \n_]'
286     SKIP='[ \n]'
287     EDING='ed|ing'
288     ESEDING='es|ed|ing'
    # Rule row format: [pattern, strip_count, replacement, suffix_label].
    # __init__() lower-cases each pattern, wraps it in '^...$' and compiles
    # it.  _re_match_helper() uses the FIRST row whose pattern matches,
    # removes strip_count characters from the end of the word and appends
    # replacement -- so row order within a table is significant.
    #
    # regular_any: tried by find_regular_morph() after the verb or noun
    # table found nothing.
289     regular_any +=[
290 [A+'+'+CX2S+'es',2,'','s'],
291 [A+'+'+'thes',1,'','s'],
292 [A+'+'+CX+'[cglsv]'+'es',1,'','s'],
293 [A+'+'+CX+CX+'es',2,'','s'],
294 [A+'+'+VI+VI+'es',2,'','s'],
295 [A+'+'+'xes',2,'','s'],
296 [A+'+'+S+'es',1,'','s'],
297 [A+'+'+C+'ies',3,'y','s'],
298 [A+'+'+'s',1,'','s']
299 ]
    # regular_verb: -ed rules first, then -ing rules.  The two rows with
    # strip_count 0 leave the word unchanged.
300     regular_verb +=[
301 [A+'+'+'vened',1,'','ed'],
302 [CX+'ed',0,'',''],
303 [C+V+'nged',2,'','ed'],
304 [A+'+'+'icked',2,'','ed'],
305 [A+'+'+CX2S+'ed',2,'','ed'],
306 [C+'+'+V+'lled',2,'','ed'],
307 [A+'*'+C+'ined',1,'','ed'],
308 [A+'*'+C+V+'[npwx]'+'ed',2,'','ed'],
309 [A+'*'+C+V+CX2+'ed',3,'','ed'],
310 [A+'+'+C+'ied',3,'y','ed'],
311 [A+'*'+'qu'+V+C+'ed',1,'','ed'],
312 [A+'+'+'u'+V+'ded',1,'','ed'],
313 [A+'+'+'[ei]'+'ted',2,'','ed'],
314 [A+'+'+'[eo]'+'ated',2,'','ed'],
315 [A+'+'+V+'ated',1,'','ed'],
316 [A+'*'+V+V+'[cgsvz]'+'ed',1,'','ed'],
317 [A+'*'+V+V+C+'ed',2,'','ed'],
318 [A+'+'+'[rw]'+'led',2,'','ed'],
319 [A+'+'+'thed',1,'','ed'],
320 [A+'+'+CX+'[cglsv]'+'ed',1,'','ed'],
321 [A+'+'+CX+CX+'ed',2,'','ed'],
322 [A+'+'+VI+VI+'ed',2,'','ed'],
323 [A+'*'+C+'[clt]'+'ored',1,'','ed'],
324 [A+'+'+'[eo]'+'red',2,'','ed'],
325 [A+'+'+'ed',1,'','ed'],
326 [CX+'+'+'ing',0,'',''],
327 [C+V+'nging',3,'','ing'],
328 [A+'+'+'icking',3,'','ing'],
329 [A+'+'+CX2S+'ing',3,'','ing'],
330 [C+'+'+V+'lling',3,'','ing'],
331 [A+'*'+C+'ining',3,'e','ing'],
332 [A+'*'+C+V+'[npwx]'+'ing',3,'','ing'],
333 [A+'*'+C+V+CX2+'ing',4,'','ing'],
334 [A+'*'+'qu'+V+C+'ing',3,'e','ing'],
335 [A+'+'+'u'+V+'ding',3,'e','ing'],
336 [A+'+'+'[ei]'+'ting',3,'','ing'],
337 [A+'+'+'[eo]'+'ating',3,'','ing'],
338 [A+'+'+V+'ating',3,'e','ing'],
339 [A+'*'+V+V+'[cgsvz]'+'ing',3,'e','ing'],
340 [A+'*'+V+V+C+'ing',3,'','ing'],
341 [A+'+'+'[rw]'+'ling',3,'','ing'],
342 [A+'+'+'thing',3,'e','ing'],
343 [A+'+'+CX+'[cglsv]'+'ing',3,'e','ing'],
344 [A+'+'+CX+CX+'ing',3,'','ing'],
345 [A+'+'+VI+VI+'ing',3,'','ing'],
346 [C+'ying',4,'ie','ing'],
347 [A+'+'+'ying',3,'','ing'],
348 [A+'*'+CX+'oing',3,'','ing'],
349 [A+'*'+C+'[clt]'+'oring',3,'e','ing'],
350 [A+'+'+'[eo]'+'ring',3,'','ing'],
351 [A+'+'+'(el)'+'ing',3,'','ing'],
352 [A+'+'+'ing',3,'e','ing']
353 ]
    # NOTE(review): the '...used' and '...using' rows below are appended
    # after the catch-all '...ed' and '...ing' rows above, so under
    # first-match-wins they look unreachable -- confirm the intended order.
354     regular_verb +=[
355 [A+'+'+'uses',2,'','s'],
356 [A+'+'+'used',2,'','ed'],
357 [A+'+'+'using',3,'','ing']
358 ]
359     regular_noun +=[
360 [A+'+'+'uses',2,'','s']
361 ]
    # Noun plurals formed other than with -s / -es.
362     regular_noun +=[
363 [A+'*'+'men',2,'an','s'],
364 [A+'*'+'wives',3,'fe','s'],
365 [A+'+'+'zoa',1,'on','s'],
366 [A+'+'+'iia',2,'um','s'],
367 [A+'+'+'la',1,'um','s'],
368 [A+'+'+'ae',1,'','s'],
369 [A+'+'+'ata',2,'','s']
370 ]
371     irregular_re_any +=[
    # Irregular rule tables, same row format as the regular tables:
    # [pattern, strip_count, replacement, suffix_label].  They are consulted
    # by find_irregular_morph(), which lemmatise_word() calls before
    # find_regular_morph().  Rows with strip_count 0 and an empty
    # replacement return the word unchanged.
    # The single irregular_re_any row keeps a list of function words, and
    # any word ending in us, ss, sis or eed, unchanged for every part of
    # speech.
372 ['(his|hers|theirs|ours|yours|as|its|this|during|something|nothing|anything|everything|('+A+'+(us|ss|sis|eed)))',0,'','']
373 ]
    # Verb-specific irregular rules.  The long alternations list stems
    # whose inflected forms restore a final 'e' (replacement 'e') or are
    # stripped without adding anything (replacement '').
374     irregular_re_verbs +=[
375 ['bias',0,'',''],
376 ['canvas',0,'',''],
377 ['canvas'+'(es)',2,'','s'],
378 ['canvas'+'(ing)',3,'','ing'],
379 ['canvas'+'(ed)',2,'','ed'],
380 ['cryed',2,'','ed'],
381 ['embed',0,'',''],
382 ['focuss'+'(es)',3,'','s'],
383 ['focuss'+'(ing)',4,'','ing'],
384 ['focuss'+'(ed)',3,'','ed'],
385 ['forted',2,'e','ed'],
386 ['forteing',3,'','ing'],
387 ['gas',0,'',''],
388 ['picknicks',2,'','s'],
389 ['picknick'+'(ing)',4,'','ing'],
390 ['picknick'+'(ed)',3,'','ed'],
391 ['resold',3,'ell','ed'],
392 ['retold',3,'ell','ed'],
393 ['retying',4,'ie','ing'],
394 ['singed',2,'e','ed'],
395 ['singeing',3,'','ing'],
396 ['trecked',4,'k','ed'],
397 ['trecking',5,'k','ing'],
398 ['(adher|ador|attun|bast|bor|bronz|can|centr|cit|compet|complet|concret|condon|contraven|conven|cran|delet|delineat|dop|drap|dron|escap|excit|fort|gazett|grop|hon|ignit|ignor|incit|interven|inton|invit|landscap|manoeuvr|nauseat|normalis|outmanoeuvr|overaw|permeat|persever|por|postpon|prun|recit|reshap|rop|shap|shor|snor|ston|wip)'+'(es)',2,'e','s'],
399 ['(adher|ador|attun|bast|bor|bronz|can|centr|cit|compet|complet|concret|condon|contraven|conven|cran|delet|delineat|dop|drap|dron|escap|excit|fort|gazett|grop|hon|ignit|ignor|incit|interven|inton|invit|landscap|manoeuvr|nauseat|normalis|outmanoeuvr|overaw|permeat|persever|por|postpon|prun|recit|reshap|rop|shap|shor|snor|ston|wip)'+'(ed)',2,'e','ed'],
400 ['(adher|ador|attun|bast|bor|bronz|can|centr|cit|compet|complet|concret|condon|contraven|conven|cran|delet|delineat|dop|drap|dron|escap|excit|fort|gazett|grop|hon|ignit|ignor|incit|interven|inton|invit|landscap|manoeuvr|nauseat|normalis|outmanoeuvr|overaw|permeat|persever|por|postpon|prun|recit|reshap|rop|shap|shor|snor|ston|wip)'+'(ing)',3,'e','ing'],
401 ['(ape|appall|augur|belong|berth|burr|conquer|egg|enroll|enthrall|forestall|froth|fulfill|install|instill|lacquer|martyr|mouth|murmur|pivot|preceed|prolong|purr|quell|recall|refill|remill|resell|retell|smooth|throng|twang|unearth)'+'(ed)',2,'','ed'],
402 ['(ape|appall|augur|belong|berth|burr|conquer|egg|enroll|enthrall|forestall|froth|fulfill|install|instill|lacquer|martyr|mouth|murmur|pivot|preceed|prolong|purr|quell|recall|refill|remill|resell|retell|smooth|throng|twang|unearth)'+'(ing)',3,'','ing']
403 ]
    # Noun-specific irregular rules: plurals that only drop the final 's',
    # followed by long lists of nouns that are returned unchanged.
    # Upper-case letters in these patterns ('Aussie', 'Afrikaans',
    # 'Christmas') are lower-cased when __init__() compiles the table.
404     irregular_re_nouns +=[
405 ['canvases',2,'','s'],
406 ['carcases',2,'','s'],
407 ['lenses',2,'','s'],
408 ['schizophrenia',0,'',''],
409 ['(('+A+'*'+'metre'+')|('+A+'*'+'litre'+')|('+A+'+'+'ette'+')|'+'acre|Aussie|bronze|budgie|burnurn|canoe|carriageway|catastrophe|centre|cill|cliche|commie|coolie|curie|demesne|employee|evacuee|fibre|foe|headache|horde|magpie|manoeuvre|moggie|moustache|movie|nightie|oboe|programme|queue|sabre|shoe|sloe|sortie|taste|theatre|timbre|titre|umbrella|utopia|wiseacre|woe)'+'(s)',1,'','s'],
410 ['(('+A+'+'+'itis'+')|'+'abdomen|achimenes|acumen|Afrikaans|alibi|alkali|amnesia|anaesthesia|aphis|aria|asbestos|asphyxia|atlas|axis|bedclothes|begonia|bias|bikini|calyptopis|cannula|cantharides|canvas|caries|chas|chamois|chaos|chili|chinchilla|Christmas|confetti|contretemps|cornucopia|corps|cosmos|cupola|cyclamen|dais|debris|diabetes|diphtheria|dysphagia|encyclopaedia|ennui|escallonia|ethos|extremis|fella|ferris|flotilla|formula|forsythia|gallows|ganglia|gardenia|gas|gasworks|gondola|grata|guerrilla|haemophilia|hors|hovis|hustings|hysteria|inertia|innards|iris|isosceles|khaki|koala|lens|macaroni|manilla|mania|mantis|maquis|martini|matins|memorabilia|metropolis|minutiae|molasses|morphia|mortis|neurasthenia|normoglycaemia|nostalgia|omen|pantometria|parabola|paraphernalia|pastis|patella|patens|pathos|patois|pectoris|pelvis|peninsula|phantasmagoria|pharos|plumbites|pneumonia|polyuria|portcullis|pyrexia|regalia|rhinoceros|safari|salami|sari|saturnalia|series|spaghetti|specimen|species|submatrices|subtopia|suburbia|syphilis|tares|taxi|tennis|toccata|trellis|tripos|turps|tutti|umbrella|utopia|villa'+')',0,'',''],
411 ['('+'accoutrements|aerodynamics|aeronautics|aesthetics|algae|amends|ammonia|ancients|annals|antics|arrears|assizes|auspices|backwoods|bacteria|banns|barracks|baths|battlements|bellows|belongings|billiards|binoculars|bitters|blandishments|bleachers|blinkers|blues|breeches|brussels|clothes|clutches|commons|confines|contents|credentials|crossbones|crossroads|curia|damages|dealings|dentures|depths|devotions|diggings|doings|downs|droppings|dues|dynamics|earnings|eatables|eaves|economics|electrodynamics|electronics|entrails|environs|equities|ethics|eugenics|filings|finances|folks|footlights|fumes|furnishings|genitals|genitalia|goggles|goods|grits|groceries|grounds|handcuffs|headquarters|histrionics|hostilities|humanities|hydraulics|hysterics|illuminations|innings|italics|jeans|jitters|kinetics|knickers|kudos|latitudes|leggings|likes|linguistics|lodgings|loggerheads|mains|manners|mathematics|means|measles|media|memoirs|metaphysics|mews|mockers|morals|motions|munitions|news|nutria|nylons|oats|odds|oils|oilskins|optics|orthodontics|outskirts|overalls|overtones|pants|pantaloons|papers|paras|paratroops|particulars|pediatrics|phonemics|phonetics|physics|pincers|plastics|politics|proceeds|proceedings|prospects|provinces|provisions|pyjamas|races|rations|ravages|refreshments|regards|reinforcements|remains|respects|returns|riches|rights|savings|schizophrenia|scissors|seconds|semantics|senses|shades|shallows|shambles|shares|shivers|shorts|singles|skittles|slacks|soundings|specifics|spectacles|spoils|stamens|statics|statistics|stratums|summons|supplies|surroundings|suspenders|takings|teens|telecommunications|tenterhooks|thanks|theatricals|thermos|thermodynamics|tights|toils|tops|trades|trappings|travels|troops|tropics|trousers|tweeds|underpants|vapours|vicissitudes|vitals|volumes|wages|wanderings|wares|waters|whereabouts|whites|winnings|withers|woollens|workings|writings|yes'+')',0,'',''],
412 ['('+'boatie|bonhomie|clippie|creepie|dearie|droppie|gendarmerie|girlie|goalie|haddie|kookie|kyrie|lambie|lassie|marie|menagerie|pettie|reverie|snottie|sweetie'+')'+'(s)',1,'','s']
413 ]
414     irregular_re_verbs +=[
415 ['(buffett|plummett)'+'es',3,'','s'],
416 ['(buffett|plummett)'+'ed',3,'','ed'],
417 ['(buffett|plummett)'+'ing',4,'','ing'],
418 ['buffetts',2,'','s'],
419 ['plummetts',2,'','s'],
420 ['gunsling',0,'',''],
421 ['gunslung',3,'ing','ed'],
422 ['gunslinging',3,'','ing'],
423 ['hamstring',0,'',''],
424 ['shred',0,'',''],
425 ['unfocuss'+'es',3,'','s'],
426 ['unfocuss'+'ed',3,'','ed'],
427 ['unfocuss'+'ing',4,'','ing'],
428 ['(accret|clon|deplet|dethron|dup|excret|expedit|extradit|fet|finetun|gor|hing|massacr|obsolet|reconven|recreat|recus|reignit|swip|videotap|zon)'+'(es)',2,'e','s'],
429 ['(accret|clon|deplet|dethron|dup|excret|expedit|extradit|fet|finetun|gor|hing|massacr|obsolet|reconven|recreat|recus|reignit|swip|videotap|zon)'+'(ed)',2,'e','ed'],
430 ['(accret|clon|deplet|dethron|dup|excret|expedit|extradit|fet|finetun|gor|hing|massacr|obsolet|reconven|recreat|recus|reignit|swip|videotap|zon)'+'(ing)',3,'e','ing'],
431 ['(backpedal|bankroll|bequeath|blackball|bottom|clang|debut|doctor|eyeball|factor|imperil|landfill|margin|occur|overbill|pilot|prong|pyramid|reinstall|relabel|remodel|squirrel|stonewall|wrong)'+'(ed)',2,'','ed'],
432 ['(backpedal|bankroll|bequeath|blackball|bottom|clang|debut|doctor|eyeball|factor|imperil|landfill|margin|occur|overbill|pilot|prong|pyramid|reinstall|relabel|remodel|squirrel|stonewall|wrong)'+'(ed)',3,'','ing']
433 ]
434     irregular_re_nouns +=[
    # Further irregular rule batches, same row format as the earlier ones:
    # [pattern, strip_count, replacement, suffix_label].  Each `+=` appends
    # to the table named on its own line, so rows land after those added
    # above and are tried later under first-match-wins.
435 ['biases',2,'','s'],
436 ['biscotti',1,'o','s'],
437 ['bookshelves',3,'f','s'],
438 ['palazzi',1,'o','s'],
439 ['(beastie|brownie|cache|cadre|calorie|champagne|cologne|cookie|druggie|eaterie|emigre|emigree|employee|freebie|genre|kiddie|massacre|moonie|necktie|niche|prairie|softie|toothpaste|willie)'+'(s)',1,'','s'],
440 ['(('+A+'*'+'phobia'+')|'+'academia|accompli|aegis|anemia|anorexia|anti|artemisia|ataxia|beatlemania|blini|cafeteria|capita|cognoscenti|coli|deli|dementia|downstairs|dyslexia|dystopia|encyclopedia|estancia|euphoria|euthanasia|fracas|fuss|gala|gorilla|gravitas|GI|habeas|haemophilia|hemophilia|hoopla|hubris|hula|hypoglycemia|ides|impatiens|informatics|intelligentsia|jacuzzi|kiwi|leukaemia|leukemia|mafia|magnolia|malaria|maquila|marginalia|megalomania|mercedes|militia|miniseries|mips|mufti|muni|olympics|pancreas|paranoia|pastoris|pastrami|pepperoni|pepsi|piroghi|pizzeria|plainclothes|pneumocystis|potpourri|proboscis|rabies|reggae|regimen|rigatoni|salmonella|samurai|sarsaparilla|semen|ski|sonata|spatula|stats|subtilis|sushi|tachyarrhythmia|tachycardia|tequila|tetris|thrips|throes|timpani|tsunami|vaccinia|vanilla)',0,'',''],
    # NOTE(review): 'sunglasss' in the next row has three s's -- probably a
    # typo for 'sunglasses'; confirm before changing.
441 ['(acrobatics|alias|athletics|basics|betters|bifocals|bowels|briefs|checkers|denims|doldrums|dramatics|dungarees|ergonomics|genetics|gymnastics|hackles|haves|incidentals|ironworks|jinks|leavings|leftovers|logistics|makings|microelectronics|mores|oodles|pajamas|pampas|panties|payola|pickings|pliers|pi|ravings|reparations|rudiments|scads|splits|stays|subtitles|sunglasss|sweepstakes|tatters|toiletries|tongs|trivia|tweezers|vibes|waterworks|woolens)',0,'',''],
442 ['(biggie|bourgeoisie|brie|camaraderie|chinoiserie|coterie|doggie|genie|hippie|junkie|lingerie|moxie|preppie|rookie|yuppie)'+'(s)',1,'','s']
443 ]
    # NOTE(review): the last six rows of this verb batch ('daises',
    # 'reguli', 'steppes', the '...philia' list, 'calisthenics|...' and
    # 'auntie|...') look like noun rules; as written they are only applied
    # when the word is treated as a verb.  Possibly a missing
    # `irregular_re_nouns +=[` split -- confirm against the upstream source.
444     irregular_re_verbs +=[
445 ['(chor|sepulchr|silhouett|telescop)'+'(es)',2,'e','s'],
446 ['(chor|sepulchr|silhouett|telescop)'+'(ed)',2,'e','ed'],
447 ['(chor|sepulchr|silhouett|telescop)'+'(ing)',3,'e','ing'],
448 ['(subpena|suds|fresco)'+'(es)',2,'','s'],
449 ['(subpena|suds|fresco)'+'(ed)',2,'','ed'],
450 ['(subpena|suds|fresco)'+'(ing)',3,'','ing'],
451 ['daises',2,'','s'],
452 ['reguli',1,'o','s'],
453 ['steppes',1,'','s'],
454 ['(('+A+'+'+'philia'+')|'+'fantasia|Feis|Gras|Mardi|OS|pleura|tularemia|vasa)',0,'',''],
455 ['(calisthenics|heroics|rheumatics|victuals|wiles)',0,'',''],
456 ['(auntie|anomie|coosie|quickie)'+'(s)',1,'','s']
457 ]
458     irregular_re_nouns +=[
459 ['(absentia|bourgeois|pecunia|Syntaxis|uncia)',0,'',''],
460 ['(apologetics|goings|outdoors)',0,'',''],
461 ['collies',1,'','s']
462 ]
463     irregular_re_verbs +=[
464 ['bob-sled',0,'',''],
465 ['imbed',0,'',''],
466 ['precis',0,'',''],
467 ['precis'+'(es)',2,'','s'],
468 ['precis'+'(ed)',2,'','ed'],
469 ['precis'+'(ing)',3,'','ing']
470 ]
471     irregular_re_nouns +=[
472 ['obsequies',3,'y','s'],
473 ['superficies',1,'','s'],
474 ['(acacia|albumen|alms|alopecia|ambergris|ambrosia|anaemia|analgesia|anopheles|aphasia|arras|assagai|assegai|astrophysics|aubrietia|avoirdupois|bathos|beriberi|biceps|bitumen|borzoi|broccoli|cadi|calends|callisthenics|calla|camellia|campanula|cantata|caravanserai|cedilla|chilli|chrysalis|clematis|clitoris|cognomen|collywobbles|copula|corolla|cybernetics|cyclops|cyclopaedia|cyclopedia|dahlia|dhoti|dickens|dietetics|dipsomania|dolmen|dyspepsia|effendi|elevenses|epidermis|epiglottis|erysipelas|eurhythmics|faeces|fascia|fibula|finis|fistula|fives|fleur-de-lis|forceps|freesia|fuchsia|geophysics|geriatrics|glottis|guerilla|hadji|haggis|hara-kiri|hernia|herpes|hoop-la|houri|hymen|hyperbola|hypochondria|ibis|inamorata|insignia|insomnia|jackanapes|jimjams|jodhpurs|kepi|kleptomania|kohlrabi|kris|kukri|kumis|litchi|litotes|loggia|magnesia|man-at-arms|manila|mantilla|marquis|master-at-arms|mattins|melancholia|menses|minutia|monomania|muggins|mumps|mi|myopia|nebula|necropolis|neuralgia|nibs|numismatics|nymphomania|obstetrics|okapi|onomatopoeia|ophthalmia|paraplegia|patchouli|paterfamilias|penis|pergola|petunia|pharmacopoeia|phi|piccalilli|poinsettia|praxis|precis|primula|prophylaxis|pyrites|rabbi|raffia|reredos|revers|rickets|rounders|rubella|saki|salvia|sassafras|sawbones|scabies|scapula|schnapps|scintilla|scrofula|secateurs|sepia|septicaemia|sequoia|shears|smithereens|spermaceti|stamen|suds|sundae|si|swami|tarantella|tarantula|testis|therapeutics|thews|tibia|tiddlywinks|tombola|topi|tortilla|trews|triceps|underclothes|undies|uvula|verdigris|vermicelli|viola|wadi|wapiti|wisteria|yaws|yogi|zinnia)',0,'',''],
475 ['(aerie|birdie|bogie|caddie|cock-a-leekie|collie|corrie|cowrie|dixie|eyrie|faerie|gaucherie|gillie|knobkerrie|laddie|mashie|mealie|menagerie|organdie|patisserie|pinkie|pixie|stymie|talkie)'+'(s)',1,'','s']
476 ]
477     irregular_re_nouns +=[
478 ['(ablutions|adenoids|aerobatics|afters|astronautics|atmospherics|bagpipes|ballistics|bell-bottoms|belles-lettres|blinders|bloomers|butterfingers|buttocks|bygones|cahoots|cannabis|castanets|clappers|corgi|cross-purposes|dodgems|dregs|duckboards|edibles|envoi|eurythmics|externals|extortions|falsies|fisticuffs|fleshings|fleur-de-lys|fours|gentleman-at-arms|geopolitics|giblets|glassworks|gleanings|handlebars|heartstrings|hi-fi|homiletics|housetops|hunkers|hydroponics|impala|kalends|knickerbockers|kwela|lees|lei|lexis|lieder|literati|loins|meanderings|meths|muesli|muniments|necessaries|nines|ninepins|nippers|nuptials|orthopaedics|paediatrics|phonics|polemics|pontificals|prelims|pyrotechnics|ravioli|rompers|ructions|scampi|scrapings|serjeant-at-arms|sheila|shires|smalls|steelworks|sweepings|toxaemia|ti|vespers|virginals|waxworks|yeti|zucchini)',0,'',''],
479 ['(mountie|brasserie|cup-tie|grannie|koppie|rotisserie|walkie-talkie)'+'(s)',1,'','s']
480 ]
    # Verb forms built on stems ending in '-us': the doubled-s spellings
    # are stripped back to the bare stem, the listed stems regain a final
    # 'e'.
481     irregular_re_verbs +=[
482 ['busses',3,'','s'],
483 ['bussed',3,'','ed'],
484 ['bussing',4,'','ing'],
485 ['hocus-pocusses',3,'','s'],
486 ['hocusses',3,'','s'],
487 ['(('+A+'*'+'-us'+')|'+'abus|accus|amus|arous|bemus|carous|contus|disabus|disus|dous|enthus|excus|grous|misus|mus|overus|perus|reus|rous|sous|us|('+A+'*'+'[hlmp]ous)|('+A+'*'+'[af]us))'+'(es)',2,'e','s'],
488 ['(('+A+'*'+'-us'+')|'+'abus|accus|amus|arous|bemus|carous|contus|disabus|disus|dous|enthus|excus|grous|misus|mus|overus|perus|reus|rous|sous|us|('+A+'*'+'[hlmp]ous)|('+A+'*'+'[af]us))'+'(ed)',2,'e','ed'],
489 ['(('+A+'*'+'-us'+')|'+'abus|accus|amus|arous|bemus|carous|contus|disabus|disus|dous|enthus|excus|grous|misus|mus|overus|perus|reus|rous|sous|us|('+A+'*'+'[hlmp]ous)|('+A+'*'+'[af]us))'+'(ing)',3,'e','ing']
490 ]
    # Noun plurals in '-uses' whose singular ends in '-use': only the final
    # 's' is removed.
491     irregular_re_nouns +=[
492 ['(('+A+'*-abus)|('+A+'*-us)|abus|burnous|cayus|chanteus|chartreus|chauffeus|crus|disus|excus|grous|hypotenus|masseus|misus|mus|Ous|overus|poseus|reclus|reus|rus|us|('+A+'*[hlmp]ous)|('+A+'*[af]us))'+'(es)',1,'','s']
493 ]
494     irregular_verbs +=[
    # More exact-match verb exceptions as [surface form, lemma, suffix
    # label].  check_dictionary_exceptions() compares the lower-cased word
    # with the first field and returns fields two and three, so these
    # entries take effect before any regex rule is tried.  These rows are
    # lists, while the entries added near the top of the class are tuples;
    # both are only indexed and sliced.
495 ['ached','ache','ed'],
496 ['aching','ache','ing'],
497 ['being','be','ing'],
498 ['accustomed','accustom','ed'],
499 ['accustoming','accustom','ing'],
500 ['blossomed','blossom','ed'],
501 ['blossoming','blossom','ing'],
502 ['boycotted','boycott','ed'],
503 ['boycotting','boycott','ing'],
504 ['cataloged','catalog','ed'],
505 ['cataloging','catalog','ing'],
506 ['created','create','ed'],
507 ['creating','create','ing'],
508 ['finesses','finesse','s'],
509 ['finessed','finesse','ed'],
510 ['finessing','finesse','ing'],
511 ['interfered','interfere','ed'],
512 ['interfering','interfere','ing'],
513 ['tastes','taste','s'],
514 ['tasted','taste','ed'],
515 ['tasting','taste','ing'],
516 ['torpedoed','torpedo','ed'],
517 ['torpedoing','torpedo','ing'],
518 ['wastes','waste','s'],
519 ['wasted','waste','ed'],
520 ['wasting','waste','ing'],
521 ['routed','route','ed'],
522 ['routing','route','ing'],
523 ['rerouted','reroute','ed'],
524 ['rerouting','reroute','ing']
525 ]
526
527     def irregular_verbs_wordnet(self):
528         return[
529 ('abode','abide','ed'),
530 ('abought','aby','ed'),
531 ('abye','aby',''),
532 ('abyes','aby','s'),
533 ('acquitted','acquit','ed'),
534 ('acquitting','acquit','ing'),
535 ('addrest','address','ed'),
536 ('ageing','age','ing'),
537 ('agreed','agree','ed'),
538 ('am','be',''),
539 ('anted','ante','ed'),
540 ('anteed','ante','ed'),
541 ('anteing','ante','ing'),
542 ('antes','ante','s'),
543 ('arced','arc','ed'),
544 ('arcing','arc','ing'),
545 ('arcked','arc','ed'),
546 ('arcking','arc','ing'),
547 ('are','be',''),
548 ('arisen','arise','en'),
549 ('arose','arise','ed'),
550 ('ate','eat','ed'),
551 ('awoke','awake','ed'),
552 ('awoken','awake','en'),
553 ('baby-sat','baby-sit','ed'),
554 ('back-pedaled','back-pedal','ed'),
555 ('back-pedaling','back-pedal','ing'),
556 ('backbit','backbite','ed'),
557 ('backbiting','backbite','ing'),
558 ('backbitten','backbite','en'),
559 ('backslid','backslide','ed'),
560 ('backslidden','backslide','en'),
561 ('bade','bid','ed'),
562 ('bandieds','bandy','s'),
563 ('banqueted','banquet','ed'),
564 ('banqueting','banquet','ing'),
565 ('barreled','barrel','ed'),
566 ('barreling','barrel','ing'),
567 ('bastinadoed','bastinado','ed'),
568 ('beaten','beat','en'),
569 ('became','become','ed'),
570 ('bedeviled','bedevil','ed'),
571 ('bedeviling','bedevil','ing'),
572 ('been','be','en'),
573 ('befallen','befall','en'),
574 ('befalling','befall','ing'),
575 ('befell','befall','ed'),
576 ('began','begin','ed'),
577 ('begat','beget','ed'),
578 ('begirt','begird','ed'),
579 ('begot','beget','ed'),
580 ('begotten','beget','en'),
581 ('beguiled','beguile','ed'),
582 ('beguiling','beguile','ing'),
583 ('begun','begin','en'),
584 ('beheld','behold','ed'),
585 ('beholden','behold','en'),
586 ('bejeweled','bejewel','ed'),
587 ('bejeweling','bejewel','ing'),
588 ('belied','belie','ed'),
589 ('belies','belie','s'),
590 ('belying','belie','ing'),
591 ('benempt','bename','ed'),
592 ('bent','bend','ed'),
593 ('besought','beseech','ed'),
594 ('bespoke','bespeak','ed'),
595 ('bespoken','bespeak','en'),
596 ('bestrewn','bestrew','en'),
597 ('bestrid','bestride','ed'),
598 ('bestridden','bestride','en'),
599 ('bestrode','bestride','ed'),
600 ('betaken','betake','en'),
601 ('bethought','bethink','ed'),
602 ('betook','betake','ed'),
603 ('beveled','bevel','ed'),
604 ('beveling','bevel','ing'),
605 ('biased','bias','ed'),
606 ('biases','bias','s'),
607 ('biasing','bias','ing'),
608 ('biassed','bias','ed'),
609 ('biassing','bias','ing'),
610 ('bidden','bid','en'),
611 ('bit','bite','ed'),
612 ('biting','bite','ing'),
613 ('bitten','bite','en'),
614 ('bivouacked','bivouac','ed'),
615 ('bivouacking','bivouac','ing'),
616 ('bled','bleed','ed'),
617 ('blest','bless','ed'),
618 ('blew','blow','ed'),
619 ('blown','blow','en'),
620 ('blue-pencils','blue-pencil','s'),
621 ('bogged-down','bog-down','ed'),
622 ('bogging-down','bog-down','ing'),
623 ('bogs-down','bog-down','s'),
624 ('boogied','boogie','ed'),
625 ('boogies','boogie','s'),
626 ('bore','bear','ed'),
627 ('born','bear','en'),
628 ('borne','bear','en'),
629 ('bottle-fed','bottle-feed','ed'),
630 ('bought','buy','ed'),
631 ('bound','bind','ed'),
632 ('breast-fed','breast-feed','ed'),
633 ('bred','breed','ed'),
634 ('breid','brei','ed'),
635 ('bringing','bring','ing'),
636 ('broke','break','ed'),
637 ('broken','break','en'),
638 ('brought','bring','ed'),
639 ('browbeaten','browbeat','en'),
640 ('buckramed','buckram','ed'),
641 ('buckraming','buckram','ing'),
642 ('built','build','ed'),
643 ('buncoed','bunco','ed'),
644 ('bunkoed','bunko','ed'),
645 ('burnt','burn','ed'),
646 ('busheled','bushel','ed'),
647 ('busheling','bushel','ing'),
648 ('bypast','bypass','ed'),
649 ('came','come','ed'),
650 ('canaled','canal','ed'),
651 ('canaling','canal','ing'),
652 ('canceled','cancel','ed'),
653 ('canceling','cancel','ing'),
654 ('carbonadoed','carbonado','ed'),
655 ('caroled','carol','ed'),
656 ('caroling','carol','ing'),
657 ('caught','catch','ed'),
658 ('caviled','cavil','ed'),
659 ('caviling','cavil','ing'),
660 ('cbeled','cbel','ed'),
661 ('cbeling','cbel','ing'),
662 ('cbelled','cbel','ed'),
663 ('cbelling','cbel','ing'),
664 ('channeled','channel','ed'),
665 ('channeling','channel','ing'),
666 ('chassed','chasse','ed'),
667 ('chasseing','chasse','ing'),
668 ('chasses','chasse','s'),
669 ('chevied','chivy','ed'),
670 ('chevies','chivy','s'),
671 ('chevying','chivy','ing'),
672 ('chid','chide','ed'),
673 ('chidden','chide','en'),
674 ('chiseled','chisel','ed'),
675 ('chiseling','chisel','ing'),
676 ('chivvied','chivy','ed'),
677 ('chivvies','chivy','s'),
678 ('chivvying','chivy','ing'),
679 ('chose','choose','ed'),
680 ('chosen','choose','en'),
681 ('clad','clothe','ed'),
682 ('cleft','cleave','ed'),
683 ('cleped','clepe','ed'),
684 ('cleping','clepe','ing'),
685 ('clept','clepe','ed'),
686 ('clinging','cling','ing'),
687 ('clothed','clothe','ed'),
688 ('clothes','clothe','s'),
689 ('clothing','clothe','ing'),
690 ('clove','cleave','ed'),
691 ('cloven','cleave','en'),
692 ('clung','cling','ed'),
693 ('co-opted','coopt','ed'),
694 ('co-opting','coopt','ing'),
695 ('co-opts','coopts','s'),
696 ('co-ordinate','coordinate',''),
697 ('co-ordinated','coordinate','ed'),
698 ('co-ordinates','coordinate','s'),
699 ('co-ordinating','coordinate','ing'),
700 ('coiffed','coif','ed'),
701 ('coiffing','coif','ing'),
702 ('combated','combat','ed'),
703 ('combating','combat','ing'),
704 ('concertinaed','concertina','ed'),
705 ('concertinaing','concertina','ing'),
706 ('congaed','conga','ed'),
707 ('congaing','conga','ing'),
708 ('contangoed','contango','ed'),
709 ('cooeed','cooee','ed'),
710 ('cooees','cooee','s'),
711 ('coquetted','coquet','ed'),
712 ('coquetting','coquet','ing'),
713 ('counseled','counsel','ed'),
714 ('counseling','counsel','ing'),
715 ('countersank','countersink','ed'),
716 ('countersunk','countersink','en'),
717 ('court-martialled','court-martial','ed'),
718 ('court-martialling','court-martial','ing'),
719 ('crept','creep','ed'),
720 ('crescendoed','crescendo','ed'),
721 ('croqueted','croquet','ed'),
722 ('croqueting','croquet','ing'),
723 ('crossbred','crossbreed','ed'),
724 ('cudgeled','cudgel','ed'),
725 ('cudgeling','cudgel','ing'),
726 ('cupeled','cupel','ed'),
727 ('cupeling','cupel','ing'),
728 ('curettes','curet','s'),
729 ('curst','curse','ed'),
730 ('dealt','deal','ed'),
731 ('debussed','debus','ed'),
732 ('debusses','debus','s'),
733 ('debussing','debus','ing'),
734 ('decreed','decree','ed'),
735 ('deep-freeze','deepfreeze',''),
736 ('deep-freezed','deepfreeze','ed'),
737 ('deep-freezes','deepfreeze','s'),
738 ('deep-frozen','deepfreeze','en'),
739 ('degases','degas','s'),
740 ('degassed','degas','ed'),
741 ('degasses','degas','s'),
742 ('degassing','degas','ing'),
743 ('deleing','dele','ing'),
744 ('deviled','devil','ed'),
745 ('deviling','devil','ing'),
746 ('diagramed','diagram','ed'),
747 ('diagraming','diagram','ing'),
748 ('dialled','dial','ed'),
749 ('dialling','dial','ing'),
750 ('did','do','ed'),
751 ('disagreed','disagree','ed'),
752 ('disemboweled','disembowel','ed'),
753 ('disemboweling','disembowel','ing'),
754 ('disenthralls','disenthral','s'),
755 ('disenthrals','disenthrall','s'),
756 ('disheveled','dishevel','ed'),
757 ('disheveling','dishevel','ing'),
758 ('dittoed','ditto','ed'),
759 ('done','do','en'),
760 ('dought','dow','ed'),
761 ('dove','dive','ed'),
762 ('drank','drink','ed'),
763 ('drawn','draw','en'),
764 ('dreamt','dream','ed'),
765 ('dreed','dree','ed'),
766 ('drew','draw','ed'),
767 ('driveled','drivel','ed'),
768 ('driveling','drivel','ing'),
769 ('driven','drive','en'),
770 ('drove','drive','ed'),
771 ('drunk','drink','en'),
772 ('duelled','duel','ed'),
773 ('duelling','duel','ing'),
774 ('dug','dig','ed'),
775 ('dwelt','dwell','ed'),
776 ('eaten','eat','en'),
777 ('echoed','echo','ed'),
778 ('embargoed','embargo','ed'),
779 ('embussed','embus','ed'),
780 ('embusses','embus','s'),
781 ('embussing','embus','ing'),
782 ('emceed','emcee','ed'),
783 ('empaneled','empanel','ed'),
784 ('empaneling','empanel','ing'),
785 ('enameled','enamel','ed'),
786 ('enameling','enamel','ing'),
787 ('enwound','enwind','ed'),
788 ('equaled','equal','ed'),
789 ('equaling','equal','ing'),
790 ('equalled','equal','ed'),
791 ('equalling','equal','ing'),
792 ('equipped','equip','ed'),
793 ('equipping','equip','ing'),
794 ('eying','eye','ing'),
795 ('facsimileing','facsimile','ing'),
796 ('fallen','fall','en'),
797 ('fed','feed','ed'),
798 ('fell','fall','ed'),
799 ('felt','feel','ed'),
800 ('filagreed','filagree','ed'),
801 ('filigreed','filigree','ed'),
802 ('fillagreed','fillagree','ed'),
803 ('fine-drawn','fine-draw','en'),
804 ('fine-drew','fine-draw','ed'),
805 ('flanneled','flannel','ed'),
806 ('flanneling','flannel','ing'),
807 ('fled','flee','ed'),
808 ('flew','fly','ed'),
809 ('flinging','fling','ing'),
810 ('floodlit','floodlight','ed'),
811 ('flown','fly','en'),
812 ('flung','fling','ed'),
813 ('flyblew','flyblow','ed'),
814 ('flyblown','flyblow','en'),
815 ('forbad','forbid','ed'),
816 ('forbade','forbid','ed'),
817 ('forbidden','forbid','en'),
818 ('forbore','forbear','ed'),
819 ('forborne','forbear','en'),
820 ('force-fed','force-feed','ed'),
821 ('fordid','fordo','ed'),
822 ('fordone','fordo','en'),
823 ('foredid','foredo','ed'),
824 ('foredone','foredo','en'),
825 ('foregone','forego','en'),
826 ('foreknew','foreknow','ed'),
827 ('foreknown','foreknow','en'),
828 ('foreran','forerun','ed'),
829 ('foresaw','foresee','ed'),
830 ('foreseen','foresee','en'),
831 ('foreshown','foreshow','en'),
832 ('forespoke','forespeak','ed'),
833 ('forespoken','forespeak','en'),
834 ('foretelling','foretell','ing'),
835 ('foretold','foretell','ed'),
836 ('forewent','forego','ed'),
837 ('forgave','forgive','ed'),
838 ('forgiven','forgive','en'),
839 ('forgone','forgo','en'),
840 ('forgot','forget','ed'),
841 ('forgotten','forget','en'),
842 ('forsaken','forsake','en'),
843 ('forsook','forsake','ed'),
844 ('forspoke','forspeak','ed'),
845 ('forspoken','forspeak','en'),
846 ('forswore','forswear','ed'),
847 ('forsworn','forswear','en'),
848 ('forwent','forgo','ed'),
849 ('fought','fight','ed'),
850 ('found','find','ed'),
851 ('freed','free','ed'),
852 ('fricasseed','fricassee','ed'),
853 ('frivoled','frivol','ed'),
854 ('frivoling','frivol','ing'),
855 ('frolicked','frolic','ed'),
856 ('frolicking','frolic','ing'),
857 ('froze','freeze','ed'),
858 ('frozen','freeze','en'),
859 ('fuelled','fuel','ed'),
860 ('fuelling','fuel','ing'),
861 ('funneled','funnel','ed'),
862 ('funneling','funnel','ing'),
863 ('gainsaid','gainsay','ed'),
864 ('gamboled','gambol','ed'),
865 ('gamboling','gambol','ing'),
866 ('gan','gin','en'),
867 ('garnisheed','garnishee','ed'),
868 ('gases','gas','s'),
869 ('gassed','gas','ed'),
870 ('gasses','gas','s'),
871 ('gassing','gas','ing'),
872 ('gave','give','ed'),
873 ('geed','gee','ed'),
874 ('gelled','gel','ed'),
875 ('gelling','gel','ing'),
876 ('gelt','geld','ed'),
877 ('genned-up','gen-up','ed'),
878 ('genning-up','gen-up','ing'),
879 ('gens-up','gen-up','s'),
880 ('ghostwriting','ghostwrite','ing'),
881 ('ghostwritten','ghostwrite','en'),
882 ('ghostwrote','ghostwrite','ed'),
883 ('gilt','gild','ed'),
884 ('girt','gird','ed'),
885 ('given','give','en'),
886 ('glaceed','glace','ed'),
887 ('glaceing','glace','ing'),
888 ('gnawn','gnaw','en'),
889 ('gone','go','en'),
890 ('got','get','ed'),
891 ('gotten','get','en'),
892 ('graveled','gravel','ed'),
893 ('graveling','gravel','ing'),
894 ('graven','grave','en'),
895 ('greed','gree','ed'),
896 ('grew','grow','ed'),
897 ('gript','grip','ed'),
898 ('ground','grind','ed'),
899 ('groveled','grovel','ed'),
900 ('groveling','grovel','ing'),
901 ('grown','grow','en'),
902 ('guaranteed','guarantee','ed'),
903 ('gumshoes','gumshoe','s'),
904 ('gypped','gyp','ed'),
905 ('gypping','gyp','ing'),
906 ('hacksawn','hacksaw','en'),
907 ('had','have','ed'),
908 ('halloed','hallo','ed'),
909 ('haloed','halo','ed'),
910 ('hamstringing','hamstring','ing'),
911 ('hamstrung','hamstring','ed'),
912 ('handfed','handfeed','ed'),
913 ('hanseled','hansel','ed'),
914 ('hanseling','hansel','ing'),
915 ('has','have','s'),
916 ('hatcheled','hatchel','ed'),
917 ('hatcheling','hatchel','ing'),
918 ('heard','hear','ed'),
919 ('held','hold','ed'),
920 ('hewn','hew','en'),
921 ('hid','hide','ed'),
922 ('hidden','hide','en'),
923 ('hocus-pocussed','hocus-pocus','ed'),
924 ('hocus-pocussing','hocus-pocus','ing'),
925 ('hocussed','hocus','ed'),
926 ('hocussing','hocus','ing'),
927 ('hoes','hoe','s'),
928 ('hogtied','hogtie','ed'),
929 ('hogties','hogtie','s'),
930 ('hogtying','hogtie','ing'),
931 ('honied','honey','ed'),
932 ('horseshoes','horseshoe','s'),
933 ('houseled','housel','ed'),
934 ('houseling','housel','ing'),
935 ('hove','heave','ed'),
936 ('hoveled','hovel','ed'),
937 ('hoveling','hovel','ing'),
938 ('hung','hang','ed'),
939 ('impaneled','impanel','ed'),
940 ('impaneling','impanel','ing'),
941 ('impanells','impanel','s'),
942 ('inbred','inbreed','ed'),
943 ('indwelling','indwell','ing'),
944 ('indwelt','indwell','ed'),
945 ('initialled','initial','ed'),
946 ('initialling','initial','ing'),
947 ('inlaid','inlay','ed'),
948 ('interbred','interbreed','ed'),
949 ('interlaid','interlay','ed'),
950 ('interpled','interplead','ed'),
951 ('interwove','interweave','ed'),
952 ('interwoven','interweave','en'),
953 ('inwove','inweave','ed'),
954 ('inwoven','inweave','en'),
955 ('is','be','s'),
956 ('jerry-built','jerry-build','ed'),
957 ('jeweled','jewel','ed'),
958 ('jeweling','jewel','ing'),
959 ('joint','join','ed'),
960 ('joy-ridden','joy-ride','en'),
961 ('joy-rode','joy-ride','ed'),
962 ('kenneled','kennel','ed'),
963 ('kenneling','kennel','ing'),
964 ('kent','ken','ed'),
965 ('kept','keep','ed'),
966 ('kerneled','kernel','ed'),
967 ('kerneling','kernel','ing'),
968 ('kneed','knee','ed'),
969 ('knelt','kneel','ed'),
970 ('knew','know','ed'),
971 ('known','know','en'),
972 ("ko'd",'ko','ed'),
973 ("ko'ing",'ko','ing'),
974 ("ko's",'ko','s'),
975 ('labeled','label','ed'),
976 ('labeling','label','ing'),
977 ('laden','lade','en'),
978 ('ladyfied','ladify','ed'),
979 ('ladyfies','ladify','s'),
980 ('ladyfying','ladify','ing'),
981 ('laid','lay','ed'),
982 ('lain','lie','en'),
983 ('lassoed','lasso','ed'),
984 ('laureled','laurel','ed'),
985 ('laureling','laurel','ing'),
986 ('leant','lean','ed'),
987 ('leapt','leap','ed'),
988 ('learnt','learn','ed'),
989 ('led','lead','ed'),
990 ('left','leave','ed'),
991 ('lent','lend','ed'),
992 ('leveled','level','ed'),
993 ('leveling','level','ing'),
994 ('libeled','libel','ed'),
995 ('libeling','libel','ing'),
996 ('lit','light','ed'),
997 ('lost','lose','ed'),
998 ('made','make','ed'),
999 ('marshaled','marshal','ed'),
1000 ('marshaling','marshal','ing'),
1001 ('marveled','marvel','ed'),
1002 ('marveling','marvel','ing'),
1003 ('meant','mean','ed'),
1004 ('medaled','medal','ed'),
1005 ('medaling','medal','ing'),
1006 ('met','meet','ed'),
1007 ('metaled','metal','ed'),
1008 ('metaling','metal','ing'),
1009 ('might','may',''),
1010 ('mimicked','mimic','ed'),
1011 ('mimicking','mimic','ing'),
1012 ('misbecame','misbecome','ed'),
1013 ('misdealt','misdeal','ed'),
1014 ('misgave','misgive','ed'),
1015 ('misgiven','misgive','en'),
1016 ('misheard','mishear','ed'),
1017 ('mislaid','mislay','ed'),
1018 ('misled','mislead','ed'),
1019 ('mispled','misplead','ed'),
1020 ('misspelled','misspell','ed'),
1021 ('misspelling','misspell','ing'),
1022 ('misspelt','misspell','ed'),
1023 ('misspent','misspend','ed'),
1024 ('mistaken','mistake','en'),
1025 ('mistook','mistake','ed'),
1026 ('misunderstood','misunderstand','ed'),
1027 ('modeled','model','ed'),
1028 ('modeling','model','ing'),
1029 ('molten','melt','en'),
1030 ('mown','mow','en'),
1031 ('nickeled','nickel','ed'),
1032 ('nickeling','nickel','ing'),
1033 ('nielloed','niello','ed'),
1034 ('non-prossed','non-pros','ed'),
1035 ('non-prosses','non-pros','s'),
1036 ('non-prossing','non-pros','ing'),
1037 ('nonplussed','nonplus','ed'),
1038 ('nonplusses','nonplus','s'),
1039 ('nonplussing','nonplus','ing'),
1040 ('outbidden','outbid','en'),
1041 ('outbred','outbreed','ed'),
1042 ('outdid','outdo','ed'),
1043 ('outdone','outdo','en'),
1044 ('outgassed','outgas','ed'),
1045 ('outgasses','outgas','s'),
1046 ('outgassing','outgas','ing'),
1047 ('outgeneraled','outgeneral','ed'),
1048 ('outgeneraling','outgeneral','ing'),
1049 ('outgone','outgo','en'),
1050 ('outgrew','outgrow','ed'),
1051 ('outgrown','outgrow','en'),
1052 ('outlaid','outlay','ed'),
1053 ('outran','outrun','ed'),
1054 ('outridden','outride','en'),
1055 ('outrode','outride','ed'),
1056 ('outselling','outsell','ing'),
1057 ('outshone','outshine','ed'),
1058 ('outshot','outshoot','ed'),
1059 ('outsold','outsell','ed'),
1060 ('outstood','outstand','ed'),
1061 ('outthought','outthink','ed'),
1062 ('outwent','outgo','ed'),
1063 ('outwore','outwear','ed'),
1064 ('outworn','outwear','en'),
1065 ('overbidden','overbid','en'),
1066 ('overblew','overblow','ed'),
1067 ('overblown','overblow','en'),
1068 ('overbore','overbear','ed'),
1069 ('overborne','overbear','en'),
1070 ('overbuilt','overbuild','ed'),
1071 ('overcame','overcome','ed'),
1072 ('overdid','overdo','ed'),
1073 ('overdone','overdo','en'),
1074 ('overdrawn','overdraw','en'),
1075 ('overdrew','overdraw','ed'),
1076 ('overdriven','overdrive','en'),
1077 ('overdrove','overdrive','ed'),
1078 ('overflew','overfly','ed'),
1079 ('overgrew','overgrow','ed'),
1080 ('overgrown','overgrow','en'),
1081 ('overhanging','overhang','ing'),
1082 ('overheard','overhear','ed'),
1083 ('overhung','overhang','ed'),
1084 ('overlaid','overlay','ed'),
1085 ('overlain','overlie','en'),
1086 ('overlies','overlie','s'),
1087 ('overlying','overlie','ing'),
1088 ('overpaid','overpay','ed'),
1089 ('overpast','overpass','ed'),
1090 ('overran','overrun','ed'),
1091 ('overridden','override','en'),
1092 ('overrode','override','ed'),
1093 ('oversaw','oversee','ed'),
1094 ('overseen','oversee','en'),
1095 ('overselling','oversell','ing'),
1096 ('oversewn','oversew','en'),
1097 ('overshot','overshoot','ed'),
1098 ('overslept','oversleep','ed'),
1099 ('oversold','oversell','ed'),
1100 ('overspent','overspend','ed'),
1101 ('overspilled','overspill','ed'),
1102 ('overspilling','overspill','ing'),
1103 ('overspilt','overspill','ed'),
1104 ('overtaken','overtake','en'),
1105 ('overthrew','overthrow','ed'),
1106 ('overthrown','overthrow','en'),
1107 ('overtook','overtake','ed'),
1108 ('overwound','overwind','ed'),
1109 ('overwriting','overwrite','ing'),
1110 ('overwritten','overwrite','en'),
1111 ('overwrote','overwrite','ed'),
1112 ('paid','pay','ed'),
1113 ('palled','pal','ed'),
1114 ('palling','pal','ing'),
1115 ('paneled','panel','ed'),
1116 ('paneling','panel','ing'),
1117 ('panicked','panic','ed'),
1118 ('panicking','panic','ing'),
1119 ('paralleled','parallel','ed'),
1120 ('paralleling','parallel','ing'),
1121 ('parceled','parcel','ed'),
1122 ('parceling','parcel','ing'),
1123 ('partaken','partake','en'),
1124 ('partook','partake','ed'),
1125 ('pasquil','pasquinade',''),
1126 ('pasquilled','pasquinade','ed'),
1127 ('pasquilling','pasquinade','ing'),
1128 ('pasquils','pasquinade','s'),
1129 ('pedaled','pedal','ed'),
1130 ('pedaling','pedal','ing'),
1131 ('peed','pee','ed'),
1132 ('penciled','pencil','ed'),
1133 ('penciling','pencil','ing'),
1134 ('pent','pen','ed'),
1135 ('physicked','physic','ed'),
1136 ('physicking','physic','ing'),
1137 ('picnicked','picnic','ed'),
1138 ('picnicking','picnic','ing'),
1139 ('pistoled','pistol','ed'),
1140 ('pistoling','pistol','ing'),
1141 ('pled','plead','ed'),
1142 ('polkaed','polka','ed'),
1143 ('polkaing','polka','ing'),
1144 ('pommeled','pommel','ed'),
1145 ('pommeling','pommel','ing'),
1146 ('precanceled','precancel','ed'),
1147 ('precanceling','precancel','ing'),
1148 ('prepaid','prepay','ed'),
1149 ('programmes','program','s'),
1150 ('prologed','prologue','ed'),
1151 ('prologing','prologue','ing'),
1152 ('prologs','prologue','s'),
1153 ('proven','prove','en'),
1154 ('pummeled','pummel','ed'),
1155 ('pummeling','pummel','ing'),
1156 ('pureed','puree','ed'),
1157 ('quarreled','quarrel','ed'),
1158 ('quarreling','quarrel','ing'),
1159 ('quartersawn','quartersaw','en'),
1160 ('queued','queue','ed'),
1161 ('queues','queue','s'),
1162 ('queuing','queue','ing'),
1163 ('quick-froze','quick-freeze','ed'),
1164 ('quick-frozen','quick-freeze','en'),
1165 ('quipped','quip','ed'),
1166 ('quipping','quip','ing'),
1167 ('quitted','quit','ed'),
1168 ('quitting','quit','ing'),
1169 ('quizzed','quiz','ed'),
1170 ('quizzes','quiz','s'),
1171 ('quizzing','quiz','ing'),
1172 ('ran','run','ed'),
1173 ('rang','ring','ed'),
1174 ('raoed','radio','ed'),
1175 ('rarefied','rarefy','ed'),
1176 ('rarefies','rarefy','s'),
1177 ('rarefying','rarefy','ing'),
1178 ('raveled','ravel','ed'),
1179 ('raveling','ravel','ing'),
1180 ('razeed','razee','ed'),
1181 ('re-trod','re-tread','ed'),
1182 ('re-trodden','re-tread','en'),
1183 ('rebuilt','rebuild','ed'),
1184 ('recced','recce','ed'),
1185 ('recceed','recce','ed'),
1186 ('recceing','recce','ing'),
1187 ('red','red','ed'),
1188 ('red-penciled','red-pencil','ed'),
1189 ('red-penciling','red-pencil','ing'),
1190 ('red-pencils','red-pencil','s'),
1191 ('redid','redo','ed'),
1192 ('redone','redo','en'),
1193 ('refereed','referee','ed'),
1194 ('reft','reave','ed'),
1195 ('refuelled','refuel','ed'),
1196 ('refuelling','refuel','ing'),
1197 ('remade','remake','ed'),
1198 ('rent','rend','ed'),
1199 ('repaid','repay','ed'),
1200 ('reran','rerun','ed'),
1201 ('resat','resit','ed'),
1202 ('retaken','retake','en'),
1203 ('rethought','rethink','ed'),
1204 ('retook','retake','ed'),
1205 ('reveled','revel','ed'),
1206 ('reveling','revel','ing'),
1207 ('rewound','rewind','ed'),
1208 ('rewriting','rewrite','ing'),
1209 ('rewritten','rewrite','en'),
1210 ('rewrote','rewrite','ed'),
1211 ('ridden','ride','en'),
1212 ('risen','rise','en'),
1213 ('rivaled','rival','ed'),
1214 ('rivaling','rival','ing'),
1215 ('riven','rive','en'),
1216 ('rode','ride','ed'),
1217 ('roqueted','roquet','ed'),
1218 ('roqueting','roquet','ing'),
1219 ('rose','rise','ed'),
1220 ('rough-hewn','rough-hew','en'),
1221 ('rove','reeve','ed'),
1222 ('roweled','rowel','ed'),
1223 ('roweling','rowel','ing'),
1224 ('rung','ring','ing'),
1225 ('said','say','ed'),
1226 ('sambaed','samba','ed'),
1227 ('sambaing','samba','ing'),
1228 ('sang','sing','ed'),
1229 ('sank','sink','ed'),
1230 ('sat','sit','ed'),
1231 ('sauteed','saute','ed'),
1232 ('sauteing','saute','ing'),
1233 ('saw','see','ed'),
1234 ('sawn','saw','en'),
1235 ('seen','see','en'),
1236 ('sent','send','ed'),
1237 ('sewn','sew','en'),
1238 ('shaken','shake','en'),
1239 ('shaven','shave','en'),
1240 ('shed','shed','ed'),
1241 ('shellacked','shellac','ed'),
1242 ('shellacking','shellac','ing'),
1243 ('shent','shend','ed'),
1244 ('shewn','shew','en'),
1245 ('shod','shoe','ed'),
1246 ('shoes','shoe','s'),
1247 ('shone','shine','ed'),
1248 ('shook','shake','ed'),
1249 ('shot','shoot','ed'),
1250 ('shoveled','shovel','ed'),
1251 ('shoveling','shovel','ing'),
1252 ('shown','show','en'),
1253 ('shrank','shrink','ed'),
1254 ('shriveled','shrivel','ed'),
1255 ('shriveling','shrivel','ing'),
1256 ('shriven','shrive','en'),
1257 ('shrove','shrive','ed'),
1258 ('shrunk','shrink','en'),
1259 ('shrunken','shrink','en'),
1260 ('sicked','sic','ed'),
1261 ('sicking','sic','ing'),
1262 ('sightsaw','sightsee','ed'),
1263 ('sightseen','sightsee','en'),
1264 ('signaled','signal','ed'),
1265 ('signaling','signal','ing'),
1266 ("ski'd",'ski','ed'),
1267 ('skied','ski','ed'),
1268 ('skiing','ski','ing'),
1269 ('skydove','skydive','ed'),
1270 ('slain','slay','en'),
1271 ('slept','sleep','ed'),
1272 ('slew','slay','ed'),
1273 ('slid','slide','ed'),
1274 ('slidden','slide','en'),
1275 ('slinging','sling','ing'),
1276 ('slung','sling','ed'),
1277 ('slunk','slink','ed'),
1278 ('smelt','smell','ed'),
1279 ('smit','smite','ed'),
1280 ('smiting','smite','ing'),
1281 ('smitten','smite','en'),
1282 ('smote','smite','ed'),
1283 ('snafued','snafu','ed'),
1284 ('snafues','snafu','s'),
1285 ('snafuing','snafu','ing'),
1286 ('sniveled','snivel','ed'),
1287 ('sniveling','snivel','ing'),
1288 ('snowshoes','snowshoe','s'),
1289 ('soft-pedaled','soft-pedal','ed'),
1290 ('soft-pedaling','soft-pedal','ing'),
1291 ('sol-faed','sol-fa','ed'),
1292 ('sol-faing','sol-fa','ing'),
1293 ('sold','sell','ed'),
1294 ('soothsaid','soothsay','ed'),
1295 ('sortied','sortie','ed'),
1296 ('sorties','sortie','s'),
1297 ('sought','seek','ed'),
1298 ('sown','sow','en'),
1299 ('spanceled','spancel','ed'),
1300 ('spanceling','spancel','ing'),
1301 ('spat','spit','ed'),
1302 ('sped','speed','ed'),
1303 ('spellbound','spellbind','ed'),
1304 ('spelt','spell','ed'),
1305 ('spent','spend','ed'),
1306 ('spilt','spill','ed'),
1307 ('spiraled','spiral','ed'),
1308 ('spiraling','spiral','ing'),
1309 ('spoilt','spoil','ed'),
1310 ('spoke','speak','ed'),
1311 ('spoken','speak','en'),
1312 ('spoon-fed','spoon-feed','ed'),
1313 ('spotlit','spotlight','ed'),
1314 ('sprang','spring','ed'),
1315 ('springing','spring','ing'),
1316 ('sprung','spring','en'),
1317 ('spun','spin','ed'),
1318 ('squatted','squat','ed'),
1319 ('squatting','squat','ing'),
1320 ('squeegeed','squeegee','ed'),
1321 ('squibbed','squib','ed'),
1322 ('squibbing','squib','ing'),
1323 ('squidded','squid','ed'),
1324 ('squidding','squid','ing'),
1325 ('squilgee','squeegee',''),
1326 ('stall-fed','stall-feed','ed'),
1327 ('stank','stink','ed'),
1328 ('stenciled','stencil','ed'),
1329 ('stenciling','stencil','ing'),
1330 ('stilettoed','stiletto','ed'),
1331 ('stilettoeing','stiletto','ing'),
1332 ('stinging','sting','ing'),
1333 ('stole','steal','ed'),
1334 ('stolen','steal','en'),
1335 ('stood','stand','ed'),
1336 ('stove','stave','ed'),
1337 ('strewn','strew','en'),
1338 ('stridden','stride','en'),
1339 ('stringing','string','ing'),
1340 ('striven','strive','en'),
1341 ('strode','stride','ed'),
1342 ('strove','strive','ed'),
1343 ('strown','strow','en'),
1344 ('struck','strike','ed'),
1345 ('strung','string','ed'),
1346 ('stuccoed','stucco','ed'),
1347 ('stuck','stick','ed'),
1348 ('stung','sting','ed'),
1349 ('stunk','stink','en'),
1350 ('stymied','stymie','ed'),
1351 ('stymies','stymie','s'),
1352 ('stymying','stymie','ing'),
1353 ('subpoenaed','subpoena','ed'),
1354 ('subpoenaing','subpoena','ing'),
1355 ('subtotaled','subtotal','ed'),
1356 ('subtotaling','subtotal','ing'),
1357 ('sung','sing','en'),
1358 ('sunk','sink','en'),
1359 ('sunken','sink','en'),
1360 ('swam','swim','ed'),
1361 ('swept','sweep','ed'),
1362 ('swinging','swing','ing'),
1363 ('swiveled','swivel','ed'),
1364 ('swiveling','swivel','ing'),
1365 ('swollen','swell','en'),
1366 ('swopped','swap','ed'),
1367 ('swopping','swap','ing'),
1368 ('swops','swap','s'),
1369 ('swore','swear','ed'),
1370 ('sworn','swear','en'),
1371 ('swum','swim','en'),
1372 ('swung','swing','ed'),
1373 ('symboled','symbol','ed'),
1374 ('symboling','symbol','ing'),
1375 ('symbolled','symbol','ed'),
1376 ('symbolling','symbol','ing'),
1377 ('taken','take','en'),
1378 ('talced','talc','ed'),
1379 ('talcing','talc','ing'),
1380 ('talcked','talc','ed'),
1381 ('talcking','talc','ing'),
1382 ("tally-ho'd",'tally-ho','ed'),
1383 ('tally-hoed','tally-ho','ed'),
1384 ('tangoed','tango','ed'),
1385 ('tasseled','tassel','ed'),
1386 ('tasseling','tassel','ing'),
1387 ('taught','teach','ed'),
1388 ('taxied','taxi','ed'),
1389 ('taxies','taxi','s'),
1390 ('taxiing','taxi','ing'),
1391 ('taxying','taxi','ing'),
1392 ('te-heed','te-hee','ed'),
1393 ('teed','tee','ed'),
1394 ('thought','think','ed'),
1395 ('threw','throw','ed'),
1396 ('thriven','thrive','en'),
1397 ('throve','thrive','ed'),
1398 ('thrown','throw','en'),
1399 ('tinged','tinge','ed'),
1400 ('tingeing','tinge','ing'),
1401 ('tinging','tinge','ing'),
1402 ('tinseled','tinsel','ed'),
1403 ('tinseling','tinsel','ing'),
1404 ('tiptoes','tiptoe','s'),
1405 ('toes','toe','s'),
1406 ('told','tell','ed'),
1407 ('took','take','ed'),
1408 ('tore','tear','ed'),
1409 ('torn','tear','en'),
1410 ('torrify','torrefy',''),
1411 ('totaled','total','ed'),
1412 ('totaling','total','ing'),
1413 ('toweled','towel','ed'),
1414 ('toweling','towel','ing'),
1415 ('trafficked','traffic','ed'),
1416 ('trafficking','traffic','ing'),
1417 ('trameled','trammel','ed'),
1418 ('trameling','trammel','ing'),
1419 ('tramelled','trammel','ed'),
1420 ('tramelling','trammel','ing'),
1421 ('tramels','trammel','s'),
1422 ('transfixt','transfix','ed'),
1423 ('tranship','transship','ed'),
1424 ('traveled','travel','ed'),
1425 ('traveling','travel','ing'),
1426 ('trod','tread','ed'),
1427 ('trodden','tread','en'),
1428 ('troweled','trowel','ed'),
1429 ('troweling','trowel','ing'),
1430 ('tunneled','tunnel','ed'),
1431 ('tunneling','tunnel','ing'),
1432 ('typewriting','typewrite','ing'),
1433 ('typewritten','typewrite','en'),
1434 ('typewrote','typewrite','ed'),
1435 ('unbent','unbend','ed'),
1436 ('unbound','unbind','ed'),
1437 ('unclad','unclothe','ed'),
1438 ('unclothed','unclothe','ed'),
1439 ('unclothes','unclothe','s'),
1440 ('unclothing','unclothe','ing'),
1441 ('underbought','underbuy','ed'),
1442 ('underfed','underfeed','ed'),
1443 ('undergirt','undergird','ed'),
1444 ('undergone','undergo','en'),
1445 ('underlaid','underlay','ed'),
1446 ('underlain','underlie','en'),
1447 ('underlies','underlie','s'),
1448 ('underlying','underlie','ing'),
1449 ('underpaid','underpay','ed'),
1450 ('underselling','undersell','ing'),
1451 ('undershot','undershoot','ed'),
1452 ('undersold','undersell','ed'),
1453 ('understood','understand','ed'),
1454 ('undertaken','undertake','en'),
1455 ('undertook','undertake','ed'),
1456 ('underwent','undergo','ed'),
1457 ('underwriting','underwrite','ing'),
1458 ('underwritten','underwrite','en'),
1459 ('underwrote','underwrite','ed'),
1460 ('undid','undo','ed'),
1461 ('undone','undo','en'),
1462 ('unfroze','unfreeze','ed'),
1463 ('unfrozen','unfreeze','en'),
1464 ('unkenneled','unkennel','ed'),
1465 ('unkenneling','unkennel','ing'),
1466 ('unlaid','unlay','ed'),
1467 ('unlearnt','unlearn','ed'),
1468 ('unmade','unmake','ed'),
1469 ('unraveled','unravel','ed'),
1470 ('unraveling','unravel','ing'),
1471 ('unrove','unreeve','ed'),
1472 ('unsaid','unsay','ed'),
1473 ('unslinging','unsling','ing'),
1474 ('unslung','unsling','ed'),
1475 ('unspoke','unspeak','ed'),
1476 ('unspoken','unspeak','en'),
1477 ('unstringing','unstring','ing'),
1478 ('unstrung','unstring','ed'),
1479 ('unstuck','unstick','ed'),
1480 ('unswore','unswear','ed'),
1481 ('unsworn','unswear','en'),
1482 ('untaught','unteach','ed'),
1483 ('unthought','unthink','ed'),
1484 ('untied','untie','ed'),
1485 ('unties','untie','s'),
1486 ('untying','untie','ing'),
1487 ('untrod','untread','ed'),
1488 ('untrodden','untread','en'),
1489 ('unwound','unwind','ed'),
1490 ('upbuilt','upbuild','ed'),
1491 ('upheld','uphold','ed'),
1492 ('uphove','upheave','ed'),
1493 ('upped','up','ed'),
1494 ('upping','up','ing'),
1495 ('uprisen','uprise','en'),
1496 ('uprose','uprise','ed'),
1497 ('upsprang','upspring','ed'),
1498 ('upspringing','upspring','ing'),
1499 ('upsprung','upspring','en'),
1500 ('upswelled','upswell','ed'),
1501 ('upswelling','upswell','ing'),
1502 ('upswept','upsweep','ed'),
1503 ('upswinging','upswing','ing'),
1504 ('upswollen','upswell','en'),
1505 ('upswung','upswing','ed'),
1506 ('vetoed','veto','ed'),
1507 ('victualled','victual','ed'),
1508 ('victualling','victual','ing'),
1509 ('visaed','visa','ed'),
1510 ('visaing','visa','ing'),
1511 ('vitriolled','vitriol','ed'),
1512 ('vitriolling','vitriol','ing'),
1513 ('vivaed','viva','ed'),
1514 ('vivaing','viva','ing'),
1515 ('was','be','ed'),
1516 ("water-ski'd",'water-ski','ed'),
1517 ('water-skied','water-ski','ed'),
1518 ('water-skiing','water-ski','ing'),
1519 ('waylaid','waylay','ed'),
1520 ('waylain','waylay','en'),
1521 ('went','go','ed'),
1522 ('wept','weep','ed'),
1523 ('were','be','ed'),
1524 ('whipsawn','whipsaw','en'),
1525 ('whizzed','whiz','ed'),
1526 ('whizzes','whiz','s'),
1527 ('whizzing','whiz','ing'),
1528 ('winterfed','winterfeed','ed'),
1529 ('wiredrawn','wiredraw','en'),
1530 ('wiredrew','wiredraw','ed'),
1531 ('withdrawn','withdraw','en'),
1532 ('withdrew','withdraw','ed'),
1533 ('withheld','withhold','ed'),
1534 ('withstood','withstand','ed'),
1535 ('woke','wake','ed'),
1536 ('woken','wake','en'),
1537 ('won','win','ed'),
1538 ('wore','wear','ed'),
1539 ('worn','wear','en'),
1540 ('wound','wind','ed'),
1541 ('wove','weave','ed'),
1542 ('woven','weave','en'),
1543 ('wringing','wring','ing'),
1544 ('writing','write','ing'),
1545 ('written','write','en'),
1546 ('wrote','write','ed'),
1547 ('wrung','wring','ed'),
1548 ('ycleped','clepe','ed'),
1549 ('yclept','clepe','ed'),
1550 ('yodeled','yodel','ed'),
1551 ('yodeling','yodel','ing'),
1552 ('zeroed','zero','ed')
1553 ]
1554
1555     def irregular_nouns_wordnet(self):
1556         return[
1557 ('addenda','addendum','s'),
1558 ('adieux','adieu','s'),
1559 ('aides-de-camp','aide-de-camp','s'),
1560 ('aliases','alias','s'),
1561 ('alkalies','alkali','s'),
1562 ('aloes','aloe','s'),
1563 ('amanuenses','amanuensis','s'),
1564 ('analyses','analysis','s'),
1565 ('anastomoses','anastomosis','s'),
1566 ('anthraces','anthrax','s'),
1567 ('antitheses','antithesis','s'),
1568 ('aphides','aphis','s'),
1569 ('apices','apex','s'),
1570 ('apotheoses','apotheosis','s'),
1571 ('appendices','appendix','s'),
1572 ('arboreta','arboretum','s'),
1573 ('areg','erg','s'),
1574 ('arterioscleroses','arteriosclerosis','s'),
1575 ('atlantes','atlas','s'),
1576 ('automata','automaton','s'),
1577 ('axises','axis','s'),
1578 ('bambini','bambino','s'),
1579 ('bandeaux','bandeau','s'),
1580 ('banditti','bandit','s'),
1581 ('bassi','basso','s'),
1582 ('beaux','beau','s'),
1583 ('beeves','beef','s'),
1584 ('bicepses','biceps','s'),
1585 ('bijoux','bijou','s'),
1586 ('billets-doux','billet-doux','s'),
1587 ('boraces','borax','s'),
1588 ('bossies','boss','s'),
1589 ('brainchildren','brainchild','s'),
1590 ('brethren','brother','s'),
1591 ('brothers-in-law','brother-in-law','s'),
1592 ('buckteeth','bucktooth','s'),
1593 ('bunde','bund','s'),
1594 ('bureaux','bureau','s'),
1595 ('busses','bus','s'),
1596 ('calves','calf','s'),
1597 ('calyces','calyx','s'),
1598 ('candelabra','candelabrum','s'),
1599 ('capricci','capriccio','s'),
1600 ('caribous','caribou','s'),
1601 ('carides','caryatid','s'),
1602 ('catalyses','catalysis','s'),
1603 ('cerebra','cerebrum','s'),
1604 ('cervices','cervix','s'),
1605 ('chateaux','chateau','s'),
1606 ('cherubim','cherub','s'),
1607 ('children','child','s'),
1608 ('chillies','chilli','s'),
1609 ('chrysalides','chrysalis','s'),
1610 ('chrysalises','chrysalis','s'),
1611 ('ciceroni','cicerone','s'),
1612 ('cloverleaves','cloverleaf','s'),
1613 ('coccyges','coccyx','s'),
1614 ('codices','codex','s'),
1615 ('cola','colon','s'),
1616 ('colloquies','colloquy','s'),
1617 ('colones','colon','s'),
1618 ('concertanti','concertante','s'),
1619 ('concerti','concerto','s'),
1620 ('concertini','concertino','s'),
1621 ('conquistadores','conquistador','s'),
1622 ('contralti','contralto','s'),
1623 ('corpora','corpus','s'),
1624 ('corrigenda','corrigendum','s'),
1625 ('cortices','cortex','s'),
1626 ('cosmoses','cosmos','s'),
1627 ('crescendi','crescendo','s'),
1628 ('crises','crisis','s'),
1629 ('criteria','criterion','s'),
1630 ('cruces','crux','s'),
1631 ('culs-de-sac','cul-de-sac','s'),
1632 ('cyclopes','cyclops','s'),
1633 ('cyclopses','cyclops','s'),
1634 ('data','datum','s'),
1635 ('daughters-in-law','daughter-in-law','s'),
1636 ('desiderata','desideratum','s'),
1637 ('diaereses','diaeresis','s'),
1638 ('diaerses','diaeresis','s'),
1639 ('diagnoses','diagnosis','s'),
1640 ('dialyses','dialysis','s'),
1641 ('diathses','diathesis','s'),
1642 ('dicta','dictum','s'),
1643 ('diereses','dieresis','s'),
1644 ('dilettantes','dilettante','s'),
1645 ('dilettanti','dilettante','s'),
1646 ('divertimenti','divertimento','s'),
1647 ('dogteeth','dogtooth','s'),
1648 ('dormice','dormouse','s'),
1649 ('dryades','dryad','s'),
1650 ('dui','duo','s'),
1651 ('duona','duodenum','s'),
1652 ('duonas','duodenum','s'),
1653 ('dwarves','dwarf','s'),
1654 ('eisteddfodau','eisteddfod','s'),
1655 ('ellipses','ellipsis','s'),
1656 ('elves','elf','s'),
1657 ('emphases','emphasis','s'),
1658 ('epicentres','epicentre','s'),
1659 ('epiglottides','epiglottis','s'),
1660 ('epiglottises','epiglottis','s'),
1661 ('errata','erratum','s'),
1662 ('exegeses','exegesis','s'),
1663 ('eyeteeth','eyetooth','s'),
1664 ('fathers-in-law','father-in-law','s'),
1665 ('feet','foot','s'),
1666 ('fellaheen','fellah','s'),
1667 ('fellahin','fellah','s'),
1668 ('femora','femur','s'),
1669 ('fezzes','fez','s'),
1670 ('flagstaves','flagstaff','s'),
1671 ('flambeaux','flambeau','s'),
1672 ('flatfeet','flatfoot','s'),
1673 ('fleurs-de-lis','fleur-de-lis','s'),
1674 ('fleurs-de-lys','fleur-de-lys','s'),
1675 ('flyleaves','flyleaf','s'),
1676 ('fora','forum','s'),
1677 ('forcipes','forceps','s'),
1678 ('forefeet','forefoot','s'),
1679 ('fulcra','fulcrum','s'),
1680 ('gallowses','gallows','s'),
1681 ('gases','gas','s'),
1682 ('gasses','gas','s'),
1683 ('gateaux','gateau','s'),
1684 ('geese','goose','s'),
1685 ('gemboks','gemsbok','s'),
1686 ('genera','genus','s'),
1687 ('geneses','genesis','s'),
1688 ('gentlemen-at-arms','gentleman-at-arms','s'),
1689 ('gestalten','gestalt','s'),
1690 ('giraffes','giraffe','s'),
1691 ('glissandi','glissando','s'),
1692 ('glottides','glottis','s'),
1693 ('glottises','glottis','s'),
1694 ('godchildren','godchild','s'),
1695 ('goings-over','going-over','s'),
1696 ('grandchildren','grandchild','s'),
1697 ('halves','half','s'),
1698 ('hangers-on','hanger-on','s'),
1699 ('helices','helix','s'),
1700 ('hooves','hoof','s'),
1701 ('hosen','hose','s'),
1702 ('hypnoses','hypnosis','s'),
1703 ('hypotheses','hypothesis','s'),
1704 ('iambi','iamb','s'),
1705 ('ibices','ibex','s'),
1706 ('ibises','ibis','s'),
1707 ('impedimenta','impediment','s'),
1708 ('indices','index','s'),
1709 ('intagli','intaglio','s'),
1710 ('intermezzi','intermezzo','s'),
1711 ('interregna','interregnum','s'),
1712 ('irides','iris','s'),
1713 ('irises','iris','s'),
1714 ('is','is','s'),
1715 ('jacks-in-the-box','jack-in-the-box','s'),
1716 ('kibbutzim','kibbutz','s'),
1717 ('knives','knife','s'),
1718 ('kohlrabies','kohlrabi','s'),
1719 ('kronen','krone','s'),
1720 ('kroner','krone','s'),
1721 ('kronur','krona','s'),
1722 ('kylikes','kylix','s'),
1723 ('ladies-in-waiting','lady-in-waiting','s'),
1724 ('larynges','larynx','s'),
1725 ('latices','latex','s'),
1726 ('leges','lex','s'),
1727 ('libretti','libretto','s'),
1728 ('lire','lira','s'),
1729 ('lives','life','s'),
1730 ('loaves','loaf','s'),
1731 ('loggie','loggia','s'),
1732 ('lustra','lustre','s'),
1733 ('lyings-in','lying-in','s'),
1734 ('macaronies','macaroni','s'),
1735 ('maestri','maestro','s'),
1736 ('mantes','mantis','s'),
1737 ('mantises','mantis','s'),
1738 ('markkaa','markka','s'),
1739 ('marquises','marquis','s'),
1740 ('masters-at-arms','master-at-arms','s'),
1741 ('matrices','matrix','s'),
1742 ('matzoth','matzo','s'),
1743 ('mausolea','mausoleum','s'),
1744 ('maxima','maximum','s'),
1745 ('meioses','meiosis','s'),
1746 ('memoranda','memorandum','s'),
1747 ('men-at-arms','man-at-arms','s'),
1748 ("men-o'-war",'man-of-war','s'),
1749 ('men-of-war','man-of-war','s'),
1750 ('menservants','manservant','s'),
1751 ('mesdemoiselles','mademoiselle','s'),
1752 ('messieurs','monsieur','s'),
1753 ('metamorphoses','metamorphosis','s'),
1754 ('metatheses','metathesis','s'),
1755 ('metempsychoses','metempsychosis','s'),
1756 ('metropolises','metropolis','s'),
1757 ('mice','mouse','s'),
1758 ('milieux','milieu','s'),
1759 ('minima','minimum','s'),
1760 ('momenta','momentum','s'),
1761 ('monies','money','s'),
1762 ('monsignori','monsignor','s'),
1763 ('mooncalves','mooncalf','s'),
1764 ('mothers-in-law','mother-in-law','s'),
1765 ('naiades','naiad','s'),
1766 ('necropoleis','necropolis','s'),
1767 ('necropolises','necropolis','s'),
1768 ('nemeses','nemesis','s'),
1769 ('neuroses','neurosis','s'),
1770 ('novelle','novella','s'),
1771 ('oases','oasis','s'),
1772 ('obloquies','obloquy','s'),
1773 ('octahedra','octahedron','s'),
1774 ('optima','optimum','s'),
1775 ('ora','os','s'),
1776 ('osar','os','s'),
1777 ('ossa','os','s'),
1778 ('ova','ovum','s'),
1779 ('oxen','ox','s'),
1780 ('paralyses','paralysis','s'),
1781 ('parentheses','parenthesis','s'),
1782 ('paris-mutuels','pari-mutuel','s'),
1783 ('pastorali','pastorale','s'),
1784 ('patresfamilias','paterfamilias','s'),
1785 ('pease','pea','s'),
1786 ('pekingese','pekinese','s'),
1787 ('pelves','pelvis','s'),
1788 ('pelvises','pelvis','s'),
1789 ('pence','penny','s'),
1790 ('penes','penis','s'),
1791 ('penises','penis','s'),
1792 ('penknives','penknife','s'),
1793 ('perihelia','perihelion','s'),
1794 ('pfennige','pfennig','s'),
1795 ('pharynges','pharynx','s'),
1796 ('phenomena','phenomenon','s'),
1797 ('philodendra','philodendron','s'),
1798 ('pieds-a-terre','pied-a-terre','s'),
1799 ('pineta','pinetum','s'),
1800 ('plateaux','plateau','s'),
1801 ('plena','plenum','s'),
1802 ('pocketknives','pocketknife','s'),
1803 ('portmanteaux','portmanteau','s'),
1804 ('potlies','potbelly','s'),
1805 ('praxes','praxis','s'),
1806 ('praxises','praxis','s'),
1807 ('proboscides','proboscis','s'),
1808 ('proboscises','proboscis','s'),
1809 ('prostheses','prosthesis','s'),
1810 ('protozoa','protozoan','s'),
1811 ('pudenda','pudendum','s'),
1812 ('putti','putto','s'),
1813 ('quanta','quantum','s'),
1814 ('quarterstaves','quarterstaff','s'),
1815 ('quizzes','quiz','s'),
1816 ('reales','real','s'),
1817 ('recta','rectum','s'),
1818 ('referenda','referendum','s'),
1819 ('reis','real','s'),
1820 ('rhinoceroses','rhinoceros','s'),
1821 ('roes','roe','s'),
1822 ('rondeaux','rondeau','s'),
1823 ('rostra','rostrum','s'),
1824 ('runners-up','runner-up','s'),
1825 ('sancta','sanctum','s'),
1826 ('sawboneses','sawbones','s'),
1827 ('scarves','scarf','s'),
1828 ('scherzi','scherzo','s'),
1829 ('scleroses','sclerosis','s'),
1830 ('scrota','scrotum','s'),
1831 ('secretaries-general','secretary-general','s'),
1832 ('selves','self','s'),
1833 ('sera','serum','s'),
1834 ('seraphim','seraph','s'),
1835 ('sheaves','sheaf','s'),
1836 ('shelves','shelf','s'),
1837 ('simulacra','simulacrum','s'),
1838 ('sisters-in-law','sister-in-law','s'),
1839 ('soli','solo','s'),
1840 ('soliloquies','soliloquy','s'),
1841 ('sons-in-law','son-in-law','s'),
1842 ('spectra','spectrum','s'),
1843 ('sphinges','sphinx','s'),
1844 ('splayfeet','splayfoot','s'),
1845 ('sputa','sputum','s'),
1846 ('stamina','stamen','s'),
1847 ('stelae','stele','s'),
1848 ('stepchildren','stepchild','s'),
1849 ('sterna','sternum','s'),
1850 ('strata','stratum','s'),
1851 ('stretti','stretto','s'),
1852 ('summonses','summons','s'),
1853 ('swamies','swami','s'),
1854 ('swathes','swathe','s'),
1855 ('synopses','synopsis','s'),
1856 ('syntheses','synthesis','s'),
1857 ('tableaux','tableau','s'),
1858 ('taxies','taxi','s'),
1859 ('teeth','tooth','s'),
1860 ('tempi','tempo','s'),
1861 ('tenderfeet','tenderfoot','s'),
1862 ('testes','testis','s'),
1863 ('theses','thesis','s'),
1864 ('thieves','thief','s'),
1865 ('thoraces','thorax','s'),
1866 ('titmice','titmouse','s'),
1867 ('tootses','toots','s'),
1868 ('torsi','torso','s'),
1869 ('tricepses','triceps','s'),
1870 ('triumviri','triumvir','s'),
1871 ('trousseaux','trousseau','s'),
1872 ('turves','turf','s'),
1873 ('tympana','tympanum','s'),
1874 ('ultimata','ultimatum','s'),
1875 ('vacua','vacuum','s'),
1876 ('vertices','vertex','s'),
1877 ('vertigines','vertigo','s'),
1878 ('virtuosi','virtuoso','s'),
1879 ('vortices','vortex','s'),
1880 ('wagons-lits','wagon-lit','s'),
1881 ('weirdies','weirdie','s'),
1882 ('werewolves','werewolf','s'),
1883 ('wharves','wharf','s'),
1884 ('whippers-in','whipper-in','s'),
1885 ('wolves','wolf','s'),
1886 ('woodlice','woodlouse','s'),
1887 ('yogin','yogi','s'),
1888 ('zombies','zombie','s')
1889 ]
1890     irregular_verbs += irregular_verbs_wordnet(None)
1891     irregular_nouns += irregular_nouns_wordnet(None)
1892
1893     def setitem(self,dict,key,value):
1894         dict[key]=value
1895
if __name__ == '__main__':
    # Smoke test: lemmatise each whitespace-separated token of a sample
    # sentence and print the list of results.
    p = MontyLemmatiser()
    test_string = 'I am eating'
    # A parenthesised single-argument print gives the same output under the
    # Python 2 print statement and also parses as a call on Python 3, where
    # the bare statement form is a SyntaxError.
    print([p.lemmatise_word(word) for word in test_string.split()])
Note: See TracBrowser for help on using the browser.