| 1 |
from nltk import pos_tag, word_tokenize, RegexpParser |
|---|
| 2 |
from nltk.tree import Tree |
|---|
| 3 |
from random import choice |
|---|
| 4 |
# Positions of the word and of its part-of-speech tag inside a tagged
# ``(word, tag)`` token tuple.
i_word, i_tag = 0, 1


class PronounHandler:
    """Swap or resolve pronouns in POS-tagged chat sentences.

    Sentences are sequences of ``(word, tag)`` tuples.  ``history`` maps a
    screenname to that user's most recent sentences (at most
    ``history_length`` of them) and ``matched`` maps a screenname to a dict
    of ``pronoun word -> token`` recording the replacement last chosen for
    each pronoun.
    """

    # VBP rewrites applied after a leading "you" has been replaced by "i".
    _VERBS_AFTER_YOU = {"are": "am", "'ll": "will", "'re": "am", "'d": "would"}
    # VBP rewrites applied after a leading "i" has been replaced by "you".
    _VERBS_AFTER_I = {"am": "is", "'ll": "will", "'m": "'re", "'d": "would"}
    # Verb endings that take "es" rather than "s".
    _ES_ENDINGS = ("h", "s", "x")

    def __init__(self):
        """Build the pronoun chunker and the empty per-user state."""
        # SUB: a pronoun, optional modal/adverbs, then a verb.
        # OBJ: a pronoun optionally preceded by "to", a verb or a preposition.
        self.regexp = (
            "\n"
            "SUB: {<PRP><MD>?(<RB>(<CC><MD>?<RB>)*)?<VB.*>}\n"
            "OBJ: {(<TO>|<VB.*>|<IN>)?<PRP>}\n"
        )
        self.chunker = RegexpParser(self.regexp)
        self.history = dict()
        self.male = ["his", "him", "he", "himself"]
        self.female = ["her", "she", "herself"]
        self.first_plural = ["we", "us", "ourselves", "our"]
        self.third_plural = ["they", "them", "themselves", "their"]
        self.matched = dict()
        self.history_length = 6

    def add_to_history(self, sentence, screenname):
        """Remember ``sentence`` as the newest sentence of ``screenname``.

        Creates the user's history and match table on first use and drops
        the oldest sentences so that at most ``history_length`` are kept.
        """
        if screenname not in self.history:
            self.history[screenname] = []
            self.matched[screenname] = {}

        past = self.history[screenname]
        # The original evaluated a bare ``self.history`` here and never
        # stored the sentence, so the history stayed empty forever.
        past.append(sentence)

        excess = len(past) - self.history_length
        if excess > 0:
            del past[:excess]

    def resolve_pronouns(self, sentence, screenname):
        """Return ``sentence`` with pronouns replaced by best guesses.

        ``sentence`` is returned untouched when ``screenname`` has no
        history yet.  Otherwise the result is a flat list of tokens in which
        every SUB/OBJ chunk has been rewritten by ``resolve_subjects`` or
        ``resolve_objects``.
        """
        if screenname not in self.history:
            return sentence

        resolved = []
        for node in self.chunker.parse(sentence):
            label = self._chunk_label(node) if isinstance(node, Tree) else None
            if label == "SUB":
                resolved.extend(self.resolve_subjects(node, sentence, screenname))
            elif label == "OBJ":
                resolved.extend(self.resolve_objects(node, sentence, screenname))
            else:
                resolved.append(node)
        return resolved

    @staticmethod
    def _chunk_label(node):
        """Return a chunk's label on both NLTK 3 (label()) and NLTK 2 (.node)."""
        label = getattr(node, "label", None)
        if callable(label):
            return label()
        return node.node

    def resolve_subjects(self, tree, sentence, screenname):
        """Rewrite a SUB chunk whose first leaf is the subject.

        "you"/"i" are swapped (with the VBP verbs that follow adjusted),
        "hey", "your" and "my" get fixed replacements, and any other subject
        is replaced by the closest match found in the user's history.
        Returns the new list of tokens for the chunk.
        """
        leaves = tree.leaves()
        subject, rest = leaves[0], leaves[1:]
        word = subject[i_word]

        if word == "you":
            new_tree = [("i", subject[i_tag])]
            new_tree.extend(self._verb_after_you(leaf) for leaf in rest)
            return new_tree

        if word == "i":
            new_tree = [("you", subject[i_tag])]
            new_tree.extend(self._verb_after_i(leaf) for leaf in rest)
            return new_tree

        if word == "hey":
            new_tree = [("hi", "NN")]
        elif word == "your":
            new_tree = [("my", "NN")]
        elif word == "my":
            # The original wrote ``extend [...]`` (a subscript, not a call),
            # which raised TypeError.
            new_tree = [(screenname, "NN"), ("'s", "POS")]
        else:
            new_tree = [self._remember_match(sentence, subject, screenname)]

        new_tree.extend(rest)
        return new_tree

    def _verb_after_you(self, leaf):
        """Adjust one leaf that follows a "you" rewritten to "i"."""
        verb, tag = leaf[i_word], leaf[i_tag]
        if tag != "VBP":
            return leaf
        if verb in self._VERBS_AFTER_YOU:
            return (self._VERBS_AFTER_YOU[verb], tag)
        # NOTE(review): adding "es" after "i" ("watch" -> "watches") is kept
        # from the original but looks ungrammatical - confirm the intent.
        if verb.endswith(self._ES_ENDINGS):
            return (verb + "es", tag)
        return (verb, tag)

    def _verb_after_i(self, leaf):
        """Adjust one leaf that follows an "i" rewritten to "you"."""
        verb, tag = leaf[i_word], leaf[i_tag]
        if tag != "VBP":
            return leaf
        if verb in self._VERBS_AFTER_I:
            return (self._VERBS_AFTER_I[verb], tag)
        # NOTE(review): third-person forms ("is", "+s", VBZ) are kept from
        # the original even though the new subject is "you" - confirm.
        # The original built the default form from the subject word, which
        # turned every verb into "is"; the verb itself is used now.
        suffix = "es" if verb.endswith(self._ES_ENDINGS) else "s"
        return (verb + suffix, "VBZ")

    def resolve_objects(self, tree, sentence, screenname):
        """Rewrite an OBJ chunk whose last leaf is the pronoun.

        The leading leaves are kept and only the final pronoun is replaced:
        "you" -> "me", "me"/"i" -> the screenname, "your" -> "my",
        "my" -> "<screenname> 's", anything else -> the closest match found
        in the user's history.  Returns the new list of tokens for the chunk.
        """
        leaves = tree.leaves()
        new_tree, pronoun = leaves[:-1], leaves[-1]
        word = pronoun[i_word]

        # The original "your"/"my" branches also re-appended leaves()[1:],
        # duplicating tokens (including the pronoun just replaced), and the
        # "my" branch subscripted ``extend`` instead of calling it.
        if word == "you":
            new_tree.append(("me", pronoun[i_tag]))
        elif word in ("me", "i"):
            new_tree.append((screenname, "NN"))
        elif word == "your":
            new_tree.append(("my", "NN"))
        elif word == "my":
            new_tree.extend([(screenname, "NN"), ("'s", "POS")])
        else:
            new_tree.append(self._remember_match(sentence, pronoun, screenname))

        return new_tree

    def _remember_match(self, sentence, token, screenname):
        """Find a replacement for ``token`` and record it in ``matched``."""
        match = self.find_closest_match(sentence, token, screenname)
        self.matched.setdefault(screenname, {})[token[i_word]] = match
        return match

    def find_closest_match(self, sentence, subject, screenname):
        """Pick a replacement token for the pronoun ``subject``.

        The user's history is scanned from the newest sentence backwards and
        the first sentence that yields any candidate decides the result.
        Candidates are tokens whose tag contains "NN" or "VBG", plus the
        recorded replacement of any pronoun from the same group as
        ``subject`` (see ``match_groups``).  One candidate is chosen at
        random; ``subject`` itself is returned when none is found.
        ``sentence`` is accepted for interface compatibility and not used.
        """
        pronoun = subject[i_word]
        known = self.matched.get(screenname, {})

        # The original indexed an undefined ``brain.their_history`` here;
        # the per-user history kept by this object is what is scanned.
        for past_sentence in reversed(self.history.get(screenname, [])):
            candidates = []
            for token in reversed(past_sentence):
                word, tag = token[i_word], token[i_tag]
                if "NN" in tag or "VBG" in tag:
                    candidates.append(token)
                elif ("PRP" in tag and word in known
                        and self.match_groups(pronoun, word)):
                    candidates.append(known[word])
            if candidates:
                return choice(candidates)

        return subject

    def match_groups(self, p1, p2):
        """Return True when both pronouns belong to the same pronoun group."""
        groups = (self.male, self.female, self.first_plural, self.third_plural)
        return any(p1 in group and p2 in group for group in groups)
|---|
| 165 |
|
|---|