| 1 |
import os, sys |
|---|
| 2 |
from nltk import pos_tag, word_tokenize, RegexpParser |
|---|
| 3 |
from nltk.tree import Tree |
|---|
| 4 |
# Positions of the word and of its part-of-speech tag inside a tagged-token
# tuple; the code below indexes tokens as token[i_word] and token[i_tag].
i_word, i_tag = 0, 1
|---|
| 5 |
|
|---|
| 6 |
class Ask:
    '''
    Turns a POS-tagged statement into a follow-up question.

    The tagged sentence is chunked with a cascaded RegexpParser grammar and
    the resulting top-level chunks are matched against a few fixed shapes
    (see ask()).
    '''

    def __init__(self):
        # Chunk grammar handed to RegexpParser. A raw string is used so the
        # `\$` in the MyNP rule reaches NLTK unchanged without relying on an
        # invalid string escape.
        # NOTE(review): `<AJ>` is presumably meant to be `<JJ>` and `<NP*>`
        # presumably `<NP>`; both are left as written because changing them
        # would change how sentences are chunked -- confirm before editing.
        self.regexp = r'''
            NP: {<DT>?<AJ>*<NN.*>+}
            INP: {<DT>?<AJ>*<PRP><NN.*>+}
            PPro: {<DT>?<AJ>*<PRP>}
            MyNP: {<PRP\$><NP>*}
            AdjN: {<DT>?<JJ><NP>}
            Opinion: {<MD>}
            VB: {(<JJR>?<TO>?((<V.*><RB>)|<VB.*>)+)*}
            PP: {<IN><NP*>}
            Adj: {<RB.*><JJ>}
            RBs: {<RB><RB>+}
            '''
        self.chunker = RegexpParser(self.regexp)
        # Counts, per call to ask(), of the node types met by recurse_tree().
        self.seen = {}

    @staticmethod
    def _label(tree):
        '''
        Returns the label of an nltk Tree.
        Uses tree.label() when it exists (NLTK 3+) and falls back to the
        older tree.node attribute otherwise.
        '''
        label = getattr(tree, 'label', None)
        if callable(label):
            return label()
        return tree.node

    def _bump(self, key):
        ''' Adds one to the self.seen counter stored under key. '''
        self.seen[key] = self.seen.get(key, 0) + 1

    def recurse_tree(self, tree, sub=None):
        '''
        Converts a tree into a flat list of (word, tag) tuples and returns it.

        tree = iterable of Tree nodes and/or tagged-token tuples; items of any
               other type are ignored
        sub  = name of the rewriting rule to run; only 'I' is recognised:
               - the verb 'am' is replaced by 'are you'
               - 'do you' is inserted before the first verb seen, when it is
                 tagged VBP and is not 'do' or 'have'
               - 'you' is inserted before 'not' and after "n't"
               Nested subtrees are flattened WITHOUT the rule.
        self.seen = dict of how many times we've seen each node type: subtrees
               are counted under their label, tokens under the first letter
               of their tag

        Independently of sub, the token 'your' is replaced by ('my', 'PRP$').
        '''
        sentence = []

        for node in tree:
            if isinstance(node, Tree):
                self._bump(self._label(node))
                sentence.extend(self.recurse_tree(node))
            elif isinstance(node, tuple):
                word, tag = node[i_word], node[i_tag]
                self._bump(tag[0])
                drop_token = False

                if sub == 'I':
                    if tag[0] == 'V':
                        if word == 'am':
                            # 'am' itself is dropped in favour of 'are you'.
                            sentence.extend([('are', 'VBP'), ('you', 'PRP')])
                            drop_token = True
                        elif (self.seen['V'] == 1 and tag == 'VBP'
                                and word not in ('do', 'have')):
                            # First verb so far: prefix it with 'do you'.
                            sentence.extend([('do', 'VBP'), ('you', 'PRP')])

                    if word == 'not':
                        sentence.append(('you', 'PRP'))

                if word == 'your':
                    sentence.append(('my', 'PRP$'))
                elif not drop_token:
                    sentence.append(node)

                if sub == 'I' and word == "n't":
                    sentence.append(('you', 'PRP'))

        return sentence

    def tree_is_type(self, tree, name):
        ''' Returns True when tree is an nltk Tree whose label equals name. '''
        return isinstance(tree, Tree) and self._label(tree) == name

    def ask(self, sentence):
        '''
        Builds a question from a tagged sentence.

        sentence = tagged tokens, passed as-is to the chunker's parse()
                   (presumably the output of pos_tag -- confirm with callers)

        Returns a list of (word, tag) tuples ending in ('?', '.'), or None
        when the sentence is empty or matches none of these shapes:
          - last top-level item is an unchunked JJ token -> 'how <adjective> ?'
          - exactly [PPro containing 'i', VB]            -> 'why <verb phrase> ?'
          - [PPro containing 'i', VB, NP or MyNP, ...]   -> 'why <verb phrase>
            <noun phrase> <remaining chunks> ?'
        Remaining top-level items that are plain tokens rather than chunks add
        nothing, because recurse_tree() only collects tuples and subtrees
        found inside the item it is given.

        self.seen is reset at the start of every call.
        '''
        self.seen = {}

        # An empty sentence yields an empty tree, and indexing tree[-1] on it
        # would raise IndexError.
        if not sentence:
            return None

        tree = self.chunker.parse(sentence)

        last = tree[-1]
        if isinstance(last, tuple) and last[i_tag] == 'JJ':
            return [('how', 'WRB'), last, ('?', '.')]

        if len(tree) < 2 or not self.tree_is_type(tree[0], 'PPro'):
            return None

        subject = self.recurse_tree(tree[0])
        if 'i' not in [token[i_word].lower() for token in subject]:
            return None
        if not self.tree_is_type(tree[1], 'VB'):
            return None

        question = [('why', 'WRB')] + self.recurse_tree(tree[1], 'I')

        if len(tree) >= 3:
            # With more than two chunks, the third one must be a noun phrase.
            if not (self.tree_is_type(tree[2], 'NP')
                    or self.tree_is_type(tree[2], 'MyNP')):
                return None
            question.extend(self.recurse_tree(tree[2]))
            for index in range(3, len(tree)):
                question.extend(self.recurse_tree(tree[index]))

        return question + [('?', '.')]
|---|
| 111 |
|
|---|
| 112 |
|
|---|
| 113 |
|
|---|