| 1 |
|
|---|
| 2 |
# Emitted before the imports that follow.
print("Loading...")
|---|
| 3 |
|
|---|
| 4 |
|
|---|
| 5 |
import math, pickle, os, re, sys, threading |
|---|
| 6 |
from Ask import Ask |
|---|
| 7 |
from time import time, sleep |
|---|
| 8 |
from operator import itemgetter |
|---|
| 9 |
|
|---|
| 10 |
|
|---|
| 11 |
import networkx as nx |
|---|
| 12 |
from random import choice |
|---|
| 13 |
from nltk import pos_tag |
|---|
| 14 |
|
|---|
| 15 |
|
|---|
| 16 |
from Log import Log |
|---|
| 17 |
from Cleaner import * |
|---|
| 18 |
from MessageQueue import MessageQueue |
|---|
| 19 |
from QuestionHandler import QuestionHandler |
|---|
| 20 |
from PronounHandler import PronounHandler |
|---|
| 21 |
from Graph import DistanceGraph, AssociationGraph |
|---|
| 22 |
from MontyLemmatiser import MontyLemmatiser |
|---|
| 23 |
from heapq import * |
|---|
| 24 |
|
|---|
| 25 |
# Positions inside a (word, tag) pair.
i_word = 0
i_tag = 1

# Direction selectors passed to SABBrain.generate_ngrams.
i_backward = 0
i_forward = 1

# Field positions named time/recipient/message/away; they are not referenced
# in the code visible here (presumably queue entries -- TODO confirm).
i_time = 0
i_recipient = 1
i_message = 2
i_away = 3
|---|
| 28 |
|
|---|
| 29 |
class SABBrain: |
|---|
| 30 |
def __init__(self, screenname, online=False):
    ''' Build helpers, tuning values, graphs and history state, then load the saved brain '''
    self.screenname = screenname
    self.log = Log()
    self.online = online

    # Helper objects. Cleaner is given the log and MessageQueue is given this
    # (still partially initialised) brain, so they are created in this order.
    self.cleaner = Cleaner(self.log)
    self.queue = MessageQueue(self)
    self.lemmatiser = MontyLemmatiser()
    self.asker = Ask()

    # Tuning values for candidate selection and the sentence search heap.
    self.weight_tolerance = 0.02
    self.minimum_results = 2
    self.heap_maximum = 3

    # d: distance graph, a: association graph, q: distance graph for
    # questions stored in its own pickle file.
    self.d = DistanceGraph(self.log)
    self.a = AssociationGraph(self.log)
    self.q = DistanceGraph(self.log, file_path=r"Brain/questions.pickle")

    # Sentinel nodes marking sentence boundaries in both distance graphs.
    self.start = ("start", "start")
    self.end = ("end", "end")
    for boundary in (self.start, self.end):
        self.d.add_node(boundary)
        self.q.add_node(boundary)

    # Switches handed to Cleaner.clean_sentence.
    self.toggle = {
        "html": True,
        "internet": True,
        "spelling": True,
        "swap": True,
        "simplify": False,
        "contractions": True,
    }

    # Per-user conversation history.
    self.forget_history_seconds = 60
    self.history_last_updated = {}
    self.their_history = {}
    self.our_history = {}
    self.history_length = 7
    # Harmonic sum 1/1 + 1/2 + ... + 1/history_length.
    self.history_weight = sum(
        (1.0 / position for position in range(1, self.history_length + 1)),
        0.0)
    self.pronoun_handlers = PronounHandler()

    print('Loading brain files...')
    self.load_brain()

    print('Ready!')
    print('')
|---|
| 84 |
|
|---|
| 85 |
def save(self):
    ''' Write the distance, association and question graphs to disk, in that order '''
    for graph in (self.d, self.a, self.q):
        graph.save()
|---|
| 90 |
|
|---|
| 91 |
def load_brain(self):
    ''' Load the three graphs (d, a, q) and then the Cleaner's data '''
    for graph in (self.d, self.a, self.q):
        graph.load()
    self.cleaner.load()
|---|
| 97 |
|
|---|
| 98 |
def is_question(self, sentence):
    ''' Classify a tagged sentence as a question.

    sentence is a sequence of (word, tag) pairs. Returns:
      >= 0  index of the first word whose tag contains 'W' (except "that")
      -1    after leading interjections the sentence starts VB, NN/PRP, VB
      -2    after leading interjections the sentence starts with "is"/"are"
      -3    the last word is '?'
      -4    not recognised as a question (also returned for an empty sentence)
    '''
    # An empty sentence previously crashed (unbound loop variable, then
    # sentence[-1]); it simply is not a question.
    if not sentence:
        return -4

    for position, pair in enumerate(sentence):
        if 'W' in pair[i_tag] and not pair[i_word] == "that":
            return position

    # Count the leading interjections / commas so the pattern checks below
    # look at the first "real" words.
    interjections = 0
    for pair in sentence:
        if pair[i_word] in ["oh", "uh", "well", "huh", "now"] or ',' in pair[i_tag]:
            interjections += 1
        else:
            break

    # Index of the last word, counted from the first non-interjection word.
    last_index = len(sentence) - 1 - interjections

    if last_index >= 2:
        first = sentence[interjections][i_tag]
        second = sentence[1 + interjections][i_tag]
        third = sentence[2 + interjections][i_tag]
        if 'VB' in first and ('NN' in second or 'PRP' in second) and 'VB' in third:
            return -1

    if last_index >= 1:
        if sentence[interjections][i_word] in ["is", "are"]:
            return -2

    if sentence[-1][i_word] == '?':
        return -3

    return -4
|---|
| 128 |
|
|---|
| 129 |
def parse_incoming_message(self, sender, message):
    ''' Clean and tag an incoming message, store it in a graph and in the sender's history.

    Returns the tagged sentence produced by pos_tag.
    '''
    self.log.delete()
    self.log.enter(locals=locals())

    sender = self.cleaner.simplify(sender)

    cleaned = self.cleaner.clean_sentence(message, sender, self.toggle)
    sentence = pos_tag(cleaned)

    self.log.add("Adding %s to the graph" % sentence)
    # Questions go to the question graph, everything else to the distance graph.
    if self.is_question(sentence) > -4:
        target_graph = self.q
    else:
        target_graph = self.d
    self.add_to_graph(sentence, sender, target_graph)

    # 1 marks the sentence as theirs rather than ours.
    self.update_history(sender, sentence, 1)

    self.log.leave()

    return sentence
|---|
| 154 |
|
|---|
| 155 |
def is_word_nva(self, word):
    ''' Return True if this (word, tag) pair is a noun, verb, adjective or particle.

    The tag must contain "NN", "VB" or "JJ", or equal "RP", and the word must
    not lemmatise to 'be'. If inspecting the tag raises, a message is printed
    and False is returned.
    '''
    # Direct call; the previous map(lambda ...)[0] wrapper did the same work
    # for a single element and is not subscriptable on Python 3.
    lemmatised_word = self.lemmatiser.lemmatise_word(word[i_word])
    try:
        tag = word[i_tag]
        has_nva_tag = "NN" in tag or "VB" in tag or "JJ" in tag or tag == "RP"
        return has_nva_tag and lemmatised_word != 'be'
    except Exception:
        # Narrowed from a bare except so Ctrl-C / SystemExit are not swallowed.
        print("is_word_nva(%s) raised an exception." % str(word))
        return False
|---|
| 163 |
|
|---|
| 164 |
def add_to_graph(self, sentence, screenname, specified_graph):
    ''' Record 'sentence' in 'specified_graph' and update the association graph.

    For the distance graph (self.d) the pronoun-resolved pairs are used as
    nodes; for any other graph the pairs are used as given. Association
    counts and edges always use the resolved pairs.
    '''
    self.log.enter("Updating graphs")

    resolved_sentence = self.pronoun_handlers.resolve_pronouns(sentence, screenname)
    word_count = len(sentence)

    for position, raw_pair in enumerate(sentence):
        if not specified_graph.has_node(raw_pair):
            specified_graph.add_node(raw_pair)

        resolved_pair = resolved_sentence[position]
        if self.is_word_nva(resolved_pair):
            self.a.increment(resolved_pair)

        node = resolved_pair if specified_graph == self.d else raw_pair

        # Distance from the start sentinel and to the end sentinel.
        specified_graph.add_edge(self.start, node, position + 1)
        specified_graph.add_edge(node, self.end, word_count - position)

        # Link every earlier word of the sentence to this one.
        for earlier_position in range(position):
            earlier_raw = sentence[earlier_position]
            earlier_resolved = resolved_sentence[earlier_position]

            if self.is_word_nva(resolved_pair) and self.is_word_nva(earlier_resolved):
                self.a.add_edge(resolved_pair, earlier_resolved, 1.0)

            gap = position - earlier_position
            if specified_graph == self.d:
                specified_graph.add_edge(earlier_resolved, resolved_pair, gap)
            else:
                specified_graph.add_edge(earlier_raw, raw_pair, gap)

    self.log.leave()
|---|
| 208 |
|
|---|
| 209 |
def compute_frequency(self, history_index, history_tuple):
    ''' Score a history word as (history_index + 1) divided by its association-graph label '''
    occurrences = int(self.a.label(history_tuple))
    freq = (history_index + 1) * (1.0 / occurrences)
    message = "%s = freq(%d, %s)" % (str(freq), history_index, str(history_tuple))
    self.log.add(message)
    return freq
|---|
| 214 |
|
|---|
| 215 |
def compute_word_associativity(self, word1_tuple, word2_tuple):
    ''' Return the word1-word2 association edge divided by (history_weight * label of word2) '''
    word2_count = self.a.label(word2_tuple)
    edge_weight = self.a.get_edge(word1_tuple, word2_tuple)
    normaliser = self.history_weight * word2_count
    return edge_weight / normaliser
|---|
| 219 |
|
|---|
| 220 |
def compute_word_associativity_history(self, word, screenname):
    ''' Sum how strongly 'word' relates to every NVA word in screenname's history.

    Returns 0.0 when 'word' itself is not an NVA word. Each matching history
    word contributes 1.0; every other NVA history word contributes its
    compute_word_associativity value.
    '''
    if not self.is_word_nva(word):
        return 0.0

    def contribution(history_word):
        if word == history_word:
            return 1.0
        return self.compute_word_associativity(word, history_word)

    return sum(
        (contribution(history_word)
         for history_sentence in self.their_history[screenname]
         for history_word in history_sentence
         if self.is_word_nva(history_word)),
        0.0)
|---|
| 234 |
|
|---|
| 235 |
def update_history(self, their_name, sentence, theirs):
    ''' Append 'sentence' to the history kept for their_name.

    A truthy 'theirs' stores it in their_history (and first links its words to
    the words already in that history via the association graph); otherwise it
    goes to our_history. Each history keeps at most history_length sentences.
    Both histories are cleared first when the stored expiry time has passed.
    '''
    self.log.enter(locals=locals())

    expired = (their_name in self.history_last_updated
               and self.history_last_updated[their_name] < time())
    if expired:
        print("This is the first message from %s in awhile, clearing history before replying..." % str(their_name))
        self.their_history[their_name] = []
        self.our_history[their_name] = []
    # Push the expiry time forward on every message.
    self.history_last_updated[their_name] = time() + self.forget_history_seconds

    self.pronoun_handlers.add_to_history(sentence, their_name)

    if theirs and their_name in self.their_history:
        span = self.history_length + 1.0
        for position, past_sentence in enumerate(self.their_history[their_name]):
            edge_weight = (position + 1.0) / span
            for past_word in past_sentence:
                for word in sentence:
                    self.a.add_edge(past_word, word, edge_weight)

    # Same bookkeeping for both sides; only the dictionary differs.
    history_by_user = self.their_history if theirs else self.our_history
    if their_name not in history_by_user:
        history_by_user[their_name] = []
    elif len(history_by_user[their_name]) == self.history_length:
        # Drop the oldest sentence to make room.
        history_by_user[their_name] = history_by_user[their_name][1:]
    history_by_user[their_name].append(sentence)

    self.log.leave()
|---|
| 281 |
|
|---|
| 282 |
def answer_question(self, screenname, sentence, position):
    ''' Dispatch a question to the matching QuestionHandler routine.

    'position' is the code produced by is_question. Returns a pair
    (answered, result); answered is False only when the fallback
    QuestionHandler.answer_other was used.
    '''
    self.log.enter(locals=locals())

    # Looked up first, as before, regardless of which branch runs.
    what_handler = QuestionHandler.answer_what

    answered = True
    if position >= 0:
        # A question word was found at 'position'; answer from there on.
        self.log.enter("Answering a '%s' question..." % 'what')
        result = what_handler(self, screenname, sentence[position:], self.log)
        self.log.leave()
    elif position == -1:
        # The whole sentence is passed (slice from 0).
        self.log.enter("Answering a '%s' question..." % 'VB NN VB')
        result = QuestionHandler.answer_vbnnvb(self, screenname, sentence[0:], self.log)
        self.log.leave()
    elif position == -2:
        # The whole sentence is passed (slice from 0).
        self.log.enter("Answering a '%s' question..." % 'is are')
        result = QuestionHandler.answer_isare(self, screenname, sentence[0:], self.log)
        self.log.leave()
    else:
        answered = False
        self.log.add("They asked a question we can't answer. Generating a confused response.")
        result = QuestionHandler.answer_other(self, screenname, sentence)

    self.log.leave("Result: %s" % str(result))
    return (answered, result)
|---|
| 327 |
|
|---|
| 328 |
def nva_word_count(self, sentence):
    ''' Count the words of 'sentence' for which is_word_nva is true '''
    return sum(1 for word in sentence if self.is_word_nva(word))
|---|
| 331 |
|
|---|
| 332 |
def compute_sentence_weight(self, sentence, screenname):
    ''' Compute the "weight" of a candidate sentence against the conversation history.

    Returns 100 when we already sent exactly this sentence to screenname.
    Otherwise starts at 1.0 and, for every sentence in their history and every
    candidate word, adds 0.6 per identical NVA history word, an association
    term per associated history word, and 0.5 per identical NVA word that we
    have already said.
    '''
    weight = 1.0

    # History entries may be tuples while candidates are lists, and a list
    # never equals a tuple; compare both in tuple form so repeats are found.
    candidate = tuple(sentence)

    if screenname in self.our_history:
        our_sentences = self.our_history[screenname]
        if any(tuple(previous) == candidate for previous in our_sentences):
            self.log.add("Already-used sentence detected! Penalizing with a large weight")
            return 100
    else:
        our_sentences = []
        self.log.add("We haven't said anything yet")

    # Flatten our past sentences into words; the old loop compared each
    # candidate word with whole sentences and therefore never matched.
    our_words = [past_word for previous in our_sentences for past_word in previous]

    for history_sentence in self.their_history.get(screenname, []):
        for word in sentence:
            nva = self.is_word_nva(word)

            for history_word in history_sentence:
                if word == history_word and nva:
                    weight += 0.6
                elif self.a.has_edge(word, history_word):
                    associativity = self.compute_word_associativity(word, history_word)
                    weight += (-1 * math.log(associativity) if associativity > 0.0 else 1.0)

            # NOTE(review): nesting reconstructed from a flattened source;
            # this check runs once per sentence in their history.
            for past_word in our_words:
                if word == past_word and nva:
                    self.log.add("%s duplicated, decreasing sentence weight" % str(word))
                    weight += 0.5

    return weight
|---|
| 365 |
|
|---|
| 366 |
def find_best_sentence(self, recipient, sentenceChoices):
    ''' Pick one reply from sentenceChoices.

    Every choice is weighted with compute_sentence_weight; weights of 100 or
    more mark replies we already used. If all choices are repeats (or there
    are none) a random stock phrase (str) is returned. Otherwise repeats are
    dropped and a random sentence (tuple) is drawn from the highest-weighted
    candidates.
    '''
    self.log.enter(locals=locals())
    all_sentences = {}

    all_repeat_replies = True
    contains_repeat = False
    for sentence in sentenceChoices:
        weight = self.compute_sentence_weight(sentence, recipient)
        all_sentences[tuple(sentence)] = weight
        self.log.add("%f = %s" % (weight, str(sentence)))
        if weight < 100:
            all_repeat_replies = False
        else:
            contains_repeat = True

    if all_repeat_replies:
        # Close the log section before leaving; this early return used to
        # skip log.leave() and left the log nesting unbalanced.
        self.log.leave()
        return choice(["tell me more", "teach me", "explain something new to me", "I don't know much about that. Teach me more.",
                       "my small brain can't comprehend what you said yet", "can you rephrase that?", "what do you mean?"])
    elif contains_repeat:
        self.log.enter("some non-repeats and some repeats were created")
        # Iterate over a snapshot so entries can be deleted safely.
        for key, value in list(all_sentences.items()):
            if value >= 100:
                self.log.add("remove: %s" % str(key))
                del all_sentences[key]
            else:
                self.log.add("keep: %s" % str(key))
        self.log.leave()

    # Highest weight first (ascending sort, then reversed).
    ranked = sorted(all_sentences.items(), key=itemgetter(1))
    ranked.reverse()
    keep = max(int(len(ranked) * self.weight_tolerance), self.minimum_results)
    best_sentences = ranked[:keep]

    self.log.leave("TOP CHOICES:" + str(best_sentences))
    return choice(best_sentences)[0]
|---|
| 410 |
|
|---|
| 411 |
def find_best_reply_starters(self, screenname):
    ''' Return one-word sentence starters chosen from screenname's history.

    Every NVA word of their history (tags '.' and 'end' excluded) accumulates
    compute_frequency scores; the lowest-scoring words are returned, each
    wrapped in its own single-element list.
    '''
    self.log.enter(locals=locals())

    scores = {}
    for position, past_sentence in enumerate(self.their_history[screenname]):
        for pair in past_sentence:
            tag = pair[i_tag]
            if tag != '.' and tag != 'end' and self.is_word_nva(pair):
                frequency = self.compute_frequency(position, pair)
                if pair in scores:
                    scores[pair] += frequency
                else:
                    scores[pair] = frequency

    # Ascending by score; keep the tolerance share but never fewer than
    # minimum_results entries (when that many exist).
    ranked = sorted(scores.items(), key=itemgetter(1))
    keep = max(int(len(ranked) * self.weight_tolerance), self.minimum_results)
    bestWords = ranked[:keep]

    self.log.leave("TOP REPLY STARTERS:" + str(bestWords))
    return [[entry[0]] for entry in bestWords]
|---|
| 436 |
|
|---|
| 437 |
def find_worthy_words(self, words, screenname, sentence):
    ''' Filter candidate 'words' down to those worth adding to 'sentence'.

    Words already in the sentence are skipped. NVA words are scored with
    compute_word_associativity_history; other words are fillers scored -1.0.
    The result lists the words, best score first, that are fillers or whose
    score reaches average * log(number of options). With more than five
    options and a weak best score, nothing is returned.
    '''
    self.log.enter()
    self.log.add("adding to sentence: %s" % str(sentence))

    scores = {}
    nva_total = 0.0

    for word in words:
        if word in sentence:
            self.log.add("duplicate word ignored: %s" % str(word))
            continue
        if self.is_word_nva(word):
            scores[word] = self.compute_word_associativity_history(word, screenname)
            nva_total += scores[word]
            self.log.add("%s = %s" % (word[0], str(scores[word])))
        else:
            self.log.add("%s = filler" % (word[0]))
            scores[word] = -1.0

    # The average is taken over all kept words, fillers included.
    if len(scores) == 0:
        nva_average = 0.0
    else:
        nva_average = nva_total / len(scores)

    # Ascending sort, then reversed, so the best score comes first.
    all_options = sorted(scores.items(), key=itemgetter(1))[::-1]

    self.log.enter("all_options")
    self.log.add("%s" % str(all_options))
    self.log.leave()

    if len(all_options) > 5 and all_options[0][1] < nva_average * 1.25:
        self.log.add("average of all words: %f" % nva_average)
        self.log.add("best word: %s = %f, %f too low" % (all_options[0][0][0], all_options[0][1], (nva_average * 1.25) - all_options[0][1]))
        all_options = []

    final_list = []
    for candidate, score in all_options:
        if score >= nva_average * math.log(len(all_options)) or score == -1.0:
            final_list.append(candidate)

    self.log.leave("chosen words: %s" % str(final_list))
    return final_list
|---|
| 480 |
|
|---|
| 481 |
|
|---|
| 482 |
def evaluate_pronouns(self, sentence, recipient):
    ''' Placeholder for resolving a pronoun to the most recent relevant noun phrase; does nothing yet '''
    return None
|---|
| 485 |
|
|---|
| 486 |
def generate_ngrams(self, sentences, graph, screenname, direction): |
|---|
| 487 |
''' Returns ngrams for the given list of sentence starters ''' |
|---|
| 488 |
self.log.enter(locals=locals()) |
|---|
| 489 |
|
|---|
| 490 |
i = 0 |
|---|
| 491 |
final_sentences = [] |
|---|
| 492 |
sentence_heap = [] |
|---|
| 493 |
for sentence in sentences: |
|---|
| 494 |
heappush(sentence_heap, (self.compute_sentence_weight(sentence, screenname), sentence)) |
|---|
| 495 |
|
|---|
| 496 |
while sentence_heap and len(final_sentences) < min: |
|---|
| 497 |
sentence = heappop(sentence_heap)[1] |
|---|
| 498 |
i += 1 |
|---|
| 499 |
self.log.enter("sentence (%d): %s" % (i, ' '.join([w[0] for w in sentence]))) |
|---|
| 500 |
|
|---|
| 501 |
if (sentence[-1] == self.end and direction == i_forward) or (sentence[0] == self.start and direction == i_backward): |
|---|
| 502 |
final_sentences.append(sentence) |
|---|
| 503 |
self.log.leave() |
|---|
| 504 |
continue |
|---|
| 505 |
|
|---|
| 506 |
original_sentence_weight = self.compute_sentence_weight(sentence, screenname) |
|---|
| 507 |
if direction == i_forward: |
|---|
| 508 |
newWords = [word for word in graph.successors(sentence[-1]) if graph.edge(sentence[-1], word, 1)] |
|---|
| 509 |
else: |
|---|
| 510 |
newWords = [word for word in graph.predecessors(sentence[0]) if graph.edge(word, sentence[0], 1)] |
|---|
| 511 |
|
|---|
| 512 |
newValues = {} |
|---|
| 513 |
|
|---|
| 514 |
for newWord in newWords: |
|---|
| 515 |
newValues[newWord] = 0.0 |
|---|
| 516 |
for index, word in enumerate(sentence): |
|---|
| 517 |
if (direction == i_forward and graph.ngram_probability(word, newWord, len(sentence) - index, 1) >= 0.15) or \ |
|---|
| 518 |
(direction == i_backward and graph.ngram_probability(word, newWord, index + 1, -1) >= 0.15): |
|---|
| 519 |
newValues[newWord] += (1.0/(len(sentence) - index)) |
|---|
| 520 |
newValues[newWord] /= min(len(sentence), 4) |
|---|
| 521 |
|
|---|
| 522 |
filteredWords = self.find_worthy_words([chosenWord for chosenWord, wordWeight in newValues.iteritems() if wordWeight >= 0.5], screenname, sentence) |
|---|
| 523 |
|
|---|
| 524 |
for new_word in filteredWords: |
|---|
| 525 |
if direction == i_forward: |
|---|
| 526 |
new_sentence = sentence[:] |
|---|
| 527 |
new_sentence.append(new_word) |
|---|
| 528 |
else: |
|---|
| 529 |
new_sentence = [new_word] |
|---|
| 530 |
new_sentence.extend(sentence) |
|---|
| 531 |
|
|---|
| 532 |
|
|---|
| 533 |
new_sentence_weight = self.compute_sentence_weight(new_sentence, screenname) |
|---|
| 534 |
|
|---|
| 535 |
if len(sentence_heap) >= self.heap_maximum: |
|---|
| 536 |
sentence_heap.sort() |
|---|
| 537 |
if sentence_heap[-1][0] > new_sentence_weight: |
|---|
| 538 |
sentence_heap = sentence_heap[:-1] |
|---|
| 539 |
heapify(sentence_heap) |
|---|
| 540 |
heappush(sentence_heap, (new_sentence_weight, new_sentence)) |
|---|
| 541 |
self.log.add("adding new sentence weight %f to sentences" % (new_sentence_weight)) |
|---|
| 542 |
|
|---|
| 543 |
else: |
|---|
| 544 |
heappush(sentence_heap, (new_sentence_weight, new_sentence)) |
|---|
| 545 |
self.log.add("adding new sentence %f to sentences" % (new_sentence_weight)) |
|---|
| 546 |
|
|---|
| 547 |
self.log.leave() |
|---|
| 548 |
|
|---|
| 549 |
self.log.enter("these sentences may be useful") |
|---|
| 550 |
for sentence in final_sentences: self.log.add(str(sentence)) |
|---|
| 551 |
self.log.leave() |
|---|
| 552 |
|
|---|
| 553 |
self.log.leave() |
|---|
| 554 |
return final_sentences |
|---|
| 555 |
|
|---|
| 556 |
def create_relevant_sentences(self, recipient):
    ''' Build candidate replies for 'recipient'.

    Sentences are grown backward and then forward from the best reply
    starters, first in the distance graph and then in the question graph;
    a question from the asker about their latest sentence is added when
    one is produced. Returns statements followed by questions.
    '''
    self.log.enter(locals=locals())

    starters = self.find_best_reply_starters(recipient)

    def grow(graph):
        first_halves = self.generate_ngrams(starters, graph, recipient, i_backward)
        return self.generate_ngrams(first_halves, graph, recipient, i_forward)

    candidates = grow(self.d)
    questions = grow(self.q)

    asked = self.asker.ask(self.their_history[recipient][-1])
    if asked is not None:
        questions.append(asked)

    candidates.extend(questions)

    self.log.leave()
    return candidates
|---|
| 574 |
|
|---|
| 575 |
def best_reply_sentences(self, recipient):
    ''' Choose a reply for 'recipient' from freshly generated sentences.

    Returns the stock string "tell me something new" when nothing usable was
    generated, otherwise whatever find_best_sentence picks from the
    statements (ending in the end sentinel) and questions ('?' as the
    second-to-last word).
    '''
    self.log.enter(locals=locals())
    recipient = self.cleaner.simplify(recipient)

    relevant_sentences = self.create_relevant_sentences(recipient)

    statements = [sentence for sentence in relevant_sentences
                  if sentence and sentence[-1] == self.end]
    # Length guard: indexing [-2] on a one-word sentence used to raise IndexError.
    questions = [sentence for sentence in relevant_sentences
                 if len(sentence) >= 2 and sentence[-2][i_word] == '?']

    reply_choices = statements + questions

    if not reply_choices:
        # Both lists are empty here, so only the stock reply is possible;
        # the log section is closed on this path as well.
        self.log.leave()
        return "tell me something new"

    best_sentence = self.find_best_sentence(recipient, reply_choices)
    self.log.leave()
    return best_sentence
|---|
| 601 |
|
|---|
| 602 |
def generate_reply(self, recipient, sentence):
    ''' Produce a reply to 'sentence' and queue it for 'recipient'.

    A str 'sentence' is queued unchanged. Otherwise the tagged sentence is
    answered (when is_question recognises it) or a reply is generated from
    the conversation; a tuple/list reply is recorded in our history and
    flattened to plain text. The elapsed time is passed to the queue.
    Returns None.
    '''
    start_time = time()
    self.log.enter(locals=locals())

    if isinstance(sentence, str):
        reply = sentence
    else:
        recipient = self.cleaner.simplify(recipient)
        # Classify once and reuse the code (it was computed twice before).
        question_code = self.is_question(sentence)
        if question_code > -4:
            answered, reply = self.answer_question(recipient, sentence, question_code)
        else:
            answered, reply = False, self.best_reply_sentences(recipient)

        self.log.add("question answered: %s" % str(answered))
        self.log.add("reply: %s" % str(reply))

        if isinstance(reply, (tuple, list)):
            weight = self.compute_sentence_weight(reply, recipient)
            self.log.add("Reply weight: %f" % weight)
            if weight < 1:
                self.log.add("This sentence is off-topic")
            self.update_history(recipient, reply, 0)

            own_name = self.cleaner.simplify(self.screenname)
            reply_plaintext = ''
            self.log.enter("building the reply sentence")
            for group in reply:
                if group == self.start:
                    continue

                if group == self.end:
                    break

                self.log.add("tuple: '%s'" % str(group))
                self.log.add("sentence so far: '%s'\n" % reply_plaintext)

                # Swap perspective: "you am" -> "you are", their name ->
                # "you", our own name -> "i".
                word = self.cleaner.simplify(group[i_word])
                if (word == "'m" or word == 'am') and re.search(r'\byou$', reply_plaintext) is not None:
                    reply_plaintext += " are"
                elif word == recipient:
                    reply_plaintext += " you"
                elif word == own_name:
                    reply_plaintext += " i"
                else:
                    try:
                        # No space before punctuation, brackets or
                        # contraction fragments.
                        space = '' if group[i_tag] in ['.', ',', '(', '{', '['] or '\'' in group[i_word] else ' '
                    except Exception:
                        space = ''
                    reply_plaintext += '%s%s' % (space, group[i_word])
            # Close the "building the reply sentence" section; it was never
            # closed before, leaving the log nesting one level too deep.
            self.log.leave()

            # Drop one trailing sentence-ending punctuation mark.
            reply = re.sub(r'[\.\?\!\;]$', '', reply_plaintext.strip())

    self.log.add("queued outgoing message to %s: %s" % (recipient, reply))
    self.queue.add_outgoing(recipient, reply, time() - start_time)
    self.log.leave()
|---|
| 667 |
|
|---|
| 668 |
if __name__ == "__main__":
    # Console driver: reads lines from stdin and feeds them to the bot as
    # messages from 'coolsam'. Flags read from sys.argv: -debug enables the
    # log; -graph together with a / d / q draws that graph after each message.
    bot = SABBrain("sigartbot")
    worker = None
    try:
        bot.log.enabled = '-debug' in sys.argv
        wants_graph = '-graph' in sys.argv
        graphA = wants_graph and 'a' in sys.argv
        graphD = wants_graph and 'd' in sys.argv
        graphQ = wants_graph and 'q' in sys.argv

        def process_queue():
            ''' Process the message queue once a second while bot.running is set '''
            while bot.running:
                sleep(1)
                bot.queue.process()

        bot.running = True
        worker = threading.Thread(target=process_queue)
        worker.start()

        while 1:
            sentence = raw_input('')
            print('')

            bot.queue.add_incoming('coolsam', sentence)

            if graphA: bot.a.draw_graph()
            if graphD: bot.d.draw_graph()
            if graphQ: bot.q.draw_graph()

    except (KeyboardInterrupt, EOFError):
        # EOFError (stdin closed) used to escape this handler, leaving the
        # worker thread running and the brain unsaved.
        bot.running = False
        sys.stdout.write("\nSigArtBot is shutting down...\n")
        # Wait for the worker to finish its current pass before `bot` is
        # cleared, otherwise it could still dereference `bot`.
        if worker is not None:
            worker.join()
        bot.save()
        print("done!")
        bot = None
        sys.exit(0)
|---|