root/imbot/SABBrain.py

Revision 118, 26.2 kB (checked in by pantley2, 3 years ago)

IMBOT: still trying to prevent it from repeating itself

Line 
1 #!/usr/bin/python
2 print "Loading..."
3
4 # modules that come with Python
5 import math, pickle, os, re, sys, threading
6 from Ask import Ask
7 from time import time, sleep
8 from operator import itemgetter
9
10 # third party dependencies
11 import networkx as nx
12 from random import choice
13 from nltk import pos_tag
14
15 # our code
16 from Log import Log
17 from Cleaner import *
18 from MessageQueue import MessageQueue
19 from QuestionHandler import QuestionHandler
20 from PronounHandler import PronounHandler
21 from Graph import DistanceGraph, AssociationGraph
22 from MontyLemmatiser import MontyLemmatiser
23 from heapq import *
24
# positions inside a tagged word tuple: (word, part-of-speech tag)
i_word, i_tag = 0, 1
# direction flags passed to generate_ngrams
i_backward, i_forward = 0, 1
# presumably field positions of a queued message record -- not used in the visible part of this file
i_time, i_recipient, i_message, i_away = 0, 1, 2, 3
28
29 class SABBrain:
30         def __init__(self, screenname, online=False):
31                 ''' Assign default internal values and load graph data '''
32                
33                 # general
34                 self.screenname = screenname
35                 self.log = Log()
36                 self.online = online
37                 self.cleaner = Cleaner(self.log)
38                 self.queue = MessageQueue(self)
39                 self.lemmatiser = MontyLemmatiser()
40                 self.asker = Ask()
41                
42                 # internal values that may need tweaking
43                 self.weight_tolerance = 0.02 # weight tolerance (smaller = more strict, larger = less strict)
44                 self.minimum_results = 2     # minimum number of results to aim for
45                 self.heap_maximum = 3
46                
47                 # graphs
48                 self.d = DistanceGraph(self.log)
49                 self.a = AssociationGraph(self.log)
50                 self.q = DistanceGraph(self.log, file_path=r"Brain/questions.pickle")
51                 self.start = ("start", "start")
52                 self.end = ("end", "end")
53                 self.d.add_node(self.start)
54                 self.q.add_node(self.start)
55                 self.d.add_node(self.end)
56                 self.q.add_node(self.end)
57                
58                 # runtime modes that can be turned on and off
59                 self.toggle = dict()
60                 self.toggle["html"] = True
61                 self.toggle["internet"] = True
62                 self.toggle["spelling"] = True
63                 self.toggle["swap"] = True
64                 self.toggle["simplify"] = False
65                 self.toggle["contractions"] = True
66                
67                 # message history per user, where weight is the maximum weight that can be applied to any given word pair
68                 self.forget_history_seconds = 60
69                 self.history_last_updated = dict()
70                 self.their_history = dict()
71                 self.our_history = dict()
72                 self.history_length = 7
73                 self.history_weight = 0.0
74                 for i in range(1, self.history_length+1):
75                         self.history_weight += 1.0/i
76                 self.pronoun_handlers = PronounHandler()
77                
78                 # load any existing brain data
79                 print 'Loading brain files...'
80                 self.load_brain()
81                
82                 print 'Ready!'
83                 print ''
84        
85         def save(self):
86                 ''' Save all of our brain graphs to disk '''
87                 self.d.save()  # distances graph
88                 self.a.save()  # associations graph
89                 self.q.save()  # questions graph
90        
91         def load_brain(self):
92                 ''' Load brain data into our graphs from pickle files on disk and *.yml files into the Cleaner '''
93                 self.d.load()  # distances graph
94                 self.a.load()  # associations graph
95                 self.q.load()  # questions graph
96                 self.cleaner.load()  # *.yml files
97        
98         def is_question(self, sentence):
99                 '''Checks if a sentence is a question based on keywords in it or a ?'''
100                
101                 # tag contains W for all Wh-type words and how
102                 for index, word in enumerate(sentence):
103                         if 'W' in word[i_tag] and not word[i_word] == "that":
104                                 return index
105                
106                 # bypass interjections
107                 interjections = 0;
108                 for interindex, word in enumerate(sentence):
109                         if word[i_word] in ["oh", "uh", "well", "huh", "now"] or ',' in word[i_tag]:
110                                 index = index - 1;
111                                 interjections = interjections + 1;
112                                 continue
113                         else:
114                                 break
115                
116                 if index >= 2:
117                         if 'VB' in sentence[0 + interjections][i_tag] and ('NN' in sentence[1 + interjections][i_tag] or 'PRP' in sentence[1 + interjections][i_tag]) and 'VB' in sentence[2 + interjections][i_tag]:
118                                 return -1;
119                
120                 if index >= 1:
121                         if sentence[0 + interjections][i_word] in ["is", "are"]:
122                                 return -2;
123                
124                 if sentence[-1][i_word] == '?':
125                         return -3;
126                        
127                 return -4;
128        
129         def parse_incoming_message(self, sender, message):
130                 ''' Stores messages in a graph for use in creating sentences later '''
131                 self.log.delete()
132                 self.log.enter(locals=locals())
133                
134                 # remove the formatting from the screennames before they get used in a graph
135                 sender = self.cleaner.simplify(sender)
136                
137                 # convert the message string into a list of (word, part-of-speech) tuples
138                 message = self.cleaner.clean_sentence(message, sender, self.toggle)
139                 sentence = pos_tag(message)
140                
141                 self.log.add("Adding %s to the graph" % sentence)               
142                 self.add_to_graph(sentence, sender, self.q) if self.is_question(sentence) > -4 else self.add_to_graph (sentence, sender, self.d)
143                
144                 # update their_name's history
145                 self.update_history(sender, sentence, 1)               
146                
147                 # return the sentence in tokenized form (needed to generate the reply)
148                 self.log.leave()
149                
150                 # if you want to see what the out put is, uncomment next line
151                 #print sentence
152                
153                 return sentence
154        
155         def is_word_nva(self, word):
156                 ''' Returns True if this word is a Noun, Verb, or Adjective and not the word 'is', False otherwise '''
157                 lemmatised_word = map(lambda the_tokenizer_str:self.lemmatiser.lemmatise_word(the_tokenizer_str,),[word[i_word]])[0]
158                 try:
159                         return ("NN" in word[i_tag] or "VB" in word[i_tag] or "JJ" in word[i_tag] or word[i_tag] == "RP") and lemmatised_word != 'be'
160                 except:
161                         print "is_word_nva(%s) raised an exception." % str(word)
162                         return False
163        
164         def add_to_graph(self, sentence, screenname, specified_graph):
165                 ''' Adds the given 'sentence' to the 'specified_graph' (normally the distance or questions graph)'''
166                 self.log.enter("Updating graphs")
167                
168                 resolved_sentence = self.pronoun_handlers.resolve_pronouns(sentence, screenname)
169                 # for every word in the sentence
170                 for index1, pair1 in enumerate(sentence):
171                         if not specified_graph.has_node(pair1):
172                                 specified_graph.add_node(pair1)
173                        
174                         rpair1 = resolved_sentence[index1]
175                         if (self.is_word_nva(rpair1)):
176                                 self.a.increment(rpair1)
177                        
178                         # add an edge between the word and self.start
179                         if specified_graph == self.d:
180                                 specified_graph.add_edge(self.start, rpair1, index1 + 1)
181                         else:
182                                 specified_graph.add_edge(self.start, pair1, index1 + 1)
183                        
184                         # add an edge between the word and self.end
185                         if specified_graph == self.d:
186                                 specified_graph.add_edge(rpair1, self.end, len(sentence) - index1)
187                         else:
188                                 specified_graph.add_edge(pair1, self.end, len(sentence) - index1)
189                        
190                         # compare it to every other word in the sentence
191                         for index2 in range(0, index1):
192                                 pair2 = sentence[index2]
193                                 rpair2 = resolved_sentence[index2]
194                                
195                                 # history association
196                                 if self.is_word_nva(rpair1) and self.is_word_nva(rpair2):
197                                         self.a.add_edge(rpair1, rpair2, 1.0)
198                                
199                                 # word distance
200                                 distance = index1 - index2
201                                
202                                 if specified_graph == self.d:
203                                         specified_graph.add_edge(rpair2, rpair1, distance)
204                                 else:
205                                         specified_graph.add_edge(pair2, pair1, distance)
206                                        
207                 self.log.leave()
208        
209         def compute_frequency(self, history_index, history_tuple):
210                 ''' Word relevance based on history position function '''
211                 freq = (history_index+1) * (1.0/int(self.a.label(history_tuple)))
212                 self.log.add("%s = freq(%d, %s)" % (str(freq), history_index, str(history_tuple)))
213                 return freq
214        
215         def compute_word_associativity(self, word1_tuple, word2_tuple):
216                 ''' Compute bayesian probability of word1 given word2 '''
217                 occurences_2 = self.a.label(word2_tuple)
218                 return self.a.get_edge(word1_tuple, word2_tuple)/(self.history_weight * occurences_2)
219        
220         def compute_word_associativity_history(self, word, screenname):
221                 ''' Compute independent bayesian probability of seeing this word given the current history '''
222                
223                 # don't try to find a weight for non-NVA words
224                 if not self.is_word_nva(word):
225                         return 0.0
226                
227                 weight = 0.0
228                 for history_index, history_sentence in enumerate(self.their_history[screenname]):
229                         for history_word in history_sentence:
230                                 if self.is_word_nva(history_word):
231                                         weight += (1.0 if word == history_word else self.compute_word_associativity(word, history_word))
232                
233                 return weight
234        
235         def update_history(self, their_name, sentence, theirs):
236                 ''' Appends 'sentence' to their_name's history '''
237                 self.log.enter(locals=locals())
238                
239                 if self.history_last_updated.has_key(their_name) and self.history_last_updated[their_name] < time():
240                         print "This is the first message from %s in awhile, clearing history before replying..." % str(their_name)
241                         self.history_last_updated[their_name] = time() + self.forget_history_seconds
242                         self.their_history[their_name] = []
243                         self.our_history[their_name] = []
244                 else:
245                         self.history_last_updated[their_name] = time() + self.forget_history_seconds
246                
247                 self.pronoun_handlers.add_to_history(sentence, their_name)
248                 # add association edges if 'their_name' != self.screenname
249                 if theirs and self.their_history.has_key(their_name):
250                         for history_index, history_sentence in enumerate(self.their_history[their_name]):
251                                 for history_word in history_sentence:
252                                         for word in sentence:
253                                                 self.a.add_edge(history_word, word, (history_index + 1.0)/(self.history_length + 1.0))
254                
255                 # update their history or our history, depending on who sent the sentence
256                 if theirs:
257                         if self.their_history.has_key(their_name):
258                                 if len(self.their_history[their_name]) == self.history_length:
259                                         self.their_history[their_name] = self.their_history[their_name][1:]
260                                         self.their_history[their_name].append(sentence)
261                                 else: self.their_history[their_name].append(sentence)
262                        
263                         # we've never talked to this person before, so this is the only sentence
264                         else:
265                                 self.their_history[their_name] = []
266                                 self.their_history[their_name].append(sentence)
267                
268                 else:
269                         if self.our_history.has_key(their_name):
270                                 if len(self.our_history[their_name]) == self.history_length:
271                                         self.our_history[their_name] = self.our_history[their_name][1:]
272                                         self.our_history[their_name].append(sentence)
273                                 else: self.our_history[their_name].append(sentence)
274                        
275                         # we've never talked to this person before, so this is the only sentence
276                         else:
277                                 self.our_history[their_name] = []
278                                 self.our_history[their_name].append(sentence)
279                
280                 self.log.leave()
281        
282         def answer_question(self, screenname, sentence, position):
283                 ''' Converts the question to normal form and searches for an answer in the distance graph '''
284                 self.log.enter(locals=locals())
285                
286                 question_handler = {
287                         'what'  : QuestionHandler.answer_what
288                         # 'why'         : QuestionHandler.answer_why,
289                         # 'which'       : QuestionHandler.answer_what,
290                         # 'how'         : QuestionHandler.answer_how,
291                         # 'when'        : QuestionHandler.answer_when,
292                         # 'where'       : QuestionHandler.answer_where,
293                         # 'whence'      : QuestionHandler.answer_where,
294                         # 'who'         : QuestionHandler.answer_who,
295                         # 'whom'        : QuestionHandler.answer_who,
296                         # 'whoever' : QuestionHandler.answer_who,
297                 }
298                
299                 result = None
300                 answered = False
301                
302                 # case for what
303                 if position >= 0:
304                         #return question_handler[sentence[position][i_word]](self, screenname, sentence[position:])
305                         self.log.enter("Answering a '%s' question..." % 'what')
306                         result = question_handler['what'](self, screenname, sentence[position:], self.log)
307                         self.log.leave()
308                         answered = True
309                 elif position == -1:
310                         position = position + 1;
311                         self.log.enter("Answering a '%s' question..." % 'VB NN VB')
312                         result = QuestionHandler.answer_vbnnvb(self, screenname, sentence[position:], self.log)
313                         self.log.leave()
314                         answered = True
315                 elif position == -2:
316                         position = position + 2;
317                         self.log.enter("Answering a '%s' question..." % 'is are')
318                         result = QuestionHandler.answer_isare(self, screenname, sentence[position:], self.log)
319                         self.log.leave()
320                         answered = True
321                 else:
322                         self.log.add("They asked a question we can't answer. Generating a confused response.")
323                         result = QuestionHandler.answer_other(self, screenname, sentence)
324                 self.log.leave("Result: %s" % str(result))
325                
326                 return (answered, result)
327        
328         def nva_word_count(self, sentence):
329                 nva_words = [word for word in sentence if self.is_word_nva(word)]
330                 return len(nva_words)
331        
332         def compute_sentence_weight(self, sentence, screenname):
333                 ''' Computes the "weight" of a sentence (how useful it would be as a reply to previous sentences) '''
334                 # for every word in this sentence, compare it to 'screenname's history of sentences
335                 weight = 1.0
336                
337                 if self.our_history.has_key(screenname):
338                         if sentence in self.our_history[screenname]:
339                                 self.log.add("Already-used sentence detected! Penalizing with a large weight")
340                                 return 100
341                         else:
342                                 self.log.add("We haven't said anything yet")
343                
344                 for history_index, history_sentence in enumerate(self.their_history[screenname]):       
345                         for tuple in sentence:
346                                 nva = self.is_word_nva(tuple)
347                                
348                                 # if this word and their history word have some sort of connection, increase sentence weight
349                                 for history_tuple in history_sentence:
350                                         if tuple == history_tuple and nva:
351                                                 weight += 0.6
352                                         elif self.a.has_edge(tuple, history_tuple):
353                                                 tmp = self.compute_word_associativity(tuple, history_tuple)
354                                                 weight += (-1 * math.log(tmp) if tmp > 0.0 else 1.0)
355                                        
356                                 # if this word is something we said recently, penalize its use
357                                 if self.our_history.has_key(screenname):
358                                         for history_tuple in self.our_history[screenname]:
359                                                 if tuple == history_tuple and nva:
360                                                         self.log.add("%s duplicated, decreasing sentence weight" % str(tuple))
361                                                         weight += 0.5
362                
363                 #adjusted_weight = weight / math.log(self.nva_word_count(sentence) + 2)
364                 return weight
365        
366         def find_best_sentence(self, recipient, sentenceChoices):
367                 self.log.enter(locals=locals())
368                 all_sentences = {}
369                
370                 # give every sentence a weight
371                 all_repeat_replies = True
372                 contains_repeat = False
373                 for sentence in sentenceChoices:
374                         weight = self.compute_sentence_weight(sentence, recipient)
375                         all_sentences[tuple(sentence)] = weight
376                         self.log.add("%f = %s" % (weight, str(sentence)))
377                         if weight < 100:
378                                 all_repeat_replies = False
379                         else:
380                                 contains_repeat = True
381                
382                 # when we have a mix of new replies and repeats
383                 if all_repeat_replies:
384                         return choice(["tell me more", "teach me", "explain something new to me", "I don't know much about that. Teach me more.",
385                                 "my small brain can't comprehend what you said yet", "can you rephrase that?", "what do you mean?"])
386                 elif contains_repeat:
387                         self.log.enter("some non-repeats and some repeats were created")
388                         everything = all_sentences.items()
389                        
390                         # remove the repeats
391                         for key, value in everything:
392                                 if value >= 100:
393                                         self.log.add("remove: %s" % str(key))
394                                         del all_sentences[key]
395                                 else:
396                                         self.log.add("keep: %s" % str(key))
397                         self.log.leave()
398                
399                 # narrow our choices down to just the best sentences
400                 all_sentences = sorted(all_sentences.iteritems(), key=itemgetter(1))
401                 all_sentences.reverse()
402                 if int(len(all_sentences) * self.weight_tolerance) < self.minimum_results:
403                         best_sentences = all_sentences[:self.minimum_results]
404                 else:
405                         best_sentences = all_sentences[:int(len(all_sentences) * self.weight_tolerance)]
406                
407                 # return one of the best sentences
408                 self.log.leave("TOP CHOICES:" + str(best_sentences))
409                 return choice(best_sentences)[0]
410        
411         def find_best_reply_starters(self, screenname):
412                 ''' Return words that look like subjects of our current conversation '''
413                 self.log.enter(locals=locals())
414                 # compute the frequency for every word in the history
415                 history_words = {}
416                 for history_index, history_sentence in enumerate(self.their_history[screenname]):
417                         for history_tuple in history_sentence:
418                                 if (history_tuple[i_tag] == '.' or history_tuple[i_tag] == 'end' or not self.is_word_nva(history_tuple)):
419                                         continue
420                                
421                                 # if the word appears more than once, just keep adding, because bigger is better
422                                 if history_words.has_key(history_tuple):
423                                         history_words[history_tuple] += self.compute_frequency(history_index, history_tuple)
424                                 else:
425                                         history_words[history_tuple] = self.compute_frequency(history_index, history_tuple)
426                
427                 # choose the best words
428                 history_words = sorted(history_words.iteritems(), key=itemgetter(1))
429                 if int(len(history_words) * self.weight_tolerance) < self.minimum_results:
430                         bestWords = history_words[:self.minimum_results]
431                 else:
432                         bestWords = history_words[:int(len(history_words) * self.weight_tolerance)]
433                
434                 self.log.leave("TOP REPLY STARTERS:" + str(bestWords))
435                 return [[word[0]] for word in bestWords]
436                
437         def find_worthy_words(self, words, screenname, sentence):
438                 ''' Returns a list of words that would be a good addition to the 'sentence' we're constructing '''
439                 self.log.enter()
440                 self.log.add("adding to sentence: %s" % str(sentence))
441                
442                 worthy_words = {}
443                 nva_average = 0.0
444                 #questionable_words = {}
445                 for word in words:
446                         # penalize words that we've already used
447                         if not word in sentence:
448                                 if self.is_word_nva(word):
449                                         worthy_words[word] = self.compute_word_associativity_history(word, screenname)
450                                         nva_average += worthy_words[word]
451                                         self.log.add("%s = %s" % (word[0], str(worthy_words[word])))
452                                 else:
453                                         self.log.add("%s = filler" % (word[0]))
454                                         worthy_words[word] = -1.0 # placeholder value
455                         else:
456                                 self.log.add("duplicate word ignored: %s" % str(word))
457                
458                 # add each non-NVA word with the value of 'the average of all NVA words' so they have a chance of being chosen
459                 nva_average = 0.0 if len(worthy_words) == 0 else nva_average/len(worthy_words)
460                
461                 # sort the results, putting the best at the top
462                 all_options = sorted(worthy_words.iteritems(), key=itemgetter(1))
463                 all_options.reverse()
464                
465                 self.log.enter("all_options")
466                 self.log.add("%s" % str(all_options))
467                 self.log.leave()
468                
469                 # if nothing looks useful, clear the whole list
470                 if (len(all_options) > 5) and (all_options[0][1] < nva_average * 1.25):
471                         self.log.add("average of all words: %f" % nva_average)
472                         self.log.add("best word: %s = %f, %f too low" % (all_options[0][0][0], all_options[0][1], (nva_average * 1.25) - all_options[0][1]))
473                         all_options = []
474                
475                 # narrow all the options down to just the best ones
476                 final_list = [chosen_word[0] for chosen_word in all_options if ((chosen_word[1] >= nva_average * math.log(len(all_options))) or (chosen_word[1] == -1.0))]
477
478                 self.log.leave("chosen words: %s" % str(final_list))
479                 return final_list
480        
481        
482         def evaluate_pronouns(self, sentence, recipient):
483                 '''Resolves a pronoun to be the most recent relevant noun phrase seen'''
484                 pass
485        
486         def generate_ngrams(self, sentences, graph, screenname, direction):
487                 ''' Returns ngrams for the given list of sentence starters '''         
488                 self.log.enter(locals=locals())
489                
490                 i = 0
491                 final_sentences = []
492                 sentence_heap = []
493                 for sentence in sentences:
494                         heappush(sentence_heap, (self.compute_sentence_weight(sentence, screenname), sentence))
495                
496                 while sentence_heap and len(final_sentences) < min:
497                         sentence = heappop(sentence_heap)[1]
498                         i += 1
499                         self.log.enter("sentence (%d): %s" % (i, ' '.join([w[0] for w in sentence])))
500                        
501                         if (sentence[-1] == self.end and direction == i_forward) or (sentence[0] == self.start and direction == i_backward):
502                                 final_sentences.append(sentence)
503                                 self.log.leave()
504                                 continue
505                        
506                         original_sentence_weight = self.compute_sentence_weight(sentence, screenname)
507                         if direction == i_forward:
508                                 newWords = [word for word in graph.successors(sentence[-1]) if graph.edge(sentence[-1], word, 1)]
509                         else:
510                                 newWords = [word for word in graph.predecessors(sentence[0]) if graph.edge(word, sentence[0], 1)]
511                        
512                         newValues = {}
513                        
514                         for newWord in newWords:
515                                 newValues[newWord] = 0.0
516                                 for index, word in enumerate(sentence):
517                                         if (direction == i_forward and graph.ngram_probability(word, newWord, len(sentence) - index, 1) >= 0.15) or \
518                                         (direction == i_backward and graph.ngram_probability(word, newWord, index + 1, -1) >= 0.15):
519                                                 newValues[newWord] += (1.0/(len(sentence) - index))
520                                 newValues[newWord] /= min(len(sentence), 4)
521                        
522                         filteredWords = self.find_worthy_words([chosenWord for chosenWord, wordWeight in newValues.iteritems() if wordWeight >= 0.5], screenname, sentence)
523                        
524                         for new_word in filteredWords:
525                                 if direction == i_forward:
526                                         new_sentence = sentence[:]
527                                         new_sentence.append(new_word)
528                                 else:
529                                         new_sentence = [new_word]
530                                         new_sentence.extend(sentence)
531                                
532                                 # use the new sentence if it seems like we're making progress
533                                 new_sentence_weight = self.compute_sentence_weight(new_sentence, screenname)
534                                
535                                 if len(sentence_heap) >= self.heap_maximum:
536                                         sentence_heap.sort()
537                                         if sentence_heap[-1][0] > new_sentence_weight:
538                                                 sentence_heap = sentence_heap[:-1]
539                                                 heapify(sentence_heap)
540                                                 heappush(sentence_heap, (new_sentence_weight, new_sentence))
541                                                 self.log.add("adding new sentence weight %f to sentences" % (new_sentence_weight))
542                                        
543                                 else:
544                                         heappush(sentence_heap, (new_sentence_weight, new_sentence))
545                                         self.log.add("adding new sentence %f to sentences" % (new_sentence_weight))
546                                        
547                         self.log.leave()
548                
549                 self.log.enter("these sentences may be useful")
550                 for sentence in final_sentences: self.log.add(str(sentence))
551                 self.log.leave()
552                
553                 self.log.leave()
554                 return final_sentences                 
555                
556         def create_relevant_sentences(self, recipient):
557                 ''' Create a sentence from scratch that asks a question '''
558                 self.log.enter(locals=locals())
559                
560                 reply_starters = self.find_best_reply_starters(recipient)
561                 first_halfs = self.generate_ngrams(reply_starters, self.d, recipient, i_backward)
562                 full_sentences = self.generate_ngrams(first_halfs, self.d, recipient, i_forward)
563                 first_half_qs = self.generate_ngrams(reply_starters, self.q, recipient, i_backward)
564                 full_questions = self.generate_ngrams(first_half_qs, self.q, recipient, i_forward)
565                
566                 new_question = self.asker.ask(self.their_history[recipient][-1])
567                 if not new_question == None:
568                         full_questions.append(new_question)
569                        
570                 full_sentences.extend(full_questions)
571                
572                 self.log.leave()
573                 return full_sentences
574        
575         def best_reply_sentences(self, recipient):
576                 ''' Create a sentence from scratch based on the current conversation '''
577                 self.log.enter(locals=locals())
578                 recipient = self.cleaner.simplify(recipient)
579                
580                 # create sentences including the chosen words from the best reply starters function
581                 relevant_sentences = self.create_relevant_sentences(recipient)
582                
583                 statements = [sentence for sentence in relevant_sentences if sentence[-1] == self.end]
584                 questions = [sentence for sentence in relevant_sentences if sentence[-2][i_word] == '?']
585                
586                 # use both statements and questions
587                 reply_choices = statements[:]
588                 reply_choices.extend(questions)
589                
590                 if len(reply_choices) == 0:
591                         if len(questions) == 0 and len(statements) == 0:                               
592                                 return "tell me something new" # we're speechless
593                         elif len(questions) == 0:
594                                 return choice(statements)
595                         else:
596                                 return choice(questions)
597                
598                 best_sentence = self.find_best_sentence(recipient, reply_choices)
599                 self.log.leave()
600                 return best_sentence
601        
602         def generate_reply(self, recipient, sentence):
603                 ''' Gets a sentence to send, in tuple form, and converts it to a string before logging/returning it '''
604                 start_time = time()
605                 self.log.enter(locals=locals())
606                
607                 # if we received an instant reply, use it
608                 if isinstance(sentence, str):
609                         reply = sentence
610                
611                 # otherwise try to formulate a reply based on sentence
612                 else:
613                         recipient = self.cleaner.simplify(recipient)
614                         if self.is_question(sentence)>-4 :
615                                 result = self.answer_question(recipient, sentence, self.is_question(sentence))
616                         else:
617                                 result = (False, self.best_reply_sentences(recipient))
618                         (answered, reply) = result
619                        
620                         (answered, reply) = result
621                         self.log.add("question answered: %s" % str(answered))
622                         self.log.add("reply: %s" % str(reply))
623                        
624                         if isinstance(reply, tuple) or isinstance(reply, list):
625                                 # update their_name's history
626                                 weight = self.compute_sentence_weight(reply, recipient)
627                                 self.log.add("Reply weight: %f" % weight)
628                                 if (weight < 1):
629                                         self.log.add("This sentence is off-topic")
630                                 self.update_history(recipient, reply, 0)
631                                
632                                 reply_plaintext = ''
633                                 self.log.enter("building the reply sentence")
634                                 for group in reply:
635                                         if group == self.start:
636                                                 continue
637                                                
638                                         if group == self.end:
639                                                 break
640                                        
641                                         self.log.add("tuple: '%s'" % str(group))
642                                         self.log.add("sentence so far: '%s'\n" % reply_plaintext)
643                                        
644                                         # last-second swapping
645                                         word = self.cleaner.simplify(group[i_word])
646                                         if (word == "'m" or word == 'am') and re.search(r'\byou$', reply_plaintext) != None:
647                                                 reply_plaintext += " are"
648                                         elif word == recipient:
649                                                 reply_plaintext += " you"
650                                         elif word == self.cleaner.simplify(self.screenname):
651                                                 reply_plaintext += " i"
652                                         else:
653                                                 try:
654                                                         space = '' if group[i_tag] in ['.', ',', '(', '{', '['] or '\'' in group[i_word] else ' '
655                                                 except:
656                                                         #print "group:", group
657                                                         space = ''
658                                                 reply_plaintext += '%s%s' % (space, group[i_word])
659                                
660                                 # remove the period from sentences ending with one (allows for the possibility of questions without question marks)
661                                 reply = re.sub(r'[\.\?\!\;]$', '', reply_plaintext.strip())
662                
663                 # add this response to the message queue
664                 self.log.add("queued outgoing message to %s: %s" % (recipient, reply))
665                 self.queue.add_outgoing(recipient, reply, time() - start_time)
666                 self.log.leave()
667
if __name__ == "__main__":
        # Console harness: every line typed on stdin is fed to the bot as an
        # incoming message from 'coolsam'.
        #   -debug        enable the bot's log
        #   -graph a|d|q  redraw the association/distance/question graph each cycle
        bot = SABBrain("sigartbot")
        try:
                bot.log.enabled = '-debug' in sys.argv
                show_graphs = '-graph' in sys.argv
                graphA = show_graphs and 'a' in sys.argv
                graphD = show_graphs and 'd' in sys.argv
                graphQ = show_graphs and 'q' in sys.argv

                def process_queue(brain):
                        ''' Process the message queue once a second until shutdown. '''
                        # brain is passed in explicitly so that rebinding the global
                        # `bot` during shutdown cannot break a cycle in progress
                        while brain.running:
                                sleep(1)
                                brain.queue.process()

                # process messages in a second thread so we can type and receive messages at the same time
                bot.running = True
                threading.Thread(target=process_queue, args=(bot,)).start()

                while True:
                        sentence = raw_input('')
                        print('')

                        # tell the bot we got another message
                        bot.queue.add_incoming('coolsam', sentence)

                        # display graph(s) each cycle if the -graph flag was used
                        if graphA: bot.a.draw_graph()
                        if graphD: bot.d.draw_graph()
                        if graphQ: bot.q.draw_graph()

        except (KeyboardInterrupt, EOFError):
                # Ctrl-C or end of input.  EOFError has to be handled as well:
                # otherwise bot.running stays True and the non-daemon worker
                # thread keeps the process alive without the bot being saved.
                bot.running = False
                sys.stdout.write("\nSigArtBot is shutting down...\n")
                bot.save()
                print("done!")
                bot = None
                sys.exit(0)
Note: See TracBrowser for help on using the browser.