root/imbot/SABBrainWeightedGraph.py

Revision 51, 18.8 kB (checked in by pantley2, 3 years ago)

IMBOT: Broken due to major code cleanup.

The bot files are now located directly in the /imbot/ directory.

Also note the addition of Graph.py and Cleaner.py

Line 
1 import re, sys, time
2 import yaml
3 import pickle
4 import MontyLingua
5 import networkx as nx
6 import matplotlib.pyplot as plot # not officially compatible with Python 2.6
7 from random import choice
8 from random import uniform
9 import math
10
# Positions of the fields inside the (word, tag, elapsed-time) tuples that
# parse_incoming_message() builds for every token of a sentence.
(i_word, i_tag, i_time) = (0, 1, 2)
12
13 class SABBrain:
14     def __init__(self):
15         self.ml = MontyLingua.MontyLingua()
16         self.g = nx.LabeledDiGraph()
17         self.struct = nx.LabeledDiGraph()
18         self.current_timestamp = dict()
19         self.elapsed_timestamp = dict()
20        
21         self.lastStruct = "start"
22         self.struct.add_node("start")
23         self.struct.add_node("declarative")
24         self.struct.add_node("interrogative")
25         self.struct.add_node("exclamatory")
26         self.struct.add_node("imperative")
27        
28         self.update_graph(self.struct, "declarative", "declarative")
29         self.update_graph(self.struct, "declarative", "interrogative")
30         self.update_graph(self.struct, "declarative", "exclamatory")
31         self.update_graph(self.struct, "declarative", "imperative")
32         self.update_graph(self.struct, "interrogative", "declarative")
33         self.update_graph(self.struct, "interrogative", "interrogative")
34         self.update_graph(self.struct, "interrogative", "exclamatory")
35         self.update_graph(self.struct, "interrogative", "imperative")
36         self.update_graph(self.struct, "exclamatory", "declarative")
37         self.update_graph(self.struct, "exclamatory", "interrogative")
38         self.update_graph(self.struct, "exclamatory", "exclamatory")
39         self.update_graph(self.struct, "exclamatory", "imperative")
40         self.update_graph(self.struct, "imperative", "declarative")
41         self.update_graph(self.struct, "imperative", "interrogative")
42         self.update_graph(self.struct, "imperative", "interrogative")
43         self.update_graph(self.struct, "imperative", "imperative")
44        
45         print 'Loading brain...',
46         try:
47             self.g = nx.read_gpickle(r"Brain\brain.pickle")
48             print 'Done!'
49         except IOError:
50             print "file not found. This is normal behavior if the bot has never run before."
51        
52         print '\n'
53        
54     def halfSig(self, x):
55         return 2 * (1/(1+math.e**(-x/10)) - .5)
56    
57     def read_file(self, filename):
58         '''Returns the contents of a new-line-delimited text file as a list'''
59         myfile = file(filename, 'r')
60         contents = yaml.load(myfile.read())
61         myfile.close()
62         return contents
63    
64     def translate_from_html_to_english(self, text, debug=False):
65         '''Returns a version of 'text' where HTML codes have been replaced with human-readable text'''
66         html = self.read_file('filters/translate html to english.yml')
67         for first, second in html:
68             text = re.sub(r'(?i)\b%s\b' % first, r'_%s_' % second, text)
69             if debug: print '\t', text, ": %s <-> %s" % (first, second)
70         return text
71    
72     def translate_from_internet_lingo(self, text):
73         '''Returns a version of 'text' where Intenret lingo has been translated to English'''
74         for internet, english in self.read_file('filters/translate from internet lingo.yml'):
75             text = re.sub(r'(?i)\b%s\b' % internet, r'_%s_' % english, text)
76         return re.sub(r'\b_(.*?)_\b', r'\1', text)
77
78     def fix_spelling_mistakes(self, text):
79         '''Returns a version of 'text' where spelling mistakes have been corrected'''
80         for key, value in self.read_file('filters/fix spelling mistakes.yml'):
81             text = re.sub(r'(?i)\b%s\b' % key, r'_%s_' % value, text)
82         return re.sub(r'\b_(.*?)_\b', r'\1', text)
83
84     def swap_persons(self, text, debug=False):
85         '''Returns a version of 'text' where persons have been swapped. For example: "I" with "you"'''
86         if debug: print "swap_persons():"
87        
88         for first, second in self.read_file('filters/swap persons.yml'):
89             text = re.sub(r'(?i)\b%s\b' % first, r'_%s_' % second, text)
90             text = re.sub(r'(?i)\b%s\b' % second, r'_%s_' % first, text)
91             if debug: print '\t', text, ": %s <-> %s" % (first, second)
92         text = re.sub(r'\b_(.*?)_\b', r'\1', text)
93        
94         # unswap 'not much, you?' or it will become 'not much, me?'
95         return re.sub(r'([\.,] ?)(me|I)( *\?*)$', r'\1you\3', text)
96    
97     def clean_sentence(self, sentence, debug=False):
98         '''Returns a cleaned up version of the string 'sentence' as a list of (word, tag) tuples'''
99         # clean it up first
100         if debug: print "original input:", sentence
101         sentence = self.translate_from_html_to_english(sentence, debug)
102         if debug: print "translate_from_html_to_english():", sentence
103         sentence = self.translate_from_internet_lingo(sentence)
104         if debug: print "translate_from_internet_lingo():", sentence
105         sentence = self.fix_spelling_mistakes(sentence)
106         if debug: print "fix_spelling_mistakes():", sentence
107         # sentence = self.swap_persons(sentence, debug)
108         # if debug: print "\n\tSwapped:", sentence, '\n'
109         sentence = self.ml.tokenize(sentence, 1)
110         if debug: print "ml.tokenize():", sentence
111         return sentence
112    
113     def update_graph(self, graph, node1, node2):
114         '''Increases edge weight between node1 and node2'''
115        
116         if not self.struct.has_edge(node1, node2):
117             print "Old edge count = 0"
118             graph.add_edge(node1, node2, 1)
119         else:
120             count = graph.get_edge(node1, node2)
121             print "Old edge count ="
122             print count
123             count = count + 1
124             graph.add_edge(node1, node2, count)
125         print "New edge count from"
126         print node1
127         print "to"
128         print node2
129         print "is"
130         print graph.get_edge(node1, node2)
131    
132     def get_edge_weight(self, graph, node1, node2):
133         '''Returns the weight of the edge from node1 to node2'''
134        
135         if not graph.has_edge(node1, node2):
136             return 0
137         else:
138             count = graph.get_edge(node1, node2)
139             return 2*((1/(1 + math.e**(-(count/10)))) - .5)
140        
141     def get_weighted_choice(self, graph, node):
142         choices = graph.successors(node)
143        
144         sum = 0
145         for current in choices:
146             sum = sum + self.get_edge_weight(graph, node, current)
147        
148         selector = uniform(0, sum)
149         for current in choices:
150             selector = selector - self.get_edge_weight(graph, node, current)
151             if (selector <= 0): return current
152        
153         return choices[len(choices) - 1]
154        
155        
156        
157        
158    
    def parse_incoming_message(self, sentence, sender, debug=False):
        '''Learns from one incoming message and returns it in tokenized form

        The message is cleaned, tagged and lemmatised, the sentence-structure graph
        and the word graph are updated, and the word graph is written to disk.

        sentence -- the raw text of the message
        sender   -- name of the sender; spaces are removed and it is lower-cased before use
        debug    -- when true, intermediate results are printed

        Returns a list of (word, tag, elapsed seconds) tuples, one per token.'''

        sender = sender.replace(' ', '').lower()
        if debug: print '-' * 79

        # update the timestamp
        # (a sender we have not heard from before is treated as if the previous message came 3400 seconds ago)
        prev_time = time.time() - 3400 if not self.current_timestamp.has_key(sender) else self.current_timestamp[sender]
        self.current_timestamp[sender] = time.time()
        self.elapsed_timestamp[sender] = self.current_timestamp[sender] - prev_time
        if debug: print "Seconds after previous message:", self.elapsed_timestamp[sender]

        # tag the sentence for its parts of speech
        sentence = self.clean_sentence(sentence, debug)
        tagged_sentence = self.ml.tag_tokenized(sentence).split(' ')
        if debug: print "tagged:", tagged_sentence, '\n'

        # reformat the tagged sentence: "word/part-of-speech" becomes (word, part-of-speech)
        # NOTE(review): the unpacking below expects exactly one '/' per token -- confirm the tagger guarantees that
        sentence = []
        for index, pair in enumerate(tagged_sentence):
            (word, tag) = pair.split(r'/')
            word = self.ml.theMontyLemmatiser.lemmatise_word(word).lower()
            sentence.append((word, tag, self.elapsed_timestamp[sender]))

        # end every sentence with punctuation
        # (unless the last token is already tagged '.' or is the word '@')
        if ((sentence[-1][i_tag] != '.') and (sentence[-1][i_word] != '@')):
            sentence.append(('.', '.', self.elapsed_timestamp[sender]))

        if debug: print "Adding %s to the graph" % sentence

        # update the sentence structure progression graph and lastStruct
        # the structure is decided by the final token alone: '.', '?', '!' or anything else
        if (sentence[-1][i_word] == '.'):
            self.update_graph(self.struct, self.lastStruct, "declarative")
            self.lastStruct = "declarative"
        elif (sentence[-1][i_word] == '?'):
            self.update_graph(self.struct, self.lastStruct, "interrogative")
            self.lastStruct = "interrogative"
        elif (sentence[-1][i_word] == '!'):
            self.update_graph(self.struct, self.lastStruct, "exclamatory")
            self.lastStruct = "exclamatory"
        else:
            self.update_graph(self.struct, self.lastStruct, "imperative")
            self.lastStruct = "imperative"



        # for every word in the sentence
        for index1, pair1 in enumerate(sentence):
            # verify pair1 exists
            if (not self.g.has_node(pair1)):
                self.g.add_node(pair1)
                print "Adding new node: (%s, %s, %s)" % (pair1[0], pair1[1], pair1[2])

            # compare it to every other word in the sentence
            # (every pair of positions is therefore visited twice, once in each order)
            for index2, pair2 in enumerate(sentence):
                # don't compare a word to itself
                if index1 == index2: continue

                # verify pair2 exists
                if (not self.g.has_node(pair2)):
                    self.g.add_node(pair2)

                # keep the words in order
                # (the edge always points from the earlier word to the later one)
                (left, right) = (pair1, pair2) if index1 < index2 else (pair2, pair1)
                (left_i, right_i) = (index1, index2) if index1 < index2 else (index2, index1)

                # update the edge data
                # the edge carries a dict whose keys are the distances at which the two words have been seen
                if not self.g.has_edge(left, right):
                    data = dict()
                    data[abs(left_i - right_i)] = 1
                    self.g.add_edge(left, right, data)
                else:
                    # get any existing edge information
                    # NOTE(review): the [0] assumes get_edge() returns a sequence whose first item is the data -- confirm against the networkx version in use
                    data = self.g.get_edge(left, right)[0]

                    # if it doesn't exist yet, create it
                    if type(data) != dict:
                        data = dict()

                    # store new distance in dict
                    data[abs(left_i - right_i)] = 1
                    self.g.remove_edge(left, right)
                    self.g.add_edge(left, right, data)

        # save changes to disk
        nx.write_gpickle(self.g, r"Brain\brain.pickle")

        # return the sentence in tokenized form (needed to generate the reply)
        return sentence
247    
248     def debug_starting_words(self, elapsed_time):
249         print '\nFinding a word to start a new sentence (%s seconds after previous message)' % elapsed_time
250         for choice_index, choice_pair in enumerate(self.g.nodes()):
251             usable = '+' if not self.g.predecessors(choice_pair) and ((1-max(elapsed_time, choice_pair[i_time])/(elapsed_time + choice_pair[i_time]))**2 * 100 * (min(elapsed_time, choice_pair[i_time])/(elapsed_time + choice_pair[i_time])) > 1) else '-'
252             if usable or len(self.g.nodes()) < 10:
253                 print "\t\t\t", '%s "%s"' % (usable, choice_pair[i_word])
254    
255     def debug_word_compatability(self, reply, reply_index, reply_pair, choices, elapsed_time):
256         print '\n\t\t    Verifying compatability with word #%d %s' % (reply_index, reply_pair)
257         for choice_index, choice_pair in enumerate(choices):
258             if self.g.has_edge(reply_pair, choice_pair):
259                 usable = '+' if self.g.get_edge(reply_pair, choice_pair)[0].has_key(abs(reply_index - len(reply))) and ((1-max(elapsed_time, choice_pair[i_time])/(elapsed_time + choice_pair[i_time]))**2 * 100 * (min(elapsed_time, choice_pair[i_time])/(elapsed_time + choice_pair[i_time])) > 1) else '-'
260                 print "\t\t\t", '%s "%s" %s "%s"' % (usable, reply_pair[i_word], self.g.get_edge(reply_pair, choice_pair)[0].keys(), choice_pair[i_word])
261             else:
262                 print "\t\t\t  ", '"%s" has no connection to "%s"' % (reply_pair[i_word], choice_pair[i_word])
263    
264     def debug_sentence_and_successors(self, reply, choices):
265         print "\n\n\tSentence so far:", [group[i_word] for group in reply]
266         print "\t\tUnfiltered:\n\t\t\t", '\n\t\t\t'.join([str(group) for group in choices]), '\n'
267    
268     def debug_problem_detected(self):
269         print '\t!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
270         print "\t!PROBLEM DETECTED. ENDING THIS SENTENCE IMMEDIATELY.!"
271         print '\t!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
272    
    def create_random_sentence(self, recipient, debug=False):
        '''Builds a sentence by walking the word graph and returns it as a list of node tuples

        recipient -- name of the person the sentence is for; spaces are removed and it is
                     lower-cased, and it must already have an entry in elapsed_timestamp
                     (parse_incoming_message() creates one), otherwise a KeyError is raised
        debug     -- when true, the candidates considered at every step are printed

        Also advances self.lastStruct to a randomly chosen next sentence structure.'''
        if (debug): print 'Creating a random response'
        recipient = recipient.replace(' ', '').lower()

        # get the elapsed time between this message and the last one we received
        elapsed_time = self.elapsed_timestamp[recipient]

        # choose the sentence structure
        # NOTE(review): the chosen structure is stored and printed but not used further below
        self.lastStruct = self.get_weighted_choice(self.struct, self.lastStruct)

        print "Structure to be used is"
        print self.lastStruct


        # start the sentence with a random word that has started a sentence before
        # (a node without predecessors whose timing score for elapsed_time is above 1)
        # NOTE(review): choice() raises IndexError when no node qualifies; 'next' shadows the builtin of the same name
        if debug: self.debug_starting_words(elapsed_time)
        next = choice([pair for pair in self.g.nodes() if not self.g.predecessors(pair) and
            ((1 - max(elapsed_time, pair[i_time])/(elapsed_time + pair[i_time]))**2 * \
            100 * min(elapsed_time, pair[i_time])/(elapsed_time + pair[i_time]) > 1) ])
        reply = [next]

        while (True):
            choices = self.g.successors(reply[-1])
            if debug: self.debug_sentence_and_successors(reply, choices)

            # if there's only one possible word left, use it
            # (only when that word is tagged '.'; popping it empties 'choices', so the loop ends just below)
            if (len(choices) == 1 and choices[0][i_tag] == '.'):
                if debug: print "\tUsing the last available word"
                reply.append(choices.pop(0))

            # if there are no more words to add to the sentence, stop working on the sentence
            if (not len(choices)): break

            # for every word in our reply so far, filter out the choices for the next word that aren't good matches
            # a choice survives only if every reply word has an edge to it that records the matching distance
            # and its timing score for elapsed_time is above 1
            for reply_index, reply_pair in enumerate(reply):
                if debug: self.debug_word_compatability(reply, reply_index, reply_pair, choices, elapsed_time)
                choices = [choice_pair for choice_pair in choices if self.g.has_edge(reply_pair, choice_pair) and
                    self.g.get_edge(reply_pair, choice_pair)[0].has_key(abs(reply_index - len(reply))) and
                    ((1-max(elapsed_time, choice_pair[i_time])/(elapsed_time + choice_pair[i_time]))**2 * 100 * (min(elapsed_time, choice_pair[i_time])/(elapsed_time + choice_pair[i_time])) > 1) ]

            if debug: print "\n\t\tFiltered:\n\t\t\t", '\n\t\t\t'.join([str(group) for group in choices]), '\n'

            # saves the program from crashing, although this condition should never occur in the first place
            if (not len(choices)):
                if (debug): self.debug_problem_detected()
                break

            # use a random word from our list of valid choices
            reply.append(choice(choices))

        # return the sentence we created as a list of tuples
        return reply
325    
326     def generate_reply(self, sentence, username, debug=False):
327         reply = self.create_random_sentence(username, False)
328        
329         reply_plaintext = ''
330         for group in reply:
331             space = '' if group[i_tag] == '.' or group[i_tag] == ',' else ' '
332             reply_plaintext += '%s%s' % (space, self.ml.theMontyNLGenerator.conjugate_verb(group[i_word], group[i_tag]))
333        
334         # remove the period from sentences ending with one (allows for the possibility of questions without question marks)
335         reply_plaintext = re.sub(r'\.\s*$', '', reply_plaintext)
336         return re.sub(r'^\s+', '', reply_plaintext)
337
338 if __name__ == "__main__":
339     try:
340         bot = SABBrain()
341         debug = (len(sys.argv) == 2 and sys.argv[1] == '-debug')
342        
343         while 1:
344             sentence = ''
345             try:
346                 sentence = raw_input('> ')
347             except:
348                 raise
349            
350             sentence = bot.parse_incoming_message(sentence, 'ThePIant', debug)
351            
352            
353             # if you supply any command-line arguments, a graph will display each cycle
354             if (len(sys.argv) > 2):
355                 # positions for all nodes
356                 pos = nx.spring_layout(bot.g)
357
358                 # nodes
359                 nx.draw_networkx_nodes(bot.g, pos, node_size=2000)
360
361                 # edges
362                 nx.draw_networkx_edges(bot.g, pos, edgelist=[(u,v) for (u,v,d) in bot.g.edges(data=True) if len(d.keys()) == 1], width=1, alpha=1.0, edge_color='r')
363                 nx.draw_networkx_edges(bot.g, pos, edgelist=[(u,v) for (u,v,d) in bot.g.edges(data=True) if len(d.keys()) == 2], width=1, alpha=0.6, edge_color='g')
364                 nx.draw_networkx_edges(bot.g, pos, edgelist=[(u,v) for (u,v,d) in bot.g.edges(data=True) if len(d.keys()) == 3], width=1, alpha=0.5, edge_color='b')
365                 nx.draw_networkx_edges(bot.g, pos, edgelist=[(u,v) for (u,v,d) in bot.g.edges(data=True) if len(d.keys()) == 4], width=1, alpha=0.4)
366                 nx.draw_networkx_edges(bot.g, pos, edgelist=[(u,v) for (u,v,d) in bot.g.edges(data=True) if len(d.keys()) == 5], width=1, alpha=0.3)
367                 nx.draw_networkx_edges(bot.g, pos, edgelist=[(u,v) for (u,v,d) in bot.g.edges(data=True) if len(d.keys()) == 6], width=1, alpha=0.2)
368                 nx.draw_networkx_edges(bot.g, pos, edgelist=[(u,v) for (u,v,d) in bot.g.edges(data=True) if len(d.keys()) == 7], width=1, alpha=0.2, style='dashed')
369                 nx.draw_networkx_edges(bot.g, pos, edgelist=[(u,v) for (u,v,d) in bot.g.edges(data=True) if len(d.keys()) >  7], width=1, alpha=0.2, style='dotted')
370
371                 # labels
372                 nx.draw_networkx_labels(bot.g, pos, font_size=8, font_family='sans-serif')
373
374                 # turn off x and y axes labels
375                 plot.xticks([])
376                 plot.yticks([])
377
378                 # display graph
379                 plot.show()
380            
381             time1 = time.time()
382             reply = bot.generate_reply(sentence, 'thepiant', debug)
383             print '%79s' % reply
384             time2 = time.time()
385             print 'Computed in', str(round(time2 - time1, 2)), 'seconds.'
386             print '=' * 79
387            
388     except KeyboardInterrupt:
389         print "\nSigArtBot is shutting down...\n"
390         sys.exit(0)
Note: See TracBrowser for help on using the browser.