root/ggpa/QLearner.h

Revision 1, 3.5 kB (checked in by pantley2, 4 years ago)

GGPA code from the good old days of SIGART

Line 
1 /*
2  * QLearner.h
3  * Class that will handle the QLearning algorithm
4  */
5
6
7 #ifndef Q_LEARNER
8 #define Q_LEARNER
9
#include <string>
#include <vector>
#include "GameWorld.h"
#include "QFunction.h"
#include "QNeuralNetwork.h"
#include "State.h"
#include "Action.h"
16
/*
 * Sentinel default for the time arguments of QLearner::learn and
 * QLearner::getAction.  Per those functions' documentation, passing this
 * value means "use the time stored in the GameWorld class".
 * Parenthesised so the negative literal expands safely in any expression.
 */
#define DEFAULT_LEARN_TIME (-1.0)
18
19 class QLearner {
20
21 private:
22     GameWorld* gameWorld;
23     QFunction* qFunction;
24
25
26 public:
27
28     /*
29      * Initializes the QLearner to learn on the given world.
30      */
31     QLearner(GameWorld* world);
32
33     /*
34      * learn
35      * Parameters: learnTime - the amount of time allowed for learning.
36      *                         by default it will use the time dictated
37      *                         in the GameWorld class.
38      * Return type: none.
39      * Begins training on the given game.
40      */
41     void learn(double learnTime = DEFAULT_LEARN_TIME);
42
43     /*
44      * getAction
45      * Parameters: decisionTime - the amount of time allowed for making the
46      *                            decision.  By default, the time stored
47      *                            in the GameWorld class is used.
48      * Return type: Action*
49      * Returns an action for the current state
50      */
51     Action* getAction(double decisionTime = DEFAULT_LEARN_TIME);
52
53 private:
54
55     /**
56      * performAction
57      * Parameters: action - the action to perform
58      * Return type: none
59      * Performs the action in the game world, handling any extra overhead
60      */
61     void performAction(Action* action);
62
63     /**
64      * getMaxUtility
65      * Parameters: state - the current state
66      *             actions - the set of actions we are interested in
67      * Return type: double
68      * Returns the maximum value for the QFunction over all of the actions.
69      */
70     double getMaxUtility(State* state, vector<Action*> actions);
71
72     /*
73      * updateUtility
74      * Parameters: state - the current state
75      *             action - the current action
76      *             reward - the immediate reward
77      *             nextUtility - the expected utility of the resulting state
78      * No return type
79      * Updates the utility for the given state-action pair to a value
80      * determined by reward and nextUtility.
81      */
82     void updateUtility(State* state, Action* action, int reward,
83                        double nextUtility);
84
85     /*
86      * chooseMove
87      * Parameters: possibleActions - a vector of actions to choose from
88      * Return type: Action*
89      * Returns one of the actions from the vector.  This function is intended
90      * for use with the primary role during the learning process.  Using it
91      * in other conditions may give unexpected results.
92      */
93     Action* chooseMove(vector<Action*> possibleActions) const;
94
95     /*
96      * getRandomMove
97      * Parameters: possibleActions - a vector of actions to choose from
98      * Return type: Action*
99      * Randomly selects one of the actions from the vector.  Random seeding
100      * is not handled in this function.
101      */
102     Action* getRandomMove(vector<Action*> possibleActions) const;
103
104     /*
105      * getRandomMove
106      * Parameters: possibleActions - a vector of actions to choose from
107      *             weights - a vector of double representing the weights
108      *                       for each of the actions.
109      * Return type: Action*
110      * Randomly selects one of the actions from the vector based on the
111      * distribution defined by weights.  This does not need to be normalized.
112      */
113     Action* getRandomMove(vector<Action*> possibleActions,
114                           vector<double> weights) const;
115
116
117 };
118
119 #endif
120
Note: See TracBrowser for help on using the browser.