root/ggpa/QLearner.h
| Revision 1, 3.5 kB (checked in by pantley2, 4 years ago) |
|---|
| Line | |
|---|---|
| 1 | /* |
| 2 | * QLearner.h |
| 3 | * Class that will handle the QLearning algorithm |
| 4 | */ |
| 5 | |
| 6 | |
| 7 | #ifndef Q_LEARNER |
| 8 | #define Q_LEARNER |
| 9 | |
| 10 | #include <string> |
| 11 | #include <vector> |
| 12 | #include "GameWorld.h" |
| 13 | #include "QFunction.h" |
| 14 | #include "QNeuralNetwork.h" |
| 15 | #include "State.h" |
| 16 | #include "Action.h" |
| 17 | #define DEFAULT_LEARN_TIME -1.0 /* default argument of learn() and getAction(); per their comments, the default means "use the time held by the GameWorld class" */ |
| 18 | |
| 19 | class QLearner { |
| 20 | /* Q-learning driver: trains on one GameWorld and returns actions for it. */ |
| 21 | private: |
| 22 | GameWorld* gameWorld; /* presumably the world passed to the constructor; ownership is not shown in this header */ |
| 23 | QFunction* qFunction; /* presumably the Q-function read by getMaxUtility and written by updateUtility - confirm in QLearner.cpp */ |
| 24 | |
| 25 | |
| 26 | public: |
| 27 | |
| 28 | /* |
| 29 | * Constructor: initializes the QLearner to learn on the given world. |
| 30 | */ |
| 31 | QLearner(GameWorld* world); |
| 32 | |
| 33 | /* |
| 34 | * learn |
| 35 | * Parameters: learnTime - the amount of time allowed for learning. |
| 36 | * When omitted (DEFAULT_LEARN_TIME), the time |
| 37 | * dictated by the GameWorld class is used. |
| 38 | * Return type: none |
| 39 | * Begins training on the given game. |
| 40 | */ |
| 41 | void learn(double learnTime = DEFAULT_LEARN_TIME); |
| 42 | |
| 43 | /* |
| 44 | * getAction |
| 45 | * Parameters: decisionTime - the amount of time allowed for making the |
| 46 | * decision. When omitted (DEFAULT_LEARN_TIME), |
| 47 | * the time stored in the GameWorld class is used. |
| 48 | * Return type: Action* (ownership of the result is not specified here) |
| 49 | * Returns an action for the current state. |
| 50 | */ |
| 51 | Action* getAction(double decisionTime = DEFAULT_LEARN_TIME); |
| 52 | |
| 53 | private: |
| 54 | |
| 55 | /* |
| 56 | * performAction |
| 57 | * Parameters: action - the action to perform |
| 58 | * Return type: none |
| 59 | * Performs the action in the game world, handling any extra overhead. |
| 60 | */ |
| 61 | void performAction(Action* action); |
| 62 | |
| 63 | /* |
| 64 | * getMaxUtility |
| 65 | * Parameters: state - the current state |
| 66 | * actions - the set of actions we are interested in |
| 67 | * Return type: double |
| 68 | * Returns the maximum QFunction value over all of the given actions (empty-set behaviour is not documented here). |
| 69 | */ |
| 70 | double getMaxUtility(State* state, std::vector<Action*> actions); |
| 71 | |
| 72 | /* |
| 73 | * updateUtility |
| 74 | * Parameters: state - the current state |
| 75 | * action - the current action |
| 76 | * reward - the immediate reward |
| 77 | * nextUtility - the expected utility of the resulting state |
| 78 | * Return type: none |
| 79 | * Sets the utility of the given state-action pair to a value |
| 80 | * determined by reward and nextUtility. |
| 81 | */ |
| 82 | void updateUtility(State* state, Action* action, int reward, |
| 83 | double nextUtility); |
| 84 | |
| 85 | /* |
| 86 | * chooseMove |
| 87 | * Parameters: possibleActions - a vector of actions to choose from |
| 88 | * Return type: Action* |
| 89 | * Returns one of the actions from the vector. Intended for use with |
| 90 | * the primary role during the learning process; using it under other |
| 91 | * conditions may give unexpected results. |
| 92 | */ |
| 93 | Action* chooseMove(std::vector<Action*> possibleActions) const; |
| 94 | |
| 95 | /* |
| 96 | * getRandomMove (unweighted) |
| 97 | * Parameters: possibleActions - a vector of actions to choose from |
| 98 | * Return type: Action* |
| 99 | * Randomly selects one of the actions from the vector. Seeding of the |
| 100 | * random source is not handled in this function. |
| 101 | */ |
| 102 | Action* getRandomMove(std::vector<Action*> possibleActions) const; |
| 103 | |
| 104 | /* |
| 105 | * getRandomMove (weighted) |
| 106 | * Parameters: possibleActions - a vector of actions to choose from |
| 107 | * weights - a vector of doubles giving the weight |
| 108 | * of each of the actions. |
| 109 | * Return type: Action* |
| 110 | * Randomly selects one of the actions from the vector according to the |
| 111 | * distribution defined by weights. The weights need not be normalized. |
| 112 | */ |
| 113 | Action* getRandomMove(std::vector<Action*> possibleActions, |
| 114 | std::vector<double> weights) const; |
| 115 | |
| 116 | |
| 117 | }; |
| 118 | |
| 119 | #endif |
| 120 |
Note: See TracBrowser for help on using the browser.
