rl_agent: ValueIteration.hh Source File

Go to the documentation of this file.
00001 
00006 #ifndef _VALUEITERATION_HH_
00007 #define _VALUEITERATION_HH_
00008 
00009 #include <rl_common/Random.h>
00010 #include <rl_common/core.hh>
00011 
00012 #include <set>
00013 #include <vector>
00014 #include <map>
00015 
00017 class ValueIteration: public Planner {
00018 public:
00019 
00031   ValueIteration(int numactions, float gamma,
00032                  int MAX_LOOPS, float MAX_TIME, int modelType,
00033                  const std::vector<float> &featmax, 
00034                  const std::vector<float> &featmin, const std::vector<int> &statesPerDim,
00035                  Random rng = Random());
00036 
00039   ValueIteration(const ValueIteration &);
00040 
00041   virtual ~ValueIteration();
00042 
00043   virtual void setModel(MDPModel* model);
00044   virtual bool updateModelWithExperience(const std::vector<float> &last, 
00045                                          int act, 
00046                                          const std::vector<float> &curr, 
00047                                          float reward, bool term);
00048   virtual void planOnNewModel();
00049   virtual int getBestAction(const std::vector<float> &s);
00050   virtual void savePolicy(const char* filename);
00051 
00053   void initStates();
00054 
00056   void fillInState(std::vector<float>s, int depth);
00057 
00058   bool PLANNERDEBUG;
00059   bool POLICYDEBUG; //= false; //true;
00060   bool MODELDEBUG;
00061   bool ACTDEBUG;
00062 
00064   MDPModel* model;
00065 
00069   typedef const std::vector<float> *state_t;
00070 
00071 
00072 protected:
00073 
00074 
00075   struct state_info;
00076   struct model_info;
00077 
00079   struct state_info {
00080 
00081     int id;
00082 
00083     int stepsAway;
00084     bool fresh;
00085 
00086     // experience data
00087     std::vector<int> visits;
00088 
00089     // data filled in from models
00090     StateActionInfo* modelInfo;
00091 
00092     //std::map<state_t, std::vector<float> > P;
00093     //std::vector<float> R;
00094     //std::vector<bool> known;
00095 
00096     // q values from policy creation
00097     std::vector<float> Q;
00098 
00099   };
00100 
00102   void initStateInfo(state_info* info);
00103   
00107   state_t canonicalize(const std::vector<float> &s);
00108 
00110   void deleteInfo(state_info* info);
00111 
00113   void initNewState(state_t s);
00114 
00116   void createPolicy();
00117 
00119   void printStates();
00120 
00122   void calculateReachableStates();
00123 
00125   void removeUnreachableStates();
00126 
00128   void updateStatesFromModel();
00129 
00131   void updateStateActionFromModel(const std::vector<float> &state, int j);
00132 
00134   double getSeconds();
00135 
00137   std::vector<float> discretizeState(const std::vector<float> &s);
00138 
00139 private:
00140 
00144   std::set<std::vector<float> > statespace;
00145 
00147   std::map<state_t, state_info> statedata;
00148 
00149   std::vector<float> featmax;
00150   std::vector<float> featmin;
00151 
00152   std::vector<float> prevstate;
00153   int prevact;
00154 
00155   double planTime;
00156 
00157   int nstates;
00158   int nactions; 
00159   
00160   int MAX_STEPS;
00161   bool timingType;
00162 
00163   const int numactions;
00164   const float gamma;
00165 
00166   const int MAX_LOOPS;
00167   const float MAX_TIME;
00168   const int modelType;
00169   const std::vector<int> statesPerDim;
00170 
00171 };
00172 
00173 
00174 #endif