agent.cpp
Go to the documentation of this file.
00001 #include <ros/ros.h>
00002 
00003 #include <rl_msgs/RLStateReward.h>
00004 #include <rl_msgs/RLEnvDescription.h>
00005 #include <rl_msgs/RLAction.h>
00006 #include <rl_msgs/RLExperimentInfo.h>
00007 #include <rl_msgs/RLEnvSeedExperience.h>
00008 
00009 #include <ros/callback_queue.h>
00010 
00011 #include <rl_common/core.hh>
00012 #include <rl_common/Random.h>
00013 #include <rl_common/ExperienceFile.hh>
00014 
00015 #include <rl_agent/DiscretizationAgent.hh>
00016 #include <rl_agent/QLearner.hh>
00017 #include <rl_agent/ModelBasedAgent.hh>
00018 #include <rl_agent/SavedPolicy.hh>
00019 #include <rl_agent/Dyna.hh>
00020 #include <rl_agent/Sarsa.hh>
00021 
00022 #include "std_msgs/String.h"
00023 
00024 #include <getopt.h>
00025 #include <stdlib.h>
00026 
00027 #define NODE "RLAgent"
00028 
00029 static ros::Publisher out_rl_action;
00030 static ros::Publisher out_exp_info;
00031 
00032 bool firstAction = true;
00033 int seed = 1;
00034 
00035 Agent* agent = NULL;
00036 bool PRINTS = true;
00037 
00038 rl_msgs::RLExperimentInfo info;
00039 char* agentType;
00040 
00041 // default parameters
00042 float discountfactor = 0.99;
00043 float epsilon = 0.1;
00044 float alpha = 0.3;
00045 float initialvalue = 0.0;
00046 float actrate = 10.0;
00047 float lambda = 0.1;
00048 int M = 5;
00049 int model = C45TREE;
00050 int explore = GREEDY;
00051 int modelcombo = BEST;
00052 int planner = PAR_ETUCT_ACTUAL;
00053 int nmodels = 1;
00054 bool reltrans = true;
00055 int nstates = 0;
00056 int k = 1000;
00057 char *filename = NULL;
00058 int history = 0;
00059 float v = 0;
00060 float n = 0;
00061 // possibly over-written by command line arguments
00062 
00063 
00064 void displayHelp(){
00065   cout << "\n Call agent --agent type [options]\n";
00066   cout << "Agent types: qlearner sarsa modelbased rmax texplore dyna savedpolicy\n";
00067   cout << "\n Options:\n";
00068   cout << "--seed value (integer seed for random number generator)\n";
00069   cout << "--gamma value (discount factor between 0 and 1)\n";
00070   cout << "--epsilon value (epsilon for epsilon-greedy exploration)\n";
00071   cout << "--alpha value (learning rate alpha)\n";
00072   cout << "--initialvalue value (initial q values)\n";
00073   cout << "--actrate value (action selection rate (Hz))\n";
00074   cout << "--lamba value (lamba for eligibility traces)\n";
00075   cout << "--m value (parameter for R-Max)\n";
00076   cout << "--k value (For Dyna: # of model based updates to do between each real world update)\n";
00077   cout << "--history value (# steps of history to use for planning with delay)\n";
00078   cout << "--filename file (file to load saved policy from for savedpolicy agent)\n";
00079   cout << "--model type (tabular,tree,m5tree)\n";
00080   cout << "--planner type (vi,pi,sweeping,uct,parallel-uct,delayed-uct,delayed-parallel-uct)\n";
00081   cout << "--explore type (unknown,greedy,epsilongreedy,variancenovelty)\n";
00082   cout << "--combo type (average,best,separate)\n";
00083   cout << "--nmodels value (# of models)\n";
00084   cout << "--nstates value (optionally discretize domain into value # of states on each feature)\n";
00085   cout << "--reltrans (learn relative transitions)\n";
00086   cout << "--abstrans (learn absolute transitions)\n";
00087   cout << "--v value (For TEXPLORE: b/v coefficient for rewarding state-actions where models disagree)\n";
00088   cout << "--n value (For TEXPLORE: n coefficient for rewarding state-actions which are novel)\n";
00089   cout << "--prints (turn on debug printing of actions/rewards)\n";
00090 
00091   cout << "\n For more info, see: http://www.ros.org/wiki/rl_agent\n";
00092   exit(-1);
00093 
00094 }
00095 
00097 void processState(const rl_msgs::RLStateReward::ConstPtr &stateIn){
00098 
00099   if (agent == NULL){
00100     cout << "no agent yet" << endl;
00101     return;
00102   }
00103 
00104   rl_msgs::RLAction a;
00105 
00106   // first action
00107   if (firstAction){
00108     a.action = agent->first_action(stateIn->state);
00109     info.episode_reward = 0;
00110     info.number_actions = 1;
00111   } else {
00112     info.episode_reward += stateIn->reward;
00113     // if terminal, no action, but calculate reward sum
00114     if (stateIn->terminal){
00115       agent->last_action(stateIn->reward);
00116       cout << "Episode " << info.episode_number << " reward: " << info.episode_reward << endl;
00117       // publish episode reward message
00118       out_exp_info.publish(info);
00119       info.episode_number++;
00120       info.episode_reward = 0;
00121       firstAction = true;
00122       return;
00123     } else {
00124       a.action = agent->next_action(stateIn->reward, stateIn->state);
00125       info.number_actions++;
00126     }
00127   }
00128   firstAction = false;
00129 
00130   // publish agent's action
00131   out_rl_action.publish(a);
00132 }
00133 
00134 
00136 void processSeed(const rl_msgs::RLEnvSeedExperience::ConstPtr &seedIn){
00137 
00138   if (agent == NULL){
00139     cout << "no agent yet" << endl;
00140     return;
00141   }
00142 
00143   std::vector<experience> seeds;
00144   experience seed1;
00145   seed1.s = seedIn->from_state;
00146   seed1.next = seedIn->to_state;
00147   seed1.act = seedIn->action;
00148   seed1.reward = seedIn->reward;
00149   seed1.terminal = seedIn->terminal;
00150   seeds.push_back(seed1);
00151 
00152   agent->seedExp(seeds);
00153 
00154 }
00155 
00157 void processEnvDescription(const rl_msgs::RLEnvDescription::ConstPtr &envIn){
00158 
00159   // initialize the agent based on some info from the environment descriptor
00160   Random rng(seed+1);
00161   agent = NULL;
00162 
00163 
00164   if (strcmp(agentType, "qlearner") == 0){
00165     cout << "Agent: QLearner" << endl;
00166     agent = new QLearner(envIn->num_actions,
00167                          discountfactor, // gamma
00168                          initialvalue, // initial value
00169                          alpha, // alpha
00170                          epsilon, // epsilon
00171                          rng);
00172   }
00173 
00174   else if (strcmp(agentType, "rmax") == 0 || strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0){
00175     cout << "Agent: Model Based" << endl;
00176     agent = new ModelBasedAgent(envIn->num_actions,
00177                                 discountfactor,
00178                                 envIn->max_reward, envIn->reward_range,
00179                                 model, explore, modelcombo,
00180                                 nmodels,
00181                                 planner,
00182                                 0.1, // epsilon
00183                                 lambda,
00184                                 (1.0/actrate), //0.1, //0.1, //0.01, // max time
00185                                 M,
00186                                 envIn->min_state_range, envIn->max_state_range,
00187                                 nstates,
00188                                 history, v, n, false, reltrans, 0.2,
00189                                 envIn->stochastic, envIn->episodic,
00190                                 rng);
00191 
00192   }
00193 
00194   else if (strcmp(agentType, "dyna") == 0){
00195     cout << "Agent: Dyna" << endl;
00196     agent = new Dyna(envIn->num_actions, discountfactor,
00197                      initialvalue, alpha, k, epsilon,
00198                      rng);
00199   }
00200 
00201   else if (strcmp(agentType, "sarsa") == 0){
00202     cout << "Agent: Sarsa" << endl;
00203     agent = new Sarsa(envIn->num_actions, discountfactor,
00204                       initialvalue, alpha, epsilon, lambda,
00205                       rng);
00206   }
00207 
00208   else if (strcmp(agentType, "savedpolicy") == 0){
00209     cout << "Agent: Saved Policy" << endl;
00210     agent = new SavedPolicy(envIn->num_actions, filename);
00211   }
00212 
00213   else {
00214     cout << "Invalid Agent!" << endl;
00215     displayHelp();
00216     exit(-1);
00217   }
00218 
00219   Agent* a2 = agent;
00220   // not for model based when doing continuous model
00221   if (nstates > 0 && (model != M5ALLMULTI || strcmp(agentType, "qlearner") == 0)){
00222     int totalStates = powf(nstates,envIn->min_state_range.size());
00223     if (PRINTS) cout << "Discretize with " << nstates << ", total: " << totalStates << endl;
00224     agent = new DiscretizationAgent(nstates, a2,
00225                                     envIn->min_state_range,
00226                                     envIn->max_state_range, PRINTS);
00227   }
00228 
00229 
00230   firstAction = true;
00231   info.episode_number = 0;
00232   info.episode_reward = 0;
00233 }
00234 
00235 
00237 int main(int argc, char *argv[])
00238 {
00239   ros::init(argc, argv, NODE);
00240   ros::NodeHandle node;
00241 
00242   // agent is required required
00243   if (argc < 3){
00244     cout << "--agent type  option is required" << endl;
00245     displayHelp();
00246     exit(-1);
00247   }
00248 
00249   // parse options to change these parameters
00250   agentType = argv[1];
00251   seed = std::atoi(argv[2]);
00252 
00253   // parse agent type first
00254   bool gotAgent = false;
00255   for (int i = 1; i < argc-1; i++){
00256     if (strcmp(argv[i], "--agent") == 0){
00257       gotAgent = true;
00258       agentType = argv[i+1];
00259     }
00260   }
00261   if (!gotAgent) {
00262     cout << "--agent type  option is required" << endl;
00263     displayHelp();
00264   }
00265 
00266   // set some default options for rmax or texplore
00267   if (strcmp(agentType, "rmax") == 0){
00268     model = RMAX;
00269     explore = EXPLORE_UNKNOWN;
00270     modelcombo = BEST;
00271     planner = VALUE_ITERATION;
00272     nmodels = 1;
00273     reltrans = false;
00274     M = 5;
00275   } else if (strcmp(agentType, "texplore") == 0){
00276     model = C45TREE;
00277     explore = DIFF_AND_NOVEL_BONUS;
00278     v = 0;
00279     n = 0;
00280     modelcombo = AVERAGE;
00281     planner = PAR_ETUCT_ACTUAL;
00282     nmodels = 5;
00283     reltrans = true;
00284     M = 0;
00285   }
00286 
00287   char ch;
00288   const char* optflags = "geairlmoxpcn:";
00289   int option_index = 0;
00290   static struct option long_options[] = {
00291     {"gamma", 1, 0, 'g'},
00292     {"discountfactor", 1, 0, 'g'},
00293     {"epsilon", 1, 0, 'e'},
00294     {"alpha", 1, 0, 'a'},
00295     {"initialvalue", 1, 0, 'i'},
00296     {"actrate", 1, 0, 'r'},
00297     {"lambda", 1, 0, 'l'},
00298     {"m", 1, 0, 'm'},
00299     {"model", 1, 0, 'o'},
00300     {"explore", 1, 0, 'x'},
00301     {"planner", 1, 0, 'p'},
00302     {"combo", 1, 0, 'c'},
00303     {"nmodels", 1, 0, '#'},
00304     {"reltrans", 0, 0, 't'},
00305     {"abstrans", 0, 0, '0'},
00306     {"seed", 1, 0, 's'},
00307     {"agent", 1, 0, 'q'},
00308     {"prints", 0, 0, 'd'},
00309     {"nstates", 1, 0, 'w'},
00310     {"k", 1, 0, 'k'},
00311     {"filename", 1, 0, 'f'},
00312     {"history", 1, 0, 'y'},
00313     {"b", 1, 0, 'b'},
00314     {"v", 1, 0, 'v'},
00315     {"n", 1, 0, 'n'}
00316   };
00317 
00318   bool epsilonChanged = false;
00319   bool actrateChanged = false;
00320   bool mChanged = false;
00321   bool bvnChanged = false;
00322   bool lambdaChanged = false;
00323 
00324   while(-1 != (ch = getopt_long_only(argc, argv, optflags, long_options, &option_index))) {
00325     switch(ch) {
00326 
00327     case 'g':
00328       discountfactor = std::atof(optarg);
00329       cout << "discountfactor: " << discountfactor << endl;
00330       break;
00331 
00332     case 'e':
00333       epsilonChanged = true;
00334       epsilon = std::atof(optarg);
00335       cout << "epsilon: " << epsilon << endl;
00336       break;
00337 
00338     case 'y':
00339       {
00340         if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0){
00341           history = std::atoi(optarg);
00342           cout << "history: " << history << endl;
00343         } else {
00344           cout << "--history is not a valid option for agent: " << agentType << endl;
00345           exit(-1);
00346         }
00347         break;
00348       }
00349 
00350     case 'k':
00351       {
00352         if (strcmp(agentType, "dyna") == 0){
00353           k = std::atoi(optarg);
00354           cout << "k: " << k << endl;
00355         } else {
00356           cout << "--k is only a valid option for the Dyna agent" << endl;
00357           exit(-1);
00358         }
00359         break;
00360       }
00361 
00362     case 'f':
00363       filename = optarg;
00364       cout << "policy filename: " <<  filename << endl;
00365       break;
00366 
00367     case 'a':
00368       {
00369         if (strcmp(agentType, "qlearner") == 0 || strcmp(agentType, "dyna") == 0 || strcmp(agentType, "sarsa") == 0){
00370           alpha = std::atof(optarg);
00371           cout << "alpha: " << alpha << endl;
00372         } else {
00373           cout << "--alpha option is only valid for Q-Learning, Dyna, and Sarsa" << endl;
00374           exit(-1);
00375         }
00376         break;
00377       }
00378 
00379     case 'i':
00380       {
00381         if (strcmp(agentType, "qlearner") == 0 || strcmp(agentType, "dyna") == 0 || strcmp(agentType, "sarsa") == 0){
00382           initialvalue = std::atof(optarg);
00383           cout << "initialvalue: " << initialvalue << endl;
00384         } else {
00385           cout << "--initialvalue option is only valid for Q-Learning, Dyna, and Sarsa" << endl;
00386           exit(-1);
00387         }
00388         break;
00389       }
00390 
00391     case 'r':
00392       {
00393         actrateChanged = true;
00394         if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0 || strcmp(agentType, "rmax") == 0){
00395           actrate = std::atof(optarg);
00396           cout << "actrate: " << actrate << endl;
00397         } else {
00398           cout << "Model-free methods do not require an action rate" << endl;
00399           exit(-1);
00400         }
00401         break;
00402       }
00403 
00404     case 'l':
00405       {
00406         lambdaChanged = true;
00407         if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0 || strcmp(agentType, "rmax") == 0 || strcmp(agentType, "sarsa") == 0){
00408           lambda = std::atof(optarg);
00409           cout << "lambda: " << lambda << endl;
00410         } else {
00411           cout << "--lambda option is invalid for this agent: " << agentType << endl;
00412           exit(-1);
00413         }
00414         break;
00415       }
00416 
00417     case 'm':
00418       {
00419         mChanged = true;
00420         if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0 || strcmp(agentType, "rmax") == 0){
00421           M = std::atoi(optarg);
00422           cout << "M: " << M << endl;
00423         } else {
00424           cout << "--M option only useful for model-based agents, not " << agentType << endl;
00425           exit(-1);
00426         }
00427         break;
00428       }
00429 
00430     case 'o':
00431       {
00432         if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0 || strcmp(agentType, "rmax") == 0){
00433           if (strcmp(optarg, "tabular") == 0) model = RMAX;
00434           else if (strcmp(optarg, "tree") == 0) model = C45TREE;
00435           else if (strcmp(optarg, "texplore") == 0) model = C45TREE;
00436           else if (strcmp(optarg, "c45tree") == 0) model = C45TREE;
00437           else if (strcmp(optarg, "m5tree") == 0) model = M5ALLMULTI;
00438           if (strcmp(agentType, "rmax") == 0 && model != RMAX){
00439             cout << "R-Max should use tabular model" << endl;
00440             exit(-1);
00441           }
00442         } else {
00443           cout << "Model-free methods do not need a model, --model option does nothing for this agent type" << endl;
00444           exit(-1);
00445         }
00446         cout << "model: " << modelNames[model] << endl;
00447         break;
00448       }
00449 
00450     case 'x':
00451       {
00452         if (strcmp(optarg, "unknown") == 0) explore = EXPLORE_UNKNOWN;
00453         else if (strcmp(optarg, "greedy") == 0) explore = GREEDY;
00454         else if (strcmp(optarg, "epsilongreedy") == 0) explore = EPSILONGREEDY;
00455         else if (strcmp(optarg, "unvisitedstates") == 0) explore = UNVISITED_BONUS;
00456         else if (strcmp(optarg, "unvisitedactions") == 0) explore = UNVISITED_ACT_BONUS;
00457         else if (strcmp(optarg, "variancenovelty") == 0) explore = DIFF_AND_NOVEL_BONUS;
00458         if (strcmp(agentType, "rmax") == 0 && explore != EXPLORE_UNKNOWN){
00459           cout << "R-Max should use \"--explore unknown\" exploration" << endl;
00460           exit(-1);
00461         }
00462         else if (strcmp(agentType, "texplore") != 0 && strcmp(agentType, "modelbased") != 0 && strcmp(agentType, "rmax") != 0 && (explore != GREEDY && explore != EPSILONGREEDY)) {
00463           cout << "Model free methods must use either greedy or epsilon-greedy exploration!" << endl;
00464           explore = EPSILONGREEDY;
00465           exit(-1);
00466         }
00467         cout << "explore: " << exploreNames[explore] << endl;
00468         break;
00469       }
00470 
00471     case 'p':
00472       {
00473         if (strcmp(optarg, "vi") == 0) planner = VALUE_ITERATION;
00474         else if (strcmp(optarg, "valueiteration") == 0) planner = VALUE_ITERATION;
00475         else if (strcmp(optarg, "policyiteration") == 0) planner = POLICY_ITERATION;
00476         else if (strcmp(optarg, "pi") == 0) planner = POLICY_ITERATION;
00477         else if (strcmp(optarg, "sweeping") == 0) planner = PRI_SWEEPING;
00478         else if (strcmp(optarg, "prioritizedsweeping") == 0) planner = PRI_SWEEPING;
00479         else if (strcmp(optarg, "uct") == 0) planner = ET_UCT_ACTUAL;
00480         else if (strcmp(optarg, "paralleluct") == 0) planner = PAR_ETUCT_ACTUAL;
00481         else if (strcmp(optarg, "realtimeuct") == 0) planner = PAR_ETUCT_ACTUAL;
00482         else if (strcmp(optarg, "realtime-uct") == 0) planner = PAR_ETUCT_ACTUAL;
00483         else if (strcmp(optarg, "parallel-uct") == 0) planner = PAR_ETUCT_ACTUAL;
00484         else if (strcmp(optarg, "delayeduct") == 0) planner = POMDP_ETUCT;
00485         else if (strcmp(optarg, "delayed-uct") == 0) planner = POMDP_ETUCT;
00486         else if (strcmp(optarg, "delayedparalleluct") == 0) planner = POMDP_PAR_ETUCT;
00487         else if (strcmp(optarg, "delayed-parallel-uct") == 0) planner = POMDP_PAR_ETUCT;
00488         if (strcmp(agentType, "texplore") != 0 && strcmp(agentType, "modelbased") != 0 && strcmp(agentType, "rmax") != 0){
00489           cout << "Model-free methods do not require planners, --planner option does nothing with this agent" << endl;
00490           exit(-1);
00491         }
00492         if (strcmp(agentType, "rmax") == 0 && planner != VALUE_ITERATION){
00493           cout << "Typical implementation of R-Max would use value iteration, but another planner type is ok" << endl;
00494         }
00495         cout << "planner: " << plannerNames[planner] << endl;
00496         break;
00497       }
00498 
00499     case 'c':
00500       {
00501         if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0){
00502           if (strcmp(optarg, "average") == 0) modelcombo = AVERAGE;
00503           else if (strcmp(optarg, "weighted") == 0) modelcombo = WEIGHTAVG;
00504           else if (strcmp(optarg, "best") == 0) modelcombo = BEST;
00505           else if (strcmp(optarg, "separate") == 0) modelcombo = SEPARATE;
00506           cout << "modelcombo: " << comboNames[modelcombo] << endl;
00507         } else {
00508           cout << "--combo is an invalid option for agent: " << agentType << endl;
00509           exit(-1);
00510         }
00511         break;
00512       }
00513 
00514     case '#':
00515       {
00516         if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0){
00517           nmodels = std::atoi(optarg);
00518           cout << "nmodels: " << nmodels << endl;
00519         } else {
00520           cout << "--nmodels is an invalid option for agent: " << agentType << endl;
00521           exit(-1);
00522         }
00523         if (nmodels < 1){
00524           cout << "nmodels must be > 0" << endl;
00525           exit(-1);
00526         }
00527         break;
00528       }
00529 
00530     case 't':
00531       {
00532         if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0){
00533           reltrans = true;
00534           cout << "reltrans: " << reltrans << endl;
00535         } else {
00536           cout << "--reltrans is an invalid option for agent: " << agentType << endl;
00537           exit(-1);
00538         }
00539         break;
00540       }
00541 
00542     case '0':
00543       {
00544         if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0){
00545           reltrans = false;
00546           cout << "reltrans: " << reltrans << endl;
00547         } else {
00548           cout << "--abstrans is an invalid option for agent: " << agentType << endl;
00549           exit(-1);
00550         }
00551         break;
00552       }
00553 
00554     case 's':
00555       seed = std::atoi(optarg);
00556       cout << "seed: " << seed << endl;
00557       break;
00558 
00559     case 'q':
00560       // already processed this one
00561       cout << "agent: " << agentType << endl;
00562       break;
00563 
00564     case 'd':
00565       PRINTS = true;
00566       break;
00567 
00568     case 'w':
00569       nstates = std::atoi(optarg);
00570       cout << "nstates for discretization: " << nstates << endl;
00571       break;
00572 
00573     case 'v':
00574     case 'b':
00575       {
00576         bvnChanged = true;
00577         if (strcmp(agentType, "texplore") == 0){
00578           v = std::atof(optarg);
00579           cout << "v coefficient (variance bonus): " << v << endl;
00580         }
00581         else {
00582           cout << "--v and --b are invalid options for agent: " << agentType << endl;
00583           exit(-1);
00584         }
00585         break;
00586       }
00587 
00588     case 'n':
00589       {
00590         bvnChanged = true;
00591         if (strcmp(agentType, "texplore") == 0){
00592           n = std::atof(optarg);
00593           cout << "n coefficient (novelty bonus): " << n << endl;
00594         }
00595         else {
00596           cout << "--n is an invalid option for agent: " << agentType << endl;
00597           exit(-1);
00598         }
00599         break;
00600       }
00601 
00602     case 'h':
00603     case '?':
00604     case 0:
00605     default:
00606       displayHelp();
00607       break;
00608     }
00609   }
00610 
00611   // default back to greedy if no coefficients
00612   if (explore == DIFF_AND_NOVEL_BONUS && v == 0 && n == 0)
00613     explore = GREEDY;
00614 
00615   // check for conflicting options
00616   // changed epsilon but not doing epsilon greedy exploration
00617   if (epsilonChanged && explore != EPSILONGREEDY){
00618     cout << "No reason to change epsilon when not using epsilon-greedy exploration" << endl;
00619     exit(-1);
00620   }
00621 
00622   // set history value but not doing uct w/history planner
00623   if (history > 0 && (planner == VALUE_ITERATION || planner == POLICY_ITERATION || planner == PRI_SWEEPING)){
00624     cout << "No reason to set history higher than 0 if not using a UCT planner" << endl;
00625     exit(-1);
00626   }
00627 
00628   // set action rate but not doing real-time planner
00629   if (actrateChanged && (planner == VALUE_ITERATION || planner == POLICY_ITERATION || planner == PRI_SWEEPING)){
00630     cout << "No reason to set actrate if not using a UCT planner" << endl;
00631     exit(-1);
00632   }
00633 
00634   // set lambda but not doing uct (lambda)
00635   if (lambdaChanged && (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0 || strcmp(agentType, "rmax") == 0) && (planner == VALUE_ITERATION || planner == POLICY_ITERATION || planner == PRI_SWEEPING)){
00636     cout << "No reason to set actrate if not using a UCT planner" << endl;
00637     exit(-1);
00638   }
00639 
00640   // set n/v/b but not doing that diff_novel exploration
00641   if (bvnChanged && explore != DIFF_AND_NOVEL_BONUS){
00642     cout << "No reason to set n or v if not doing variance & novelty exploration" << endl;
00643     exit(-1);
00644   }
00645 
00646   // set combo other than best but only doing 1 model
00647   if (modelcombo != BEST && nmodels == 1){
00648     cout << "No reason to have model combo other than best with nmodels = 1" << endl;
00649     exit(-1);
00650   }
00651 
00652   // set M but not doing explore unknown
00653   if (mChanged && explore != EXPLORE_UNKNOWN){
00654     cout << "No reason to set M if not doing R-max style Explore Unknown exploration" << endl;
00655     exit(-1);
00656   }
00657 
00658   int qDepth = 1;
00659 
00660   // Set up Publishers
00661   out_rl_action = node.advertise<rl_msgs::RLAction>("rl_agent/rl_action",qDepth, false);
00662   out_exp_info = node.advertise<rl_msgs::RLExperimentInfo>("rl_agent/rl_experiment_info",qDepth, false);
00663 
00664   // Set up subscribers
00665   ros::TransportHints noDelay = ros::TransportHints().tcpNoDelay(true);
00666 
00667   ros::Subscriber rl_description =  node.subscribe("rl_env/rl_env_description", qDepth, processEnvDescription, noDelay);
00668   ros::Subscriber rl_state =  node.subscribe("rl_env/rl_state_reward", qDepth, processState, noDelay);
00669   ros::Subscriber rl_seed =  node.subscribe("rl_env/rl_seed", 20, processSeed, noDelay);
00670 
00671   ROS_INFO(NODE ": starting main loop");
00672 
00673   ros::spin();                          // handle incoming data
00674   //while (ros::ok()){
00675   //  ros::getGlobalCallbackQueue()->callAvailable(ros::WallDuration(0.1));
00676   //}
00677 
00678   if(agent != NULL) {
00679     if(filename != NULL) {
00680       agent->savePolicy(filename);
00681     }
00682   }
00683 
00684   return 0;
00685 }
00686 
00687 
00688 


rl_agent
Author(s): Todd Hester
autogenerated on Thu Jun 6 2019 22:00:13