ParallelETUCT.cc
Go to the documentation of this file.
00001 
00011 #include "ParallelETUCT.hh"
00012 #include <algorithm>
00013 
00014 #include <sys/time.h>
00015 
00016 
00017 ParallelETUCT::ParallelETUCT(int numactions, float gamma, float rrange, float lambda,
00018                              int MAX_ITER, float MAX_TIME, int MAX_DEPTH, int modelType,
00019                              const std::vector<float> &fmax, const std::vector<float> &fmin,
00020                              const std::vector<int> &nstatesPerDim, bool trackActual, int historySize, Random r):
00021   numactions(numactions), gamma(gamma), rrange(rrange), lambda(lambda),
00022   MAX_ITER(MAX_ITER), MAX_TIME(MAX_TIME),
00023   MAX_DEPTH(MAX_DEPTH), modelType(modelType), statesPerDim(nstatesPerDim),
00024   trackActual(trackActual), HISTORY_SIZE(historySize),
00025   HISTORY_FL_SIZE(historySize*numactions),
00026   CLEAR_SIZE(25)
00027 {
00028   rng = r;
00029 
00030   nstates = 0;
00031   nsaved = 0;
00032   nactions = 0;
00033   lastUpdate = -1;
00034 
00035   seedMode = false;
00036   timingType = true;
00037 
00038   previnfo = NULL;
00039   model = NULL;
00040   planTime = getSeconds();
00041   initTime = getSeconds();
00042   setTime = getSeconds();
00043 
00044   PLANNERDEBUG = false;
00045   POLICYDEBUG = false; //true; //false; //true; //false;
00046   ACTDEBUG = false; //true;
00047   MODELDEBUG = false;
00048   UCTDEBUG = false;
00049   PTHREADDEBUG = false;
00050   ATHREADDEBUG = false;//true;
00051   MTHREADDEBUG = false; //true;
00052   TIMINGDEBUG = false;
00053   REALSTATEDEBUG = false;
00054   HISTORYDEBUG = false; //true;
00055 
00056   if (statesPerDim[0] > 0){
00057     cout << "Planner Parallel ETUCT using discretization of " << statesPerDim[0] << endl;
00058   }
00059   if (trackActual){
00060     cout << "Parallel ETUCT tracking real state values" << endl;
00061   }
00062   cout << "Planner using history size: " << HISTORY_SIZE << endl;
00063 
00064   featmax = fmax;
00065   featmin = fmin;
00066 
00067   pthread_mutex_init(&update_mutex, NULL);
00068   pthread_mutex_init(&history_mutex, NULL);
00069   pthread_mutex_init(&nactions_mutex, NULL);
00070   pthread_mutex_init(&plan_state_mutex, NULL);
00071   pthread_mutex_init(&statespace_mutex, NULL);
00072   pthread_mutex_init(&model_mutex, NULL);
00073   pthread_mutex_init(&list_mutex, NULL);
00074   pthread_cond_init(&list_cond, NULL);
00075 
00076   // start parallel search thread
00077   actualPlanState = std::vector<float>(featmax.size());
00078   discPlanState = NULL;
00079   modelThreadStarted = false;
00080   planThreadStarted = false;
00081   expList.clear();
00082 
00083   if (HISTORY_SIZE == 0){
00084     saHistory.push_back(0.0);
00085   }
00086   else {
00087     if (HISTORYDEBUG) {
00088       cout << "History size of " << HISTORY_SIZE
00089            << " float size of " << HISTORY_FL_SIZE
00090            << " with state size: " << fmin.size()
00091            << " and numact: " << numactions << endl;
00092     }
00093     for (int i = 0; i < HISTORY_FL_SIZE; i++){
00094       saHistory.push_back(0.0);
00095     }
00096   }
00097 
00098   //  initStates();
00099   expfile.initFile("experiences.bin", featmax.size());
00100 }
00101 
00102 ParallelETUCT::~ParallelETUCT() {
00103   // join threads
00104 
00105   //pthread_kill(planThread);
00106   //pthread_kill(modelThread);
00107 
00108   pthread_detach(planThread);//, NULL);
00109   pthread_detach(modelThread);//, NULL);
00110 
00111   pthread_cancel(planThread);//, NULL);
00112   pthread_cancel(modelThread);//, NULL);
00113 
00114   //pthread_join(planThread, NULL);
00115   //pthread_join(modelThread, NULL);
00116 
00117   //pthread_detach(planThread);//, NULL);
00118   //pthread_detach(modelThread);//, NULL);
00119 
00120 
00121   pthread_mutex_lock(&plan_state_mutex);
00122   pthread_mutex_lock(&statespace_mutex);
00123   pthread_mutex_lock(&model_mutex);
00124   pthread_mutex_lock(&list_mutex);
00125 
00126   // delete exp list
00127   expList.clear();
00128 
00129   for (std::map<state_t, state_info>::iterator i = statedata.begin();
00130        i != statedata.end(); i++){
00131 
00132     // get state's info
00133     //cout << "  planner got info" << endl;
00134     state_info* info = &((*i).second);
00135 
00136     deleteInfo(info);
00137   }
00138 
00139   featmax.clear();
00140   featmin.clear();
00141 
00142   statespace.clear();
00143   statedata.clear();
00144 
00145   pthread_mutex_unlock(&plan_state_mutex);
00146   pthread_mutex_unlock(&statespace_mutex);
00147   pthread_mutex_unlock(&model_mutex);
00148   pthread_mutex_unlock(&list_mutex);
00149 
00150 }
00151 
00152 void ParallelETUCT::setModel(MDPModel* m){
00153 
00154   model = m;
00155 
00156 }
00157 
00158 
00160 // Functional functions :) //
00162 
00163 
00164 
00166 bool ParallelETUCT::updateModelWithExperience(const std::vector<float> &laststate,
00167                                               int lastact,
00168                                               const std::vector<float> &currstate,
00169                                               float reward, bool term){
00170   //  if (PLANNERDEBUG) cout << "updateModelWithExperience(last = " << &laststate
00171   //     << ", curr = " << &currstate
00172   //        << ", lastact = " << lastact
00173   //     << ", r = " << reward
00174   //     << ", term = " << term
00175   //     << ")" << endl;
00176 
00177   //cout << "updateModel" << endl << flush;
00178 
00179   if (!timingType)
00180     planTime = getSeconds();
00181   initTime = getSeconds();
00182 
00183   // canonicalize these things
00184   state_t last = canonicalize(laststate);
00185 
00186   prevstate = last;
00187   prevact = lastact;
00188 
00189   // get state info
00190   pthread_mutex_lock(&statespace_mutex);
00191   previnfo = &(statedata[last]);
00192   pthread_mutex_unlock(&statespace_mutex);
00193 
00194   if (MODELDEBUG){
00195     cout << "Update with exp from state: ";
00196     for (unsigned i = 0; i < last->size(); i++){
00197       cout << (laststate)[i] << ", ";
00198     }
00199     cout << " action: " << lastact;
00200     cout << " to state: ";
00201     for (unsigned i = 0; i < currstate.size(); i++){
00202       cout << (currstate)[i] << ", ";
00203     }
00204     cout << " and reward: " << reward << endl;
00205   }
00206 
00207   // add experiences to list to later be updated into model
00208   if (ATHREADDEBUG)
00209     cout << "*** Action thread wants list lock ***" << endl << flush;
00210   if (TIMINGDEBUG) cout << "Want list mutex, time: " << (getSeconds()-initTime) << endl;
00211   pthread_mutex_lock(&list_mutex);
00212   if (TIMINGDEBUG) cout << "got list mutex, time: " << (getSeconds()-initTime) << endl;
00213   experience e;
00214   e.s = laststate;
00215   e.next = currstate;
00216   e.act = lastact;
00217   e.reward = reward;
00218   e.terminal = term;
00219 
00220   if (HISTORY_SIZE > 0){
00221     if (HISTORYDEBUG) {
00222       cout << "Original state vector (size " << e.s.size() << ": " << e.s[0];
00223       for (unsigned i = 1; i < e.s.size(); i++){
00224         cout << "," << e.s[i];
00225       }
00226       cout << endl;
00227     }
00228     // add history onto e.s
00229     pthread_mutex_lock(&history_mutex);
00230     for (int i = 0; i < HISTORY_FL_SIZE; i++){
00231       e.s.push_back(saHistory[i]);
00232     }
00233     pthread_mutex_unlock(&history_mutex);
00234 
00235     if (HISTORYDEBUG) {
00236       cout << "New state vector (size " << e.s.size() << ": " << e.s[0];
00237       for (unsigned i = 1; i < e.s.size(); i++){
00238         cout << "," << e.s[i];
00239       }
00240       cout << endl;
00241     }
00242 
00243     if (!seedMode){
00244       pthread_mutex_lock(&history_mutex);
00245       // push this state and action onto the history vector
00246       /*
00247         for (unsigned i = 0; i < last->size(); i++){
00248         saHistory.push_back((*last)[i]);
00249         saHistory.pop_front();
00250         }
00251 
00252         saHistory.push_back((*last)[3]);
00253         saHistory.pop_front();
00254       */
00255       for (int i = 0; i < numactions; i++){
00256         if (i == lastact)
00257           saHistory.push_back(1.0);
00258         else
00259           saHistory.push_back(0.0);
00260         saHistory.pop_front();
00261       }
00262 
00263       //      saHistory.push_back(lastact);
00264       //saHistory.pop_front();
00265       if (HISTORYDEBUG) {
00266         cout << "New history vector (size " << saHistory.size() << ": " << saHistory[0];
00267         for (unsigned i = 1; i < saHistory.size(); i++){
00268           cout << "," << saHistory[i];
00269         }
00270         cout << endl;
00271       }
00272       pthread_mutex_unlock(&history_mutex);
00273     }
00274   }
00275 
00276   expList.push_back(e);
00277   //expfile.saveExperience(e);
00278   if (ATHREADDEBUG || MTHREADDEBUG)
00279     cout << "added exp to list, size: " << expList.size() << endl << flush;
00280   if (TIMINGDEBUG) cout << "list updated, time: " << (getSeconds()-initTime) << endl;
00281   pthread_cond_signal(&list_cond);
00282   pthread_mutex_unlock(&list_mutex);
00283 
00284   /*
00285     if (e.reward > -0.5 && e.reward < 0){
00286     expfile.saveExperience(e);
00287     nsaved++;
00288     cout << "Saved Experience " << e.reward << endl;
00289     }
00290   */
00291 
00292   if (timingType)
00293     planTime = getSeconds();
00294 
00295   if (TIMINGDEBUG) cout << "leaving updateModel, time: " << (getSeconds()-initTime) << endl;
00296 
00297 
00298   return false;
00299 
00300 }
00301 
00303 void ParallelETUCT::updateStateActionFromModel(state_t s, int a, state_info* info){
00304 
00305   if (HISTORY_SIZE == 0){
00306     pthread_mutex_lock(&info->statemodel_mutex);
00307 
00308     std::deque<float> history(1,0.0);
00309     StateActionInfo* newModel = NULL;
00310     newModel = &(info->historyModel[a][history]);
00311 
00312     updateStateActionHistoryFromModel(*s, a, newModel);
00313     pthread_mutex_unlock(&info->statemodel_mutex);
00314 
00315   }
00316 
00317   else {
00318     pthread_mutex_lock(&info->statemodel_mutex);
00319 
00320     // clear large ones
00321     if (info->historyModel[a].size() > CLEAR_SIZE){
00322 
00323       //      cout << "clearing model map of size " << info->historyModel[a].size() << endl;
00324 
00325       // instead, clear because these take too much memory to keep around
00326       info->historyModel[a].clear();
00327 
00328     } else {
00329 
00330       // fill in for all histories???
00331       for (std::map< std::deque<float>, StateActionInfo>::iterator it = info->historyModel[a].begin();
00332            it != info->historyModel[a].end(); it++){
00333 
00334         std::deque<float> oneHist = (*it).first;
00335         StateActionInfo* newModel = &((*it).second);
00336 
00337         // add history to vector
00338         std::vector<float> modState = *s;
00339         for (int i = 0; i < HISTORY_FL_SIZE; i++){
00340           modState.push_back(oneHist[i]);
00341         }
00342         updateStateActionHistoryFromModel(modState, a, newModel);
00343       }
00344     }
00345 
00346     pthread_mutex_unlock(&info->statemodel_mutex);
00347   }
00348 
00349 }
00350 
00352 void ParallelETUCT::updateStateActionHistoryFromModel(const std::vector<float> modState, int a, StateActionInfo *newModel){
00353 
00354   // update state info
00355   // get state action info for each action
00356   pthread_mutex_lock(&model_mutex);
00357 
00358   model->getStateActionInfo(modState, a, newModel);
00359 
00360   pthread_mutex_lock(&nactions_mutex);
00361   newModel->frameUpdated = nactions;
00362   pthread_mutex_unlock(&nactions_mutex);
00363 
00364   pthread_mutex_unlock(&model_mutex);
00365 
00366   //canonNextStates(newModel);
00367 
00368 }
00369 
00370 void ParallelETUCT::canonNextStates(StateActionInfo* modelInfo){
00371 
00372 
00373   // loop through all next states
00374   for (std::map<std::vector<float>, float>::iterator outIt
00375          = modelInfo->transitionProbs.begin();
00376        outIt != modelInfo->transitionProbs.end(); outIt++){
00377 
00378     std::vector<float> nextstate = (*outIt).first;
00379     bool badState = false;
00380 
00381     // check that it is valid, otherwise replace with current
00382     for (unsigned j = 0; j < nextstate.size(); j++){
00383       float factor = EPSILON;
00384       if (statesPerDim[j] > 0)
00385         factor = (featmax[j] - featmin[j]) / (float)statesPerDim[j];
00386       if (nextstate[j] < (featmin[j]-factor)
00387           || nextstate[j] > (featmax[j]+factor)){
00388         //cout << "next state out of range " << nextstate[j] << endl;
00389         badState = true;
00390         break;
00391       }
00392     }
00393 
00394     if (!badState){
00395 
00396       canonicalize(nextstate);
00397     }
00398   }
00399 }
00400 
00401 
00402 
00403 
00405 int ParallelETUCT::getBestAction(const std::vector<float> &state){
00406   //  if (PLANNERDEBUG) cout << "getBestAction(s = " << &state << ")" << endl;
00407 
00408   pthread_mutex_lock(&nactions_mutex);
00409   nactions++;
00410   pthread_mutex_unlock(&nactions_mutex);
00411 
00412   if (TIMINGDEBUG) cout << "getBestAction, time: " << (getSeconds()-initTime) << endl;
00413 
00414 
00415   state_t s = canonicalize(state);
00416 
00417   // set plan state so uct will search from here
00418   if (ATHREADDEBUG)
00419     cout << "*** Action thread wants plan state lock ***" << endl << flush;
00420   if (TIMINGDEBUG) cout << "want planStateMut, time: " << (getSeconds()-initTime) << endl;
00421 
00422   pthread_mutex_lock(&(plan_state_mutex));
00423   if (TIMINGDEBUG) cout << "got planStateMut, time: " << (getSeconds()-initTime) << endl;
00424 
00425   actualPlanState = state;
00426   discPlanState = s;
00427   setTime = getSeconds();
00428 
00429   if (ATHREADDEBUG){
00430     cout << "Set planning state as: ";
00431     for (unsigned i = 0; i < state.size(); i++){
00432       cout << state[i] << ", ";
00433     }
00434     cout << endl << flush;
00435   }
00436 
00437   // call uct search on it
00438   pthread_mutex_unlock(&(plan_state_mutex));
00439   if (TIMINGDEBUG) cout << "set planState, time: " << (getSeconds()-initTime) << endl;
00440 
00441   // get state info
00442   pthread_mutex_lock(&statespace_mutex);
00443   state_info* info = &(statedata[s]);
00444   pthread_mutex_unlock(&statespace_mutex);
00445 
00446   // wait a bit for some planning from this state
00447 
00448   // depending on how you run the code, this has to be setup differently
00449   // if someone else calls this method at the appropriate rate, do nothing here
00450 
00451   // or this can be where we wait to ensure we run at some rate:
00452   while (((getSeconds()- initTime) < MAX_TIME)){
00453     if (TIMINGDEBUG)
00454       cout << "waiting for time: " << (getSeconds()-initTime) << endl;
00455 
00456     pthread_yield();
00457   }
00458 
00459   if (TIMINGDEBUG) cout << "time up: " << (getSeconds()-initTime) << endl;
00460 
00461   if (TIMINGDEBUG && (getSeconds()-initTime) > 0.15) cout << "**********" << endl;
00462 
00463   pthread_mutex_lock(&info->stateinfo_mutex);
00464 
00465   // Get Q values
00466   std::vector<float> &Q = info->Q;
00467 
00468 
00469   if (ATHREADDEBUG) {
00470     if (previnfo != NULL)
00471       cout << " ... now " << previnfo->uctVisits << " times." << endl;
00472     cout << "Getting best action from state ";
00473     for (unsigned i = 0; i < s->size(); i++){
00474       cout << (*s)[i] << ", ";
00475     }
00476     cout << " sampled " << info->uctVisits << " times.";// << endl << flush;
00477   }
00478 
00479   // Choose an action
00480   const std::vector<float>::iterator a =
00481     random_max_element(Q.begin(), Q.end()); // Choose maximum
00482   int act = a - Q.begin();
00483 
00484   if (TIMINGDEBUG) cout << "got action: " << (getSeconds()-initTime) << endl;
00485 
00486   pthread_mutex_unlock(&info->stateinfo_mutex);
00487 
00488   // return index of action
00489   return act;
00490 }
00491 
00492 
00493 
00494 
00495 
00496 
00497 void ParallelETUCT::planOnNewModel(){
00498   //return;
00499   //  cout << "planOnNewModel" << endl << flush;
00500   // start model learning thread here
00501   if (!modelThreadStarted){
00502     modelThreadStarted = true;
00503     pthread_create(&modelThread, NULL, parallelModelLearningStart, this);
00504   }
00505 
00506   if (!planThreadStarted){
00507     planThreadStarted = true;
00508     pthread_create(&(planThread), NULL, parallelSearchStart, this);
00509   }
00510 
00511 }
00512 
00513 
00514 void* parallelModelLearningStart(void* arg){
00515   cout << "Start model learning thread" << endl << flush;
00516   ParallelETUCT* pe = reinterpret_cast<ParallelETUCT*>(arg);
00517   while(true){
00518     pe->parallelModelLearning();
00519     /*
00520       if (!pe->planThreadStarted){
00521       pe->planThreadStarted = true;
00522       pthread_create(&(pe->planThread), NULL, parallelSearchStart, pe);
00523       }
00524     */
00525   }
00526   return NULL;
00527 }
00528 
00529 void ParallelETUCT::parallelModelLearning(){
00530   //while(true){
00531 
00532   // wait for experience list to be non-empty
00533   pthread_mutex_lock(&list_mutex);
00534   while (expList.size() == 0){
00535     pthread_cond_wait(&list_cond,&list_mutex);
00536   }
00537   pthread_mutex_unlock(&list_mutex);
00538 
00539   // copy over experience list
00540   std::vector<experience> updateList;
00541   if (MTHREADDEBUG) cout << "  *** Model thread wants list lock ***" << endl << flush;
00542   pthread_mutex_lock(&list_mutex);
00543   updateList = expList;
00544   expList.clear();
00545   if (MTHREADDEBUG) cout << "  *** Model thread done with list lock ***" << endl << flush;
00546   pthread_mutex_unlock(&list_mutex);
00547 
00548   /*
00549   // update model
00550   //cout << "*** Model thread wants tree lock ***" << endl << flush;
00551   pthread_mutex_lock(&model_mutex);
00552   if (MTHREADDEBUG) cout << "  Model thread: going to update model with " << updateList.size() << " new experiences" << endl << flush;
00553   //cout << "****update tree with " << updateList.size() << endl << flush;
00554   bool modelChanged = model->updateWithExperiences(updateList);
00555   if (MTHREADDEBUG) cout << "  Model updated" << endl << flush;
00556   pthread_mutex_unlock(&model_mutex);
00557   */
00558 
00559   modelcopy = model->getCopy();
00560   //if (COPYDEBUG) cout << "*** PO: model copied" << endl;
00561 
00562   // update model copy with new experience
00563   bool modelChanged = modelcopy->updateWithExperiences(updateList);
00564 
00565   // set model pointer to point at copy, delete original model                    cout << "acquire model_mutex for update" << endl;
00566   pthread_mutex_lock(&model_mutex);
00567   //cout << "model_mutex acquired for update" << endl;
00568   //if (COPYDEBUG) cout << "*** PO: delete original model and change pointer" << endl;
00569   delete model;
00570   model = modelcopy;
00571   if (MTHREADDEBUG) cout << "  Model updated" << endl << flush;
00572   //if (COPYDEBUG) cout << "*** PO: pointer set to updated model copy" << endl;
00573   pthread_mutex_unlock(&model_mutex);
00574 
00575 
00576 
00577   // if it changed, reset counts, update state actions
00578   if (modelChanged) resetAndUpdateStateActions();
00579 
00580   pthread_yield();
00581 
00582   //}// while loop
00583 } // method
00584 
00585 
00586 
00587 
00588 void ParallelETUCT::resetAndUpdateStateActions(){
00589   //cout << "*** Model changed, updating state actions ***" << endl << flush;
00590   const int MIN_VISITS = 10;
00591 
00592   pthread_mutex_lock(&nactions_mutex);
00593   int updateTime = nactions;
00594   pthread_mutex_unlock(&nactions_mutex);
00595 
00596   // loop through here
00597 
00598   pthread_mutex_lock(&statespace_mutex);
00599 
00600   for (std::set<std::vector<float> >::iterator i = statespace.begin();
00601        i != statespace.end(); i++){
00602     pthread_mutex_unlock(&statespace_mutex);
00603 
00604     state_t s = canonicalize(*i);
00605 
00606     if (MTHREADDEBUG) cout << "  *** Model thread wants search lock ***" << endl;
00607 
00608     if (MTHREADDEBUG) cout << "  *** Model thread got search lock " << endl;
00609 
00610     pthread_mutex_lock(&statespace_mutex);
00611     state_info* info = &(statedata[s]);
00612     pthread_mutex_unlock(&statespace_mutex);
00613 
00614     pthread_mutex_lock(&info->stateinfo_mutex);
00615 
00616     if (info->uctVisits > (MIN_VISITS * numactions))
00617       info->uctVisits = MIN_VISITS * numactions;
00618 
00619     for (int j = 0; j < numactions; j++){
00620       if (info->uctActions[j] > MIN_VISITS)
00621         info->uctActions[j] = MIN_VISITS;
00622       if (info->needsUpdate || info->historyModel[j].size() > CLEAR_SIZE){
00623         updateStateActionFromModel(s, j, info);
00624       }
00625     }
00626     info->needsUpdate = false;
00627     pthread_mutex_unlock(&info->stateinfo_mutex);
00628 
00629     pthread_yield();
00630 
00631     pthread_mutex_lock(&statespace_mutex);
00632 
00633   }
00634   pthread_mutex_unlock(&statespace_mutex);
00635 
00636   pthread_mutex_lock(&update_mutex);
00637   lastUpdate = updateTime;
00638   pthread_mutex_unlock(&update_mutex);
00639 
00640 }
00641 
00642 
00643 
00644 
00646 // Helper Functions       //
00648 
00649 ParallelETUCT::state_t ParallelETUCT::canonicalize(const std::vector<float> &s) {
00650   if (PLANNERDEBUG) cout << "canonicalize(s = " << s[0] << ", "
00651                          << s[1] << ")" << endl;
00652 
00653   // discretize it
00654   std::vector<float> s2;
00655   if (statesPerDim[0] > 0){
00656     s2 = discretizeState(s);
00657   } else {
00658     s2 = s;
00659   }
00660 
00661   pthread_mutex_lock(&statespace_mutex);
00662 
00663   // get state_t for pointer if its in statespace
00664   const std::pair<std::set<std::vector<float> >::iterator, bool> result =
00665     statespace.insert(s2);
00666   state_t retval = &*result.first; // Dereference iterator then get pointer
00667 
00668 
00669   // if not, init this new state
00670   if (result.second) { // s is new, so initialize Q(s,a) for all a
00671     state_info* info = &(statedata[retval]);
00672     int id = nstates++;
00673     pthread_mutex_unlock(&statespace_mutex);
00674     initStateInfo(retval, info, id);
00675   } else {
00676     pthread_mutex_unlock(&statespace_mutex);
00677   }
00678 
00679   return retval;
00680 }
00681 
00682 
00683 // init state info
00684 void ParallelETUCT::initStateInfo(state_t s, state_info* info, int id){
00685   //if (PLANNERDEBUG) cout << "initStateInfo()";
00686 
00687   // init mutex's for this state info
00688   pthread_mutex_init(&info->statemodel_mutex, NULL);
00689   pthread_mutex_init(&info->stateinfo_mutex, NULL);
00690 
00691   pthread_mutex_lock(&info->stateinfo_mutex);
00692 
00693   // model data (transition, reward, known)
00694 
00695   pthread_mutex_lock(&info->statemodel_mutex);
00696   info->historyModel = new std::map< std::deque<float>, StateActionInfo>[numactions];
00697   pthread_mutex_unlock(&info->statemodel_mutex);
00698 
00699   info->id = id;
00700   if (PLANNERDEBUG) cout << " id = " << info->id << endl;
00701 
00702   // model q values, visit counts
00703   info->Q.resize(numactions, 0);
00704   info->uctActions.resize(numactions, 1);
00705   info->uctVisits = 1;
00706   info->visited = 0; //false;
00707 
00708   for (int i = 0; i < numactions; i++){
00709     info->Q[i] = rng.uniform(0,0.01);
00710   }
00711 
00712   info->needsUpdate = true;
00713 
00714   pthread_mutex_unlock(&info->stateinfo_mutex);
00715 
00716   //if (PLANNERDEBUG) cout << "done with initStateInfo()" << endl;
00717 
00718 }
00719 
00720 
00722 void ParallelETUCT::printStates(){
00723 
00724   pthread_mutex_lock(&statespace_mutex);
00725   for (std::set< std::vector<float> >::iterator i = statespace.begin();
00726        i != statespace.end(); i++){
00727     pthread_mutex_unlock(&statespace_mutex);
00728 
00729     state_t s = canonicalize(*i);
00730 
00731     pthread_mutex_lock(&statespace_mutex);
00732     state_info* info = &(statedata[s]);
00733     pthread_mutex_unlock(&statespace_mutex);
00734 
00735     cout << "State " << info->id << ": ";
00736     for (unsigned j = 0; j < s->size(); j++){
00737       cout << (*s)[j] << ", ";
00738     }
00739     cout << endl;
00740 
00741     pthread_mutex_lock(&info->stateinfo_mutex);
00742     //pthread_mutex_lock(&info->statemodel_mutex);
00743     for (int act = 0; act < numactions; act++){
00744       cout << " Q: " << info->Q[act] << endl;
00745       // << " R: " << info->modelInfo[act].reward << endl;
00746     }
00747     // pthread_mutex_unlock(&info->statemodel_mutex);
00748     pthread_mutex_unlock(&info->stateinfo_mutex);
00749 
00750     pthread_mutex_lock(&statespace_mutex);
00751 
00752   }
00753   pthread_mutex_unlock(&statespace_mutex);
00754 
00755 }
00756 
00757 
00758 void ParallelETUCT::deleteInfo(state_info* info){
00759 
00760   delete [] info->historyModel;
00761 
00762 }
00763 
00764 
00765 
00766 double ParallelETUCT::getSeconds(){
00767   struct timezone tz;
00768   timeval timeT;
00769   gettimeofday(&timeT, &tz);
00770   return  timeT.tv_sec + (timeT.tv_usec / 1000000.0);
00771 }
00772 
00773 
00785 float ParallelETUCT::uctSearch(const std::vector<float> &actS, state_t discS, int depth, std::deque<float> &searchHistory){
00786   if (UCTDEBUG){
00787     cout << " uctSearch state ";
00788     for (unsigned i = 0; i < actS.size(); i++){
00789       cout << actS[i] << ", ";
00790     }
00791     cout << " at depth " << depth << endl;
00792   }
00793 
00794   pthread_mutex_lock(&statespace_mutex);
00795   state_info* info = &(statedata[discS]);
00796   pthread_mutex_unlock(&statespace_mutex);
00797 
00798   // if max depth
00799   // iterative deepening (probability inversely proportional to visits)
00800   //float terminateProb = 1.0/(2.0+(float)info->uctVisits);
00801 
00802   // already visited, stop here
00803   if (depth > MAX_DEPTH){
00804     pthread_mutex_lock(&info->stateinfo_mutex);
00805 
00806     // return max q value here
00807     std::vector<float>::iterator maxAct =
00808       std::max_element(info->Q.begin(),
00809                        info->Q.end());
00810     float maxval = *maxAct;
00811 
00812     if (UCTDEBUG)
00813       cout << "Terminated after depth: " << depth
00814         //   << " prob: " << terminateProb
00815            << " Q: " << maxval
00816            << " visited: " << info->visited << endl;
00817 
00818     pthread_mutex_unlock(&info->stateinfo_mutex);
00819 
00820     return maxval;
00821   }
00822 
00823   // select action
00824   int action = selectUCTAction(info);
00825 
00826   // simulate action to get next state and reward
00827   // depending on exploration, may also terminate us
00828   float reward = 0;
00829   bool term = false;
00830 
00831   pthread_mutex_lock(&info->stateinfo_mutex);
00832 
00833   float learnRate;
00834   //float learnRate = 0.001;
00835   //float learnRate = 1.0 / info->uctActions[action];
00836   //    learnRate = 10.0 / (info->uctActions[action] + 100.0);
00837   learnRate = 10.0 / (info->uctActions[action] + 10.0);
00838   //if (learnRate < 0.001 && MAX_TIME < 0.5)
00839   //learnRate = 0.001;
00840   //learnRate = 0.05;
00841   //learnRate = 1.0;
00842 
00843   // tell model learning thread to update this state since we've visited it
00844   info->needsUpdate = true;
00845 
00846   pthread_mutex_unlock(&info->stateinfo_mutex);
00847 
00848   std::vector<float> actualNext = simulateNextState(actS, discS, info, searchHistory, action, &reward, &term);
00849 
00850   // simulate reward from this action
00851   if (term){
00852     // this one terminated
00853     if (UCTDEBUG) cout << "   Terminated on exploration condition" << endl;
00854     pthread_mutex_lock(&info->stateinfo_mutex);
00855 
00856     info->Q[action] += learnRate * (reward - info->Q[action]);
00857     info->uctVisits++;
00858     info->uctActions[action]++;
00859 
00860     if (UCTDEBUG)
00861       cout << " Depth: " << depth << " Selected action " << action
00862            << " r: " << reward
00863            << " StateVisits: " << info->uctVisits
00864            << " ActionVisits: " << info->uctActions[action] << endl;
00865 
00866     pthread_mutex_unlock(&info->stateinfo_mutex);
00867 
00868     return reward;
00869   }
00870 
00871   // simulate next state from this action
00872   state_t discNext = canonicalize(actualNext);
00873 
00874   if (UCTDEBUG)
00875     cout << " Depth: " << depth << " Selected action " << action
00876          << " r: " << reward  << endl;
00877 
00878   pthread_mutex_lock(&info->stateinfo_mutex);
00879   info->visited++; // = true;
00880   pthread_mutex_unlock(&info->stateinfo_mutex);
00881 
00882   if (HISTORY_SIZE > 0){
00883     // update history vector for this state
00884     /*
00885       for (unsigned i = 0; i < (*discS).size(); i++){
00886       searchHistory.push_back((*discS)[i]);
00887       searchHistory.pop_front();
00888       }
00889 
00890       searchHistory.push_back((*discS)[3]);
00891       searchHistory.pop_front();
00892     */
00893     for (int i = 0; i < numactions; i++){
00894       if (i == action)
00895         searchHistory.push_back(1.0);
00896       else
00897         searchHistory.push_back(0.0);
00898       searchHistory.pop_front();
00899     }
00900 
00901     //    searchHistory.push_back(action);
00902     //searchHistory.pop_front();
00903     if (HISTORYDEBUG) {
00904       cout << "New planning history vector (size " << searchHistory.size() << ": " << searchHistory[0];
00905       for (unsigned i = 1; i < searchHistory.size(); i++){
00906         cout << "," << searchHistory[i];
00907       }
00908       cout << endl;
00909     }
00910   }
00911 
00912   // new q value
00913   float newQ = reward + gamma * uctSearch(actualNext, discNext, depth+1, searchHistory);
00914 
00915   pthread_mutex_lock(&info->stateinfo_mutex);
00916 
00917   if (info->visited == 1){
00918 
00919     // update q and visit counts
00920     info->Q[action] += learnRate * (newQ - info->Q[action]);
00921     info->uctVisits++;
00922     info->uctActions[action]++;
00923 
00924     if (UCTDEBUG)
00925       cout << " Depth: " << depth << " newQ: " << newQ
00926            << " StateVisits: " << info->uctVisits
00927            << " ActionVisits: " << info->uctActions[action] << endl;
00928 
00929     if (lambda < 1.0){
00930 
00931       // new idea, return max of Q or new q
00932       std::vector<float>::iterator maxAct =
00933         std::max_element(info->Q.begin(),
00934                          info->Q.end());
00935       float maxval = *maxAct;
00936 
00937       if (UCTDEBUG)
00938         cout << " Replacing newQ: " << newQ;
00939 
00940       // replace with w avg of maxq and new val
00941       newQ = (lambda * newQ) + ((1.0-lambda) * maxval);
00942 
00943       if (UCTDEBUG)
00944         cout << " with wAvg: " << newQ << endl;
00945     }
00946 
00947   }
00948 
00949   info->visited--;
00950   pthread_mutex_unlock(&info->stateinfo_mutex);
00951 
00952   // return q
00953   return newQ;
00954 
00955 }
00956 
00957 
00958 int ParallelETUCT::selectUCTAction(state_info* info){
00959   //  if (UCTDEBUG) cout << "  selectUCTAction" << endl;
00960 
00961   pthread_mutex_lock(&info->stateinfo_mutex);
00962 
00963   std::vector<float> &Q = info->Q;
00964 
00965   if (info->uctActions.size() < (unsigned)numactions){
00966     cout << "ERROR: uctActions has size " << info->uctActions.size() << endl << flush;
00967     info->uctActions.resize(numactions);
00968   }
00969 
00970   // loop through
00971   float rewardBound = rrange;
00972   if (rewardBound < 1.0)
00973     rewardBound = 1.0;
00974   rewardBound /= (1.0 - gamma);
00975   if (UCTDEBUG) cout << "Reward bound: " << rewardBound << endl;
00976 
00977   std::vector<float> uctQ(numactions, 0.0);
00978 
00979   for (int i = 0; i < numactions; i++){
00980 
00981     // this actions value is Q + rMax * 2 sqrt (log N(s) / N(s,a))
00982     uctQ[i] = Q[i] +
00983       rewardBound * 2.0 * sqrt(log((float)info->uctVisits) /
00984                                (float)info->uctActions[i]);
00985 
00986     if (UCTDEBUG)
00987       cout << "  Action: " << i << " Q: " << Q[i]
00988            << " visits: " << info->uctActions[i]
00989            << " value: " << uctQ[i] << endl;
00990   }
00991 
00992   // max element of uctQ
00993   std::vector<float>::iterator maxAct =
00994     max_element(uctQ.begin(), uctQ.end());
00995   float maxval = *maxAct;
00996   int act = maxAct - uctQ.begin();
00997 
00998   if (UCTDEBUG)
00999     cout << "  Selected " << act << " val: " << maxval << endl;
01000 
01001   pthread_mutex_unlock(&info->stateinfo_mutex);
01002 
01003   return act;
01004 
01005 }
01006 
01008 std::vector<float> ParallelETUCT::simulateNextState(const std::vector<float> &actualState, state_t discState, state_info* info, const std::deque<float> &history, int action, float* reward, bool* term){
01009   //if (UCTDEBUG) cout << "  simulateNextState" << endl;
01010 
01011 
01012   // check if its up to date
01013   pthread_mutex_lock(&info->statemodel_mutex);
01014   StateActionInfo* modelInfo = NULL;
01015   modelInfo = &(info->historyModel[action][history]);
01016 
01017   pthread_mutex_lock(&update_mutex);
01018   bool upToDate = modelInfo->frameUpdated >= lastUpdate;
01019   pthread_mutex_unlock(&update_mutex);
01020 
01021   if (!upToDate){
01022     // must put in appropriate history
01023     if (HISTORY_SIZE > 0){
01024       std::vector<float> modState = *discState;
01025       for (int i = 0; i < HISTORY_FL_SIZE; i++){
01026         modState.push_back(history[i]);
01027       }
01028       updateStateActionHistoryFromModel(modState, action, modelInfo);
01029     } else {
01030       updateStateActionHistoryFromModel(*discState, action, modelInfo);
01031     }
01032   }
01033 
01034   *reward = modelInfo->reward;
01035   *term = (rng.uniform() < modelInfo->termProb);
01036 
01037   if (*term){
01038     pthread_mutex_unlock(&info->statemodel_mutex);
01039     return actualState;
01040   }
01041 
01042   float randProb = rng.uniform();
01043 
01044   float probSum = 0.0;
01045   std::vector<float> nextstate;
01046 
01047   if (REALSTATEDEBUG) cout << "randProb: " << randProb << " numNext: " << modelInfo->transitionProbs.size() << endl;
01048 
01049   //if (modelInfo->transitionProbs.size() == 0)
01050   nextstate = actualState;
01051 
01052   for (std::map<std::vector<float>, float>::iterator outIt
01053          = modelInfo->transitionProbs.begin();
01054        outIt != modelInfo->transitionProbs.end(); outIt++){
01055 
01056     float prob = (*outIt).second;
01057     probSum += prob;
01058     if (REALSTATEDEBUG) cout << randProb << ", " << probSum << ", " << prob << endl;
01059 
01060     if (randProb <= probSum){
01061       nextstate = (*outIt).first;
01062       if (REALSTATEDEBUG) cout << "selected state " << randProb << ", " << probSum << ", " << prob << endl;
01063       break;
01064     }
01065   }
01066 
01067   pthread_mutex_unlock(&info->statemodel_mutex);
01068 
01069   if (trackActual){
01070 
01071     // find the relative change from discrete center
01072     std::vector<float> relChange = subVec(nextstate, *discState);
01073 
01074     // add that on to actual current state value
01075     nextstate = addVec(actualState, relChange);
01076 
01077   }
01078 
01079   // check that next state is valid
01080   for (unsigned j = 0; j < nextstate.size(); j++){
01081     float factor = EPSILON;
01082     if (statesPerDim[j] > 0)
01083       factor = (featmax[j] - featmin[j]) / (float)statesPerDim[j];
01084     if (nextstate[j] < (featmin[j]-factor)
01085         || nextstate[j] > (featmax[j]+factor)){
01086       return actualState;
01087     }
01088   }
01089 
01090   // return new actual state
01091   return nextstate;
01092 
01093 }
01094 
01095 std::vector<float> ParallelETUCT::selectRandomState(){
01096 
01097   pthread_mutex_lock(&statespace_mutex);
01098   if (statespace.size() == 0){
01099     pthread_mutex_unlock(&statespace_mutex);
01100     return std::vector<float>(featmax.size());
01101   }
01102   pthread_mutex_unlock(&statespace_mutex);
01103 
01104   // take a random state from the space of ones we've visited
01105   int index = 0;
01106   std::vector<float> state;
01107 
01108   pthread_mutex_lock(&statespace_mutex);
01109   if (statespace.size() > 1){
01110     index = rng.uniformDiscrete(0, statespace.size()-1);
01111   }
01112   pthread_mutex_unlock(&statespace_mutex);
01113 
01114   int cnt = 0;
01115 
01116   if (PTHREADDEBUG) cout << "*** Planning thread wants search lock (randomstate) ***" << endl << flush;
01117 
01118   pthread_mutex_lock(&statespace_mutex);
01119   for (std::set<std::vector<float> >::iterator i = statespace.begin();
01120        i != statespace.end(); i++, cnt++){
01121     if (cnt == index){
01122       state = *i;
01123       break;
01124     }
01125   }
01126   pthread_mutex_unlock(&statespace_mutex);
01127 
01128   return state;
01129 }
01130 
01131 
01132 void* parallelSearchStart(void* arg){
01133   ParallelETUCT* pe = reinterpret_cast<ParallelETUCT*>(arg);
01134 
01135   cout << "start parallel uct planning search thread" << endl << flush;
01136 
01137   while(true){
01138     pe->parallelSearch();
01139   }
01140 
01141   return NULL;
01142 }
01143 
01144 void ParallelETUCT::parallelSearch(){
01145 
01146   std::vector<float> actS;
01147   state_t discS;
01148   std::deque<float> searchHistory;
01149 
01150   // get new planning state
01151   if (PTHREADDEBUG) {
01152     cout << "*** Planning thread wants planning state lock ***" << endl << flush;
01153   }
01154   pthread_mutex_lock(&(plan_state_mutex));
01155   if (HISTORY_SIZE > 0) pthread_mutex_lock(&history_mutex);
01156 
01157   // take the state we're in 
01158   actS = actualPlanState;
01159   discS = discPlanState;
01160   searchHistory = saHistory;
01161 
01162 
01163   // wait for non-null
01164   if (discS == NULL){
01165     pthread_mutex_unlock(&(plan_state_mutex));
01166     if (HISTORY_SIZE > 0) pthread_mutex_unlock(&history_mutex);
01167     return;
01168   }
01169 
01170   if (PTHREADDEBUG){
01171     pthread_mutex_lock(&statespace_mutex);
01172     cout << "  uct search from state s ("
01173          << statedata[discS].uctVisits <<"): ";
01174     pthread_mutex_unlock(&statespace_mutex);
01175 
01176     for (unsigned i = 0; i < discS->size(); i++){
01177       cout << (*discS)[i] << ", ";
01178     }
01179     cout << endl << flush;
01180   }
01181 
01182   // call uct search on it
01183   pthread_mutex_unlock(&(plan_state_mutex));
01184   if (HISTORY_SIZE > 0) pthread_mutex_unlock(&history_mutex);
01185 
01186   if (PTHREADDEBUG) cout << "*** Planning thread wants search lock ***" << endl;
01187   uctSearch(actS, discS, 0, searchHistory);
01188 
01189   pthread_yield();
01190 
01191 }
01192 
01193 
01194 // canonicalize all the states so we already have them in our statespace
01195 void ParallelETUCT::initStates(){
01196   cout << "init states" << endl;
01197   std::vector<float> s(featmin.size());
01198 
01199   fillInState(s,0);
01200 }
01201 
01202 void ParallelETUCT::fillInState(std::vector<float>s, int depth){
01203 
01204   // if depth == size, canonicalize and return
01205   if (depth == (int)featmin.size()){
01206     canonicalize(s);
01207     return;
01208   }
01209 
01210   // go through all features at depth
01211   for (float i = featmin[depth]; i < featmax[depth]+1; i++){
01212     s[depth] = i;
01213     fillInState(s, depth+1);
01214   }
01215 }
01216 
01217 
01218 
01219 void ParallelETUCT::savePolicy(const char* filename){
01220 
01221   ofstream policyFile(filename, ios::out | ios::binary | ios::trunc);
01222 
01223   // first part, save the vector size
01224   int fsize = featmin.size();
01225   policyFile.write((char*)&fsize, sizeof(int));
01226 
01227   // save numactions
01228   policyFile.write((char*)&numactions, sizeof(int));
01229 
01230   // go through all states, and save Q values
01231   pthread_mutex_lock(&statespace_mutex);
01232 
01233   for (std::set< std::vector<float> >::iterator i = statespace.begin();
01234        i != statespace.end(); i++){
01235     pthread_mutex_unlock(&statespace_mutex);
01236 
01237     state_t s = canonicalize(*i);
01238 
01239     pthread_mutex_lock(&statespace_mutex);
01240     state_info* info = &(statedata[s]);
01241     pthread_mutex_unlock(&statespace_mutex);
01242 
01243     // save state
01244     policyFile.write((char*)&((*i)[0]), sizeof(float)*fsize);
01245 
01246     // save q-values
01247     pthread_mutex_lock(&info->stateinfo_mutex);
01248     policyFile.write((char*)&(info->Q[0]), sizeof(float)*numactions);
01249     pthread_mutex_unlock(&info->stateinfo_mutex);
01250 
01251     pthread_mutex_lock(&statespace_mutex);
01252   }
01253   pthread_mutex_unlock(&statespace_mutex);
01254 
01255   policyFile.close();
01256 }
01257 
01258 
01259 
01260 void ParallelETUCT::loadPolicy(const char* filename){
01261 
01262   ifstream policyFile(filename, ios::in | ios::binary);
01263 
01264   // first part, save the vector size
01265   int fsize;
01266   policyFile.read((char*)&fsize, sizeof(int));
01267   cout << "Numfeats loaded: " << fsize << endl << flush;
01268 
01269   // save numactions
01270   int nact;
01271   policyFile.read((char*)&nact, sizeof(int));
01272   cout << "nact loaded: " << nact << endl << flush;
01273   cout << " numactions: " << numactions << endl << flush;
01274 
01275   if (nact != numactions){
01276     cout << "this policy is not valid loaded nact: " << nact
01277          << " was told: " << numactions << endl << flush;
01278     exit(-1);
01279   }
01280 
01281   // go through all states, loading q values
01282   while(!policyFile.eof()){
01283     std::vector<float> state(fsize, 0.0);
01284 
01285     // load state
01286     policyFile.read((char*)&(state[0]), sizeof(float)*fsize);
01287     //if (LOADDEBUG){
01288     //cout << "load policy for state: ";
01289     // printState(state);
01290     //}
01291 
01292     state_t s = canonicalize(state);
01293 
01294     pthread_mutex_lock(&statespace_mutex);
01295     state_info* info = &(statedata[s]);
01296     pthread_mutex_unlock(&statespace_mutex);
01297 
01298     if (policyFile.eof()) break;
01299 
01300     // load q values
01301     pthread_mutex_lock(&info->stateinfo_mutex);
01302 
01303     policyFile.read((char*)&(info->Q[0]), sizeof(float)*numactions);
01304 
01305     info->uctVisits = numactions * 100;
01306 
01307     for (int j = 0; j < numactions; j++){
01308       info->uctActions[j] = 100;
01309     }
01310 
01311     info->needsUpdate = true;
01312 
01313     pthread_mutex_unlock(&info->stateinfo_mutex);
01314 
01315     //if (LOADDEBUG){
01316     //cout << "Q values: " << endl;
01317     //for (int iAct = 0; iAct < numactions; iAct++){
01318     //  cout << " Action: " << iAct << " val: " << info->Q[iAct] << endl;
01319     //}
01320     //}
01321   }
01322 
01323   policyFile.close();
01324   cout << "Policy loaded!!!" << endl << flush;
01325 }
01326 
01327 void ParallelETUCT::logValues(ofstream *of, int xmin, int xmax, int ymin, int ymax){
01328   std::vector<float> state(2, 0.0);
01329   for (int i = xmin ; i < xmax; i++){
01330     for (int j = ymin; j < ymax; j++){
01331       state[0] = j;
01332       state[1] = i;
01333       state_t s = canonicalize(state);
01334 
01335       pthread_mutex_lock(&statespace_mutex);
01336       state_info* info = &(statedata[s]);
01337       pthread_mutex_unlock(&statespace_mutex);
01338 
01339       pthread_mutex_lock(&info->stateinfo_mutex);
01340 
01341       std::vector<float> &Q_s = info->Q;
01342       const std::vector<float>::iterator max =
01343         random_max_element(Q_s.begin(), Q_s.end());
01344       *of << (*max) << ",";
01345 
01346       pthread_mutex_unlock(&info->stateinfo_mutex);
01347 
01348     }
01349   }
01350 }
01351 
01352 
01353 // should do it such that an already discretized state stays the same
01354 // mainly the numerical value of each bin should be the average of that bin
01355 std::vector<float> ParallelETUCT::discretizeState(const std::vector<float> &s){
01356   std::vector<float> ds(s.size());
01357 
01358   for (unsigned i = 0; i < s.size(); i++){
01359 
01360     // since i'm sometimes doing this for discrete domains
01361     // want to center bins on 0, not edge on 0
01362     //cout << "feat " << i << " range: " << featmax[i] << " " << featmin[i] << " " << (featmax[i]-featmin[i]) << " n: " << (float)statesPerDim;
01363 
01364     float factor = (featmax[i] - featmin[i]) / (float)statesPerDim[i];
01365     int bin = 0;
01366     if (s[i] > 0){
01367       bin = (int)((s[i]+factor/2) / factor);
01368     } else {
01369       bin = (int)((s[i]-factor/2) / factor);
01370     }
01371 
01372     ds[i] = factor*bin;
01373     //cout << " factor: " << factor << " bin: " << bin;
01374     //cout << " Original: " << s[i] << " Discrete: " << ds[i] << endl;
01375   }
01376 
01377   return ds;
01378 }
01379 
01380 
01381 std::vector<float> ParallelETUCT::addVec(const std::vector<float> &a, const std::vector<float> &b){
01382   if (a.size() != b.size())
01383     cout << "ERROR: add vector sizes wrong " << a.size() << ", " << b.size() << endl;
01384 
01385   std::vector<float> c(a.size(), 0.0);
01386   for (unsigned i = 0; i < a.size(); i++){
01387     c[i] = a[i] + b[i];
01388   }
01389 
01390   return c;
01391 }
01392 
01393 std::vector<float> ParallelETUCT::subVec(const std::vector<float> &a, const std::vector<float> &b){
01394   if (a.size() != b.size())
01395     cout << "ERROR: sub vector sizes wrong " << a.size() << ", " << b.size() << endl;
01396 
01397   std::vector<float> c(a.size(), 0.0);
01398   for (unsigned i = 0; i < a.size(); i++){
01399     c[i] = a[i] - b[i];
01400   }
01401 
01402   return c;
01403 }
01404 
01405 void ParallelETUCT::setFirst(){
01406   if (HISTORY_SIZE == 0) return;
01407 
01408   if (HISTORYDEBUG) cout << "first action, set sahistory to 0s" << endl;
01409 
01410   pthread_mutex_lock(&(history_mutex));
01411   // first action, reset history vector
01412   saHistory.resize(saHistory.size(), 0.0);
01413   pthread_mutex_unlock(&(history_mutex));
01414 }
01415 
01416 void ParallelETUCT::setSeeding(bool seeding){
01417 
01418   if (HISTORYDEBUG) cout << "set seed mode to " << seeding << endl;
01419   seedMode = seeding;
01420 
01421 }


rl_agent
Author(s): Todd Hester
autogenerated on Thu Jun 6 2019 22:00:13