LinearSplitsTree.cc
00001 #include "LinearSplitsTree.hh"
00002 
00003 // LinearSplitsTree, from the following sources:
00004 
00005 // Include stuff for newmat matrix libraries
00006 
00007 #define WANT_MATH                    // include.h will get math fns
00008                                      // newmatap.h will get include.h
00009 #include "../newmat/newmatap.h"      // need matrix applications
00010 #ifdef use_namespace
00011 using namespace NEWMAT;              // access NEWMAT namespace
00012 #endif
00013 
00014 // TODO:
00015 //  - save regression from split testing rather than re-building it later
00016 
00017 
00018 LinearSplitsTree::LinearSplitsTree(int id, int trainMode, int trainFreq, int m,
00019                                    float featPct, bool simple, float min_er,
00020                                    Random rng):
00021   id(id), mode(trainMode), 
00022   freq(trainFreq), M(m),
00023   featPct(featPct), SIMPLE(simple), 
00024   MIN_ER(min_er), rng(rng)
00025 {
00026 
00027   nnodes = 0;
00028   nOutput = 0;
00029   nExperiences = 0;
00030   hadError = false;
00031   maxnodes = N_LS_NODES;
00032   totalnodes = 0;
00033 
00034   // how close two splits' scores must be to count as a tie (broken randomly)
00035   SPLIT_MARGIN = 0.0; //0.02; //5; //01; //0.05; //0.2; //0.05;
00036 
00037   LMDEBUG = false;// true;
00038   DTDEBUG = false;//true;
00039   SPLITDEBUG = false; //true;
00040   STOCH_DEBUG = false; //true; //false; //true;
00041   INCDEBUG = false; //true; //false; //true;
00042   NODEDEBUG = false;
00043   COPYDEBUG = false; //true;
00044 
00045   cout << "Created linear splits decision tree " << id;
00046   if (SIMPLE) cout << " simple regression";
00047   else cout << " multivariate regression";
00048   if (DTDEBUG){
00049     cout << " mode: " << mode << " freq: " << freq << endl;
00050   }
00051   cout << " MIN_ER: " << MIN_ER << endl;
00052 
00053 
00054   initNodes();
00055   initTree();
00056 
00057 }
00058 
00059 LinearSplitsTree::LinearSplitsTree(const LinearSplitsTree& ls):
00060   id(ls.id), mode(ls.mode), 
00061   freq(ls.freq), M(ls.M),
00062   featPct(ls.featPct), SIMPLE(ls.SIMPLE), 
00063   MIN_ER(ls.MIN_ER), rng(ls.rng)
00064 {
00065   COPYDEBUG = ls.COPYDEBUG;
00066   if (COPYDEBUG) cout << "LS copy " << id << endl;
00067   nnodes = 0;
00068   nOutput = ls.nOutput;
00069   nExperiences = ls.nExperiences;
00070   hadError = ls.hadError;
00071   totalnodes = 0;
00072   maxnodes = ls.maxnodes;
00073   SPLIT_MARGIN = ls.SPLIT_MARGIN; 
00074   LMDEBUG = ls.LMDEBUG;
00075   DTDEBUG = ls.DTDEBUG;
00076   SPLITDEBUG = ls.SPLITDEBUG;
00077   STOCH_DEBUG = ls.STOCH_DEBUG; 
00078   INCDEBUG = ls.INCDEBUG; 
00079   NODEDEBUG = ls.NODEDEBUG;
00080 
00081   if (COPYDEBUG) cout << "   LS copy nodes, experiences, root, etc" << endl;
00082   // copy all experiences
00083   for (int i = 0; i < N_LST_EXP; i++){
00084     allExp[i] = ls.allExp[i];
00085   }
00086   if (COPYDEBUG) cout << "   LS copied exp array" << endl;
00087 
00088   // set experience pointers
00089   experiences.resize(ls.experiences.size());
00090   for (unsigned i = 0; i < ls.experiences.size(); i++){
00091     experiences[i] = &(allExp[i]);
00092   }
00093   if (COPYDEBUG) cout << "   LS set experience pointers" << endl;
00094 
00095   // now the tricky part, set the pointers inside the tree nodes correctly
00096   initNodes();
00097 
00098   if (COPYDEBUG) cout << "   LS copy tree " << endl;
00099   root = allocateNode();
00100   lastNode = root;
00101   copyTree(root, ls.root);
00102   if (COPYDEBUG) cout << "   LS  tree copy done" << endl;
00103    
00104   if (COPYDEBUG) {
00105     cout << endl << "New tree: " << endl;
00106     printTree(root, 0);
00107     cout << endl;
00108     cout << "  LS copy done" << endl;
00109   }
00110 
00111 }
00112 
00113 void LinearSplitsTree::copyTree(tree_node* newNode, tree_node* origNode){
00114 
00115   int nodeId = newNode->id;
00116 
00117   if (COPYDEBUG) {
00118     cout << "    Copy node " << newNode->id << " from node " << origNode->id << endl;
00119     cout << "    NewAddy " << newNode << ", old: " << origNode << endl;
00120   }
00121 
00122   // copy node from t
00123   *newNode = *origNode;
00124   newNode->id = nodeId;
00125 
00126   // if it has children, allocate and copy them too
00127   if (origNode->l != NULL && !newNode->leaf){
00128     newNode->l = allocateNode();
00129     if (COPYDEBUG) cout << "     Copy left node " << newNode->l->id << " from " << origNode->l->id << endl;
00130     copyTree(newNode->l, origNode->l);
00131   } else {
00132     newNode->l = NULL;
00133   }
00134 
00135   if (origNode->r != NULL && !newNode->leaf){
00136     newNode->r = allocateNode();
00137     if (COPYDEBUG) cout << "     Copy right node " << newNode->r->id << " from " << origNode->r->id << endl;
00138     copyTree(newNode->r, origNode->r);
00139   } else {
00140     newNode->r = NULL;
00141   }
00142 }
00143 
00144 LinearSplitsTree* LinearSplitsTree::getCopy(){
00145   LinearSplitsTree* copy = new LinearSplitsTree(*this);
00146   return copy;
00147 }
00148 
00149 LinearSplitsTree::~LinearSplitsTree() {
00150   deleteTree(root);
00151   for (unsigned i = N_LST_EXP; i < experiences.size(); i++){
00152     delete experiences[i];
00153   }
00154   experiences.clear();
00155 }
00156 
00157 // here the target output will be a single value
00158 bool LinearSplitsTree::trainInstance(classPair &instance){
00159 
00160   if (DTDEBUG) cout << id << " trainInstance" << endl;
00161 
00162   bool modelChanged = false;
00163 
00164   // simply add this instance to the set of experiences
00165 
00166   // take from static array until we run out
00167   tree_experience *e;
00168   if (nExperiences < N_LST_EXP){
00169     // from statically created set of experiences
00170     e = &(allExp[nExperiences]);
00171 
00172   } else {
00173     // dynamically create experience
00174     e = new tree_experience;
00175   }
00176 
00177 
00178   e->input = instance.in;
00179   e->output = instance.out;
00180   experiences.push_back(e);
00181   nExperiences++;
00182 
00183   if (nExperiences == 1000000){
00184     cout << "Reached limit of # experiences allowed." << endl;
00185     return false;
00186   }
00187 
00188   if (nExperiences != (int)experiences.size())
00189     cout << "ERROR: experience size mismatch: " << nExperiences << ", " << experiences.size() << endl;
00190 
00191   //cout << nExperiences << endl << flush;
00192   //if (nExperiences == 503 && id == 10){
00193   //  DTDEBUG = true;
00194   //  SPLITDEBUG = true;
00195   //  INCDEBUG = true;
00196   //}
00197 
00198   if ( DTDEBUG) {
00199     cout << "Original input: ";
00200     for (unsigned i = 0; i < instance.in.size(); i++){
00201       cout << instance.in[i] << ", ";
00202     }
00203     cout << endl << " Original output: " << instance.out << endl;
00204     cout << "Added exp id: " << nExperiences << " output: " << e->output << endl;
00205     cout << "Address: " << e << " Input : ";
00206     for (unsigned i = 0; i < e->input.size(); i++){
00207       cout << e->input[i] << ", ";
00208     }
00209     cout << endl << " Now have " << nExperiences << " experiences." << endl;
00210   }
00211 
00212   // mode 0: re-build every step
00213   if (mode == BUILD_EVERY || nExperiences <= 1){
00214     rebuildTree();
00215     modelChanged = true;
00216   }
00217 
00218   // mode 1: re-build on error only
00219   else if (mode == BUILD_ON_ERROR){
00220 
00221     // build on misclassification
00222     // check for misclassify
00223     std::map<float, float> answer;
00224     testInstance(e->input, &answer);
00225     float val = answer.begin()->first;
00226     float error = fabs(val - e->output);
00227 
00228     if (error > 0.0){
00229       rebuildTree();
00230       modelChanged = true;
00231     } 
00232   }
00233 
00234   // mode 2: re-build every FREQ steps
00235   else if (mode == BUILD_EVERY_N){
00236     // build every freq steps
00237     if (!modelChanged && (nExperiences % freq) == 0){
00238       rebuildTree();
00239       modelChanged = true;
00240     }
00241   }
00242 
00243   if (modelChanged){
00244     if (DTDEBUG || SPLITDEBUG) cout << "DT " << id << " tree re-built." << endl;
00245 
00246     if (DTDEBUG || SPLITDEBUG){
00247       cout << endl << "DT: " << id << endl;
00248       printTree(root, 0);
00249       cout << "Done printing tree" << endl;
00250     }
00251   }
00252 
00253   /*
00254   if (nExperiences % 100 == 0){
00255     cout << endl << "DT: " << id << endl;
00256     printTree(root, 0);
00257     cout << "Done printing tree" << endl;
00258     
00259     // look at error
00260     float errorSum = 0.0;
00261     for (int i = 0; i < nExperiences; i++){
00262       e = experiences[i];
00263       std::map<float, float> answer;
00264       testInstance(e->input, &answer);
00265       float val = answer.begin()->first;
00266       float error = fabs(val - e->output);
00267       errorSum += error;
00268     }
00269     float avgError = errorSum / (float)nExperiences;
00270     cout << "avgError: " << avgError << endl << endl;
00271   }
00272   */
00273 
00274   return modelChanged;
00275 
00276 }
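As a usage sketch only: the snippet below shows how a caller might feed a classPair to trainInstance and then query testInstance. The constructor argument order is taken from this file; the BUILD_EVERY constant and the seeded Random constructor are assumed to come from the accompanying headers (they are not defined here).

    #include "LinearSplitsTree.hh"
    #include <iostream>
    #include <map>

    int main(){
      Random rng(1);  // assumed seeded constructor from the shared Random class
      // arguments: id, trainMode, trainFreq, m, featPct, simple, min_er, rng
      LinearSplitsTree tree(0, BUILD_EVERY, 1, 10, 0.0, true, 0.001, rng);

      classPair p;
      p.in.assign(2, 0.5f);                 // two input features
      p.out = 2.0f;                         // target output
      bool rebuilt = tree.trainInstance(p); // true when the tree was re-built

      std::map<float, float> pred;
      tree.testInstance(p.in, &pred);       // maps predicted output value -> probability
      std::cout << "rebuilt: " << rebuilt
                << " prediction: " << pred.begin()->first << std::endl;
      return 0;
    }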
00277 
00278 
00279 // here the target output will be a single value
00280 bool LinearSplitsTree::trainInstances(std::vector<classPair> &instances){
00281   if (DTDEBUG) cout << "DT trainInstances: " << instances.size() << endl;
00282 
00283   bool modelChanged = false;
00284 
00285   bool doBuild = false;
00286 
00287   // loop through instances, possibly checking for errors
00288   for (unsigned a = 0; a < instances.size(); a++){
00289     classPair instance = instances[a];
00290 
00291     // simply add this instance to the set of experiences
00292 
00293     // take from static array until we run out
00294     tree_experience *e;
00295     if (nExperiences < N_LST_EXP){
00296       // from statically created set of experiences
00297       e = &(allExp[nExperiences]);
00298 
00299     } else {
00300       // dynamically create experience
00301       e = new tree_experience;
00302     }
00303 
00304 
00305     e->input = instance.in;
00306     e->output = instance.out;
00307     experiences.push_back(e);
00308     nExperiences++;
00309 
00310     if (nExperiences == 1000000){
00311       cout << "Reached limit of # experiences allowed." << endl;
00312       return false;
00313     }
00314 
00315     if (nExperiences != (int)experiences.size())
00316       cout << "ERROR: experience size mismatch: " << nExperiences << ", " << experiences.size() << endl;
00317 
00318     if (DTDEBUG) {
00319       cout << "Original input: ";
00320       for (unsigned i = 0; i < instance.in.size(); i++){
00321         cout << instance.in[i] << ", ";
00322       }
00323       cout << endl << " Original output: " << instance.out << endl;
00324       cout << "Added exp id: " << nExperiences << " output: " << e->output << endl;
00325       cout << "Address: " << e << " Input : ";
00326       for (unsigned i = 0; i < e->input.size(); i++){
00327         cout << e->input[i] << ", ";
00328       }
00329       cout << endl << " Now have " << nExperiences << " experiences." << endl;
00330     }
00331 
00332     /*
00333     if (nExperiences % 100 == 0){
00334       cout << endl << "DT: " << id << endl;
00335       printTree(root, 0);
00336       cout << "Done printing tree" << endl;
00337       
00338       // look at error
00339       float errorSum = 0.0;
00340       for (int i = 0; i < nExperiences; i++){
00341         e = experiences[i];
00342         std::map<float, float> answer;
00343         testInstance(e->input, &answer);
00344         float val = answer.begin()->first;
00345         float error = fabs(val - e->output);
00346         errorSum += error;
00347       }
00348       float avgError = errorSum / (float)nExperiences;
00349       cout << "avgError: " << avgError << endl << endl;
00350     }
00351     */
00352 
00353     // depending on mode/etc, maybe re-build tree
00354 
00355     // don't need to check if we've already decided
00356     if (doBuild) continue;
00357 
00358     // mode 0: re-build every step
00359     if (mode == BUILD_EVERY || nExperiences <= 1){
00360       doBuild = true;
00361     }
00362 
00363     // mode 1: re-build on error only
00364     else if (mode == BUILD_ON_ERROR){
00365 
00366       // build on misclassification
00367       // check for misclassify
00368       std::map<float, float> answer;
00369       testInstance(e->input, &answer);
00370       float val = answer.begin()->first;
00371       float error = fabs(val - e->output);
00372 
00373       if (error > 0.0){
00374         doBuild = true;
00375       }
00376 
00377     }
00378 
00379     // mode 2: re-build every FREQ steps
00380     else if (mode == BUILD_EVERY_N){
00381       // build every freq steps
00382       if (!modelChanged && (nExperiences % freq) == 0){
00383         doBuild = true;
00384       }
00385     }
00386 
00387   } // loop of new instances
00388 
00389   if (DTDEBUG) cout << "Added " << instances.size() << " new instances. doBuild = " << doBuild << endl;
00390 
00391   if (doBuild){
00392     rebuildTree();
00393     modelChanged = true;
00394   }
00395 
00396   if (modelChanged){
00397     if (DTDEBUG || SPLITDEBUG) cout << "DT " << id << " tree re-built." << endl;
00398 
00399     if (DTDEBUG || SPLITDEBUG){
00400       cout << endl << "DT: " << id << endl;
00401       printTree(root, 0);
00402       cout << "Done printing tree" << endl;
00403     }
00404   }
00405 
00406   return modelChanged;
00407 
00408 }
00409 
00410 
00411 void LinearSplitsTree::rebuildTree(){
00412   //cout << "rebuild tree " << id << " on exp: " << nExperiences << endl;
00413   //deleteTree(root);
00414 
00415   // re-calculate avg error for root
00416   root->avgError = calcAvgErrorforSet(experiences);
00417 
00418   buildTree(root, experiences, false);
00419   //cout << "tree " << id << " rebuilt. " << endl;
00420 }
00421 
00422 
00423 // TODO: here we want to return the probability of the output value being each of the possible values, in the stochastic case
00424 void LinearSplitsTree::testInstance(const std::vector<float> &input, std::map<float, float>* retval){
00425   if (DTDEBUG) cout << "testInstance on tree " << id << endl;
00426 
00427   retval->clear();
00428 
00429   // in case the tree is empty
00430   if (experiences.size() == 0){
00431     (*retval)[0.0] = 1.0;
00432     return;
00433   }
00434 
00435   // follow through tree to leaf
00436   tree_node* leaf = traverseTree(root, input);
00437   lastNode = leaf;
00438 
00439   // and return mapping of outputs and their probabilities
00440   leafPrediction(leaf, input, retval);
00441 
00442 }
00443 
00444 float LinearSplitsTree::getConf(const std::vector<float> &input){
00445   if (DTDEBUG) cout << "getConf" << endl;
00446 
00447   // in case the tree is empty
00448   if (experiences.size() == 0){
00449     return 0;
00450   }
00451 
00452   if (lastNode == NULL)
00453     return 0;
00454 
00455   // follow through tree to leaf
00456   //tree_node* leaf = traverseTree(root, input);
00457 
00458   // and return # in this leaf
00459   float conf = (float)lastNode->nInstances / (float)(2.0*M);
00460   if (conf > 1.0)
00461     return 1.0;
00462   else
00463     return conf;
00464 
00465 }
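In other words, the reported confidence grows linearly with the number of instances at the last-visited leaf and saturates at one once that leaf has seen at least 2M experiences:

    \mathrm{conf} = \min\!\left(1,\; \frac{n_{\mathrm{leaf}}}{2M}\right)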
00466 
00467 // check to see if this state is one we should explore
00468 // to get more info on potential splits
00469 
00470 
00471 
00472 // init the tree
00473 void LinearSplitsTree::initTree(){
00474   if (DTDEBUG) cout << "initTree()" << endl;
00475   root = allocateNode();
00476 
00477   if (DTDEBUG) cout << "   root id = " << root->id << endl;
00478 
00479   // burn through a few random draws so trees with different ids use different random sequences
00480   for (int i = 0; i < id; i++){
00481     rng.uniform(0, 1);
00482   }
00483 
00484 }
00485 
00486 
00487 
00488 // init a tree node
00489 void LinearSplitsTree::initTreeNode(tree_node* node){
00490   if (DTDEBUG) cout << "initTreeNode()";
00491 
00492   node->id = nnodes++;
00493   if (DTDEBUG) cout << " id = " << node->id << endl;
00494 
00495   totalnodes++;
00496   if (totalnodes > maxnodes){
00497     maxnodes = totalnodes;
00498     if (DTDEBUG) cout << id << " LS MAX nodes: " << maxnodes << endl;
00499   }
00500 
00501   // split criterion
00502   node->dim = -1;
00503   node->val = -1;
00504 
00505   // current data
00506   node->nInstances = 0;
00507 
00508   // coefficients
00509   node->constant = 0.0;
00510 
00511   // coefficients will get resized later
00512   //  node->coefficients.resize(2,0);
00513 
00514   // next nodes in tree
00515   node->l = NULL;
00516   node->r = NULL;
00517 
00518   node->leaf = true;
00519   node->avgError = 10000;
00520 
00521 }
00522 
00524 void LinearSplitsTree::deleteTree(tree_node* node){
00525   if (DTDEBUG) cout << "deleteTree, node=" << node->id << endl;
00526 
00527   if (node==NULL)
00528     return;
00529 
00530   totalnodes--;
00531 
00532   node->nInstances = 0;
00533   node->coefficients.clear();
00534  
00535   //recursively call deleteTree on children
00536   // then delete them
00537   if (!node->leaf){
00538     // left child
00539     if (node->l != NULL){
00540       deleteTree(node->l);
00541       deallocateNode(node->l);
00542       node->l = NULL;
00543     }
00544 
00545     // right child
00546     if (node->r != NULL){
00547       deleteTree(node->r);
00548       deallocateNode(node->r);
00549       node->r = NULL;
00550     }
00551   }
00552 
00553   node->dim = -1;
00554   node->leaf = true;
00555   node->avgError = 10000;
00556   node->val = -1;
00557   node->constant = 0;
00558 }
00559 
00560 
00562 LinearSplitsTree::tree_node* LinearSplitsTree::getCorrectChild(tree_node* node,
00563                                                                const std::vector<float> &input){
00564 
00565   if (DTDEBUG) cout << "getCorrectChild, node=" << node->id << endl;
00566 
00567   if (passTest(node->dim, node->val, input))
00568     return node->l;
00569   else
00570     return node->r;
00571 
00572 }
00573 
00575 LinearSplitsTree::tree_node* LinearSplitsTree::traverseTree(tree_node* node,
00576                                                             const std::vector<float> &input){
00577 
00578   if (DTDEBUG) cout << "traverseTree, node=" << node->id << endl;
00579 
00580   while (!node->leaf){
00581     node = getCorrectChild(node, input);
00582   }
00583 
00584   return node;
00585 }
00586 
00587 
00589 bool LinearSplitsTree::passTest(int dim, float val, const std::vector<float> &input){
00590   if (DTDEBUG) cout << "passTest, dim=" << dim << ",val=" << val 
00591                     << ",input["<<dim<<"]=" << input[dim] <<endl;
00592 
00593   
00594   if (input[dim] > val)
00595     return false;
00596   else
00597     return true;
00598 
00599 }
00600 
00601 
00603 void LinearSplitsTree::buildTree(tree_node *node,
00604                        const std::vector<tree_experience*> &instances,
00605                        bool changed){
00606   if(DTDEBUG) cout << "buildTree, node=" << node->id
00607                    << ",nInstances:" << instances.size()
00608                    << ",chg:" << changed << endl;
00609 
00610   if (instances.size() == 0){
00611     cout << "Error: buildTree called on tree " << id << " node " << node->id << " with no instances." << endl;
00612     exit(-1);
00613   }
00614 
00615 
00616   // TODO: what about stochastic data?
00617   //std::vector<float> chiSquare = calcChiSquare(instances);
00618 
00619   // first, add instances to tree
00620   node->nInstances = instances.size();
00621 
00622   // add each output to this node
00623   bool allSame = true;
00624   float val0 = instances[0]->output;
00625   for (unsigned i = 1; i < instances.size(); i++){
00626     if (instances[i]->output != val0){
00627       allSame = false;
00628       break;
00629     }
00630   }
00631 
00632   // see if they're all the same
00633   if (allSame){
00634     makeLeaf(node, instances);
00635     if (DTDEBUG){
00636       cout << "Tree " << id << " node " << node->id 
00637            << " All " << node->nInstances
00638            << " classified with output "
00639            << instances[0]->output << ", " << node->constant << endl;
00640     }
00641     return;
00642   }
00643 
00644   // check if the node's linear model already has low enough error
00645   if (node != root && node->avgError < MIN_ER){
00646     makeLeaf(node, instances);
00647     if (node->avgError < MIN_ER){
00648       if (DTDEBUG) {
00649       cout << "Tree " << id << " node " << node->id
00650            << " has low error " << node->avgError << " keeping as leaf" << endl;
00651       }
00652       return;
00653     }
00654   }
00655   
00656   // if not, calculate ER to determine best split
00657   if (SPLITDEBUG) cout << endl << "Creating new decision node " << id << "-" << node->id << endl;
00658 
00659   node->leaf = false;
00660   //node->nInstances++;
00661 
00662   float bestER = -1.0;
00663   int bestDim = -1;
00664   float bestVal = -1;
00665   std::vector<tree_experience*> bestLeft;
00666   std::vector<tree_experience*> bestRight;
00667   float leftError = 10000;
00668   float rightError = 10000;
00669 
00670   testPossibleSplits(node->avgError, instances, &bestER, &bestDim, &bestVal, &bestLeft, &bestRight, &leftError, &rightError);
00671 
00672   implementSplit(node, instances, bestER, bestDim, bestVal, bestLeft, bestRight, changed, leftError, rightError);
00673 
00674 }
00675 
00676 
00677 void LinearSplitsTree::makeLeaf(tree_node* node, const std::vector<tree_experience*> &instances){
00678 
00679   // check on children
00680   if (node->l != NULL){
00681     deleteTree(node->l);
00682     deallocateNode(node->l);
00683     node->l = NULL;
00684   }
00685 
00686   if (node->r != NULL){
00687     deleteTree(node->r);
00688     deallocateNode(node->r);
00689     node->r = NULL;
00690   }
00691 
00692   node->leaf = true;
00693 
00694   // fit linear model
00695   if (SIMPLE)
00696     node->avgError = fitSimpleLinearModel(instances, &(node->constant), &(node->coefficients));
00697   else 
00698     node->avgError = fitMultiLinearModel(instances, &(node->constant), &(node->coefficients));
00699 
00700   if (DTDEBUG || LMDEBUG){
00701     cout << "make leaf, fit linear model with constant " << node->constant 
00702          << "  error: " << node->avgError << endl;
00703   }
00704 
00705 }
00706 
00707 
00708 float LinearSplitsTree::fitMultiLinearModel(const std::vector<tree_experience*> &instances,
00709                                             float *bestConstant, 
00710                                             std::vector<float> *bestCoefficients){
00711   if(DTDEBUG || LMDEBUG) cout << "fitMultiLinearModel"
00712                               << ",nInstances:" << instances.size() << endl;
00713 
00714   // make sure there are enough coefficients for all the features
00715   if (bestCoefficients->size() != instances[0]->input.size()){
00716     bestCoefficients->resize(instances[0]->input.size(), 0);  
00717   }
00718 
00719   // in case of size 1
00720   if (instances.size() == 1){
00721     *bestConstant = instances[0]->output;
00722     for (unsigned i = 0; i < bestCoefficients->size(); i++){
00723       (*bestCoefficients)[i] = 0.0;
00724     }
00725     if (LMDEBUG || SPLITDEBUG){
00726       cout << "  Singleton constant: " 
00727            << *bestConstant << " avgE: " << 0 << endl;
00728     }
00729     return 0;
00730   }
00731 
00732   // loop through all features, try simple single variable regression
00733   // keep track of error/coefficient of each
00734   float bestError = 100000;
00735   float avgError = 100000;
00736   bool doRegression = true;
00737   int ntimes = 0;
00738 
00739   std::vector<bool> featureMask(bestCoefficients->size(), true);
00740 
00741   while (doRegression){
00742     //cout << id << " Attempt linear model " << ntimes << endl;
00743     ntimes++;
00744 
00745     int nObs = (int)instances.size();
00746 
00747     int nFeats = 0;
00748     for (unsigned i = 0; i < featureMask.size(); i++){
00749       if (featureMask[i]) nFeats++;
00750     }
00751     if (nFeats < 1)
00752       break;
00753 
00754     // no feats or obs, no model to build
00755     if (nObs == 0 || nFeats == 0)
00756       break;
00757 
00758     Matrix X(nObs, nFeats);
00759     ColumnVector Y(nObs);
00760 
00761     std::vector<int> featIndices(nFeats);
00762 
00763     std::vector<bool> constants(nFeats,true);
00764     bool foundProblem = false;
00765     
00766     // load up matrices
00767     for (int i = 0; i < nObs; i++){
00768       tree_experience *e = instances[i];
00769       if (LMDEBUG) cout << "Obs: " << i;
00770 
00771       // go through all features
00772       int featIndex = 1;
00773       for (unsigned j = 0; j < featureMask.size(); j++){
00774         (*bestCoefficients)[j] = 0;
00775         if (!featureMask[j])
00776           continue;
00777 
00778         if (constants[j] && e->input[j] != instances[0]->input[j]){
00779           constants[j] = false;
00780         }
00781 
00782         if (i == nObs-1 && constants[j]){
00783           //cout << "PROBLEM: feat " << j << " is constant!" << endl;
00784           foundProblem = true;
00785           featureMask[j] = false;
00786           nFeats--;
00787           if (nFeats > 0)
00788             continue;
00789           else
00790             break;
00791         }
00792 
00793         featIndices[featIndex-1] = j;
00794         // NOTE: random noise could be added here to prevent collinear features (currently disabled)
00795         X(i+1,featIndex) = e->input[j]; // + rng.uniform(-0.00001, 0.00001);
00796         if (LMDEBUG){
00797           cout << " Feat " << featIndex << " index " << j
00798                << " val " << X(i+1,featIndex) << ",";
00799         }
00800         featIndex++;
00801       }
00802 
00803       Y(i+1) = e->output;
00804       if (LMDEBUG) cout << " out: " << e->output << endl;
00805     }
00806 
00807     if (foundProblem)
00808       continue;
00809 
00810     // make vector of 1s
00811     ColumnVector Ones(nObs); Ones = 1.0;
00812 
00813     // calculate means (averages) of each feature column [ .t() takes transpose]
00814     RowVector Mrow = Ones.t() * X / nObs;
00815 
00816     // and subtract the means from each feature column
00817     Matrix XC(nObs,nFeats);
00818     XC = X - Ones * Mrow;
00819 
00820     // do the same to Y [use Sum to get sum of elements]
00821     ColumnVector YC(nObs);
00822     Real mval = Sum(Y) / nObs;
00823     YC = Y - Ones * mval;
00824 
00825     // form sum of squares and product matrix
00826     //    [use << rather than = for copying Matrix into SymmetricMatrix]
00827     SymmetricMatrix SSQ;
00828     SSQ << XC.t() * XC;
00829 
00830     Try {
00831 
00833       // Cholesky Method
00834       LowerTriangularMatrix L = Cholesky(SSQ);
00835 
00836       // calculate estimate
00837       ColumnVector A = L.t().i() * (L.i() * (XC.t() * YC));
00839       
00841       // Least Squares Method
00842       /*
00843       // calculate estimate
00844       //    [bracket last two terms to force this multiplication first]
00845       //    [ .i() means inverse, but inverse is not explicity calculated]
00846       ColumnVector A = SSQ.i() * (XC.t() * YC);
00848       */
00849   
00850       // calculate estimate of constant term
00851       //    [AsScalar converts 1x1 matrix to Real]
00852       Real a = mval - (Mrow * A).AsScalar();
00853 
00854       // Calculate fitted values and residuals
00855       //int npred1 = nFeats+1;
00856       ColumnVector Fitted = X * A + a;
00857       ColumnVector Residual = Y - Fitted;
00858       //Real ResVar = Residual.SumSquare() / (nObs-npred1);
00859 
00860       // print out answers
00861       // for each instance
00862       bestError = 0;
00863       for (int i = 0; i < nObs; i++){
00864         if (DTDEBUG || LMDEBUG){
00865           cout << "instance " << i << " linear model predicted: " << Fitted(i+1)
00866                << " actual: " << instances[i]->output
00867                << " error: " << Residual(i+1) << endl;
00868         }
00869         bestError += fabs(Residual(i+1));
00870       }
00871       avgError = bestError / (float)nObs;
00872 
00873       // coeff
00874       for (int i = 0; i < nFeats; i++){
00875         if (DTDEBUG || LMDEBUG) cout << "Coeff " << i << " on feat: " << featIndices[i] << " is " << A(i+1) << endl;
00876         (*bestCoefficients)[featIndices[i]] = A(i+1);
00877       }
00878       if (DTDEBUG || LMDEBUG) cout << "constant is " << a << endl;
00879       *bestConstant = a;
00880 
00881     }
00882 
00883     CatchAll {
00884       // had an error trying the linear regression.
00885       // HACK TODO: for now, turn off one variable
00886       //cout << ntimes << " linear regression had exception" << endl;
00887       //<< BaseException::what() <<endl;
00888 
00889       /*
00890       for (int i = 0; i < nObs; i++){
00891         tree_experience *e = instances[i];
00892         cout << "Obs: " << i;
00893         
00894         // go through all features
00895         int featIndex = 1;
00896         for (unsigned j = 0; j < featureMask.size(); j++){
00897           node->coefficients[j] = 0;
00898           if (!featureMask[j])
00899             continue;
00900           
00901           
00902           cout << " Feat " << featIndex << " index " << j
00903                << " val " << X(i+1,featIndex) << ",";
00904           
00905           featIndex++;
00906         }
00907         
00908         cout << " out: " << e->output << endl;
00909       }
00910       */
00911 
00912       // tried this already, stop now
00913       if (ntimes > 2 || nFeats < 2){
00914         //cout << "max regression" << endl;
00915         doRegression = false;
00916         break;
00917       }
00918       for (unsigned j = 0; j < featureMask.size(); j++){
00919         if (featureMask[j]){
00920           //cout << "remove feature " << j << endl;
00921           featureMask[j] = false;
00922           nFeats--;
00923           break;
00924         }
00925       }
00926       continue;
00927     } // catch
00928 
00929     // it worked, don't need to do it again
00930     doRegression = false;
00931 
00932   }
00933 
00934   // return error
00935   return avgError;
00936 
00937 }
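In matrix form, the newmat calls above are ordinary least squares on mean-centered data, with the inverse taken through the Cholesky factor rather than formed explicitly (a sketch of the algebra; X_c and y_c denote the feature matrix and outputs after subtracting their column means):

    X_c^\top X_c = L L^\top, \qquad
    A = (X_c^\top X_c)^{-1} X_c^\top y_c = L^{-\top}\left(L^{-1} X_c^\top y_c\right), \qquad
    a = \bar{y} - \bar{x}^\top A

The value returned is the mean absolute residual, \frac{1}{n}\sum_i \lvert y_i - (a + x_i^\top A)\rvert.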
00938 
00939 
00940 float LinearSplitsTree::fitSimpleLinearModel(const std::vector<tree_experience*> &instances,
00941                                              float *bestConstant, std::vector<float> *bestCoefficients){
00942   if(DTDEBUG || LMDEBUG || SPLITDEBUG) cout << " fitSimpleLinearModel, "
00943                               << ",nInstances:" << instances.size() << endl;
00944   
00945   // make sure there are enough coefficients for all the features
00946   if (bestCoefficients->size() != instances[0]->input.size()){
00947     bestCoefficients->resize(instances[0]->input.size(), 0);  
00948   }
00949 
00950   // loop through all features, try simple single variable regression
00951   // keep track of error/coefficient of each
00952   int bestFeat = -1;
00953   float bestCoeff = -1;
00954   float bestError = 100000;
00955   *bestConstant = -1;
00956 
00957   // in case of size 1
00958   if (instances.size() == 1){
00959     bestFeat = 0;
00960     *bestConstant = instances[0]->output;
00961     for (unsigned i = 0; i < bestCoefficients->size(); i++){
00962       (*bestCoefficients)[i] = 0.0;
00963     }
00964     bestError = 0;
00965     if (LMDEBUG || SPLITDEBUG){
00966       cout << "  Singleton constant: " 
00967            << *bestConstant << " avgE: " << bestError << endl;
00968     }
00969     return 0;
00970   }
00971 
00972   std::vector<float> xsum(instances[0]->input.size(), 0);
00973   std::vector<float> xysum(instances[0]->input.size(), 0);
00974   std::vector<float> x2sum(instances[0]->input.size(), 0);
00975   float ysum = 0;
00976 
00977   int nObs = (int)instances.size();
00978   for (int i = 0; i < nObs; i++){
00979     tree_experience *e = instances[i];
00980     if (LMDEBUG) cout << "Obs: " << i;
00981     
00982     // go through all features
00983     for (unsigned j = 0; j < instances[0]->input.size(); j++){
00984       if (LMDEBUG) cout << ", F" << j << ": " << e->input[j];
00985       xsum[j] += e->input[j];
00986       xysum[j] += (e->input[j] * e->output);
00987       x2sum[j] += (e->input[j] * e->input[j]);
00988     }
00989     ysum += e->output;
00990     if (LMDEBUG) cout << ", out: " << e->output << endl;
00991   }
00992 
00993   // now go through all features and calc coeff and constant
00994   for (unsigned j = 0; j < instances[0]->input.size(); j++){
00995     float coeff = (xysum[j] - xsum[j]*ysum/nObs)/(x2sum[j]-(xsum[j]*xsum[j])/nObs);
00996     float constant = (ysum/nObs) - coeff*(xsum[j]/nObs);
00997 
00998     // so we don't get absurd coefficients that get rounded off earlier
00999     if (fabs(coeff) < 1e-5){
01000       coeff = 0.0;
01001     }
01002     if (fabs(constant) < 1e-5){
01003       constant = 0.0;
01004     }
01005 
01006     if (LMDEBUG) {
01007       cout << "Feat " << j << " coeff: " << coeff << ", constant: " 
01008            << constant  << endl;
01009     }
01010 
01011     // now try to make predictions and see what error is
01012     float errorSum = 0;
01013     for (int i = 0; i < nObs; i++){
01014       tree_experience *e = instances[i];
01015       float pred = constant + coeff * e->input[j];
01016       float error = fabs(pred - e->output);
01017       if (LMDEBUG) cout << "Instance " << i << " error: " << error << endl;
01018       errorSum += error;
01019     }
01020     float avgError = errorSum / (float)nObs;
01021     if (LMDEBUG) cout << "avgError: " << avgError << endl;
01022 
01023     // check if this is the best
01024     if (avgError < bestError){
01025       bestError = avgError;
01026       bestFeat = j;
01027       *bestConstant = constant;
01028       bestCoeff = coeff;
01029     }
01030   }
01031   
01032   if (LMDEBUG || SPLITDEBUG){
01033     cout << "  LST feat: " << bestFeat << " coeff: " 
01034          << bestCoeff << " constant: " 
01035          << *bestConstant << " avgE: " << bestError << endl;
01036   }
01037 
01038   if (bestFeat < 0 || bestFeat > (int)instances[0]->input.size()){
01039     for (unsigned i = 0; i < bestCoefficients->size(); i++){
01040       (*bestCoefficients)[i] = 0.0;
01041     }
01042     *bestConstant = 0.0;
01043     return 100000;
01044   }
01045 
01046   // fill in coeff vector
01047   for (unsigned i = 0; i < bestCoefficients->size(); i++){
01048     if (i == (unsigned)bestFeat)
01049       (*bestCoefficients)[i] = bestCoeff;
01050     else
01051       (*bestCoefficients)[i] = 0.0; 
01052   }
01053 
01054   return bestError;
01055  
01056 }
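The per-feature slope and intercept computed above are the standard closed-form estimates for single-variable least squares; in terms of the accumulated sums (xsum, xysum, x2sum, ysum over n = nObs instances):

    \beta_j = \frac{\sum_i x_{ij} y_i - \tfrac{1}{n}\bigl(\sum_i x_{ij}\bigr)\bigl(\sum_i y_i\bigr)}
                   {\sum_i x_{ij}^2 - \tfrac{1}{n}\bigl(\sum_i x_{ij}\bigr)^2},
    \qquad
    \alpha_j = \bar{y} - \beta_j \bar{x}_j

The feature j with the smallest mean absolute error \frac{1}{n}\sum_i \lvert \alpha_j + \beta_j x_{ij} - y_i \rvert becomes the leaf's single non-zero coefficient.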
01057 
01058 
01059 
01060 
01061 void LinearSplitsTree::implementSplit(tree_node* node, 
01062                                       const std::vector<tree_experience*> &instances,
01063                                       float bestER, int bestDim,
01064                                       float bestVal, 
01065                                       const std::vector<tree_experience*> &bestLeft,
01066                                       const std::vector<tree_experience*> &bestRight,
01067                                       bool changed, float leftError, float rightError){
01068   if (DTDEBUG) cout << "implementSplit node=" << node->id
01069                     << ",er=" << bestER
01070                     << ",dim=" << bestDim
01071                     << ",val=" << bestVal 
01072                     << ",chg=" << changed << endl;
01073 
01074 
01075   // see if this should still be a leaf node
01076   if (bestER < MIN_ER){
01077     makeLeaf(node, instances);
01078 
01079     if (SPLITDEBUG || STOCH_DEBUG){
01080       cout << " DT " << id << " Node " << node->id << " Poor er "
01081            << node->nInstances
01082            << " instances classified at leaf " << node->id
01083            << " with er " << bestER << " constant: " << node->constant << endl;
01084     }
01085     return;
01086   }
01087 
01088   //cout << id << " implement split with er " << bestER << endl;
01089 
01090   // see if this split changed or not
01091   // assuming no changes above
01092   if (!changed && node->dim == bestDim && node->val == bestVal
01093       && !node->leaf && node->l != NULL && node->r != NULL){
01094     // same split as before.
01095     if (DTDEBUG || SPLITDEBUG) cout << "Same split as before " << node->id << endl;
01096     node->l->avgError = leftError;
01097     node->r->avgError = rightError;
01098 
01099 
01100     // see which leaf changed
01101     if (bestLeft.size() > (unsigned)node->l->nInstances){
01102       // redo left side
01103       if (DTDEBUG) cout << "Rebuild left side of tree" << endl;
01104       buildTree(node->l, bestLeft, changed);
01105     }
01106 
01107     if (bestRight.size() > (unsigned)node->r->nInstances){
01108       // redo right side
01109       if (DTDEBUG) cout << "Rebuild right side of tree" << endl;
01110       buildTree(node->r, bestRight, changed);
01111     }
01112     return;
01113   }
01114 
01115 
01116   // totally new
01117   // set the best split here
01118   node->leaf = false;
01119   node->dim = bestDim;
01120   node->val = bestVal;
01121 
01122   if (SPLITDEBUG) cout << "Best split was cut with val " << node->val
01123                        << " on dim " << node->dim
01124                        << " with er: " << bestER << endl;
01125 
01126   if (DTDEBUG) cout << "Left has " << bestLeft.size()
01127                     << ", right has " << bestRight.size() << endl;
01128 
01129   // make sure both sides have at least one instance
01130   if (bestLeft.size() == 0 || bestRight.size() == 0){
01131     cout << "ERROR: DT " << id << " node " << node->id << " has 0 instances: left: " << bestLeft.size()
01132          << " right: " << bestRight.size() << endl;
01133     cout << "Split was cut with val " << node->val
01134          << " on dim " << node->dim
01135          << " with er: " << bestER << endl;
01136     exit(-1);
01137   }
01138 
01139 
01140   // check if these already exist
01141   if (node->l == NULL){
01142     if (DTDEBUG) cout << "Init new left tree nodes " << endl;
01143     node->l = allocateNode();
01144   }
01145   if (node->r == NULL){
01146     if (DTDEBUG) cout << "Init new right tree nodes " << endl;
01147     node->r = allocateNode();
01148   }
01149 
01150   // recursively build the sub-trees to this one
01151   if (DTDEBUG) cout << "Building left tree for node " << node->id << endl;
01152   node->l->avgError = leftError;
01153   buildTree(node->l, bestLeft, true);
01154   if (DTDEBUG) cout << "Building right tree for node " << node->id << endl;
01155   node->r->avgError = rightError;
01156   buildTree(node->r, bestRight, true);
01157 
01158 }
01159 
01160 
01161 void LinearSplitsTree::testPossibleSplits(float avgError, const std::vector<tree_experience*> &instances,
01162                                           float *bestER, int *bestDim,
01163                                           float *bestVal, 
01164                                           std::vector<tree_experience*> *bestLeft,
01165                                           std::vector<tree_experience*> *bestRight,
01166                                           float *bestLeftError, float *bestRightError) {
01167   if (DTDEBUG || SPLITDEBUG) cout << "testPossibleSplits, error=" << avgError << endl;
01168 
01169   int nties = 0;
01170 
01171   // for each possible split, calculate the expected reduction in average model error
01172   for (unsigned idim = 0; idim < instances[0]->input.size(); idim++){
01173 
01174     //float* sorted = sortOnDim(idim, instances);
01175     float minVal, maxVal;
01176     std::set<float> uniques = getUniques(idim, instances, minVal, maxVal);
01177 
01178     for (std::set<float>::iterator j = uniques.begin(); j != uniques.end(); j++){
01179 
01180       // skip the max val; a cut there would put every instance on one side
01181       if ((*j) == maxVal)
01182         continue;
01183 
01184       // if this is a random forest, we randomly skip a fraction (featPct) of the
01185       // candidate splits; the decision is made from the random subset that remains
01186       if (rng.uniform() < featPct)
01187         continue;
01188       
01189       std::vector<tree_experience*> left;
01190       std::vector<tree_experience*> right;
01191       float leftError = 10000;
01192       float rightError = 10000;
01193 
01194       // splits that are cuts
01195       float splitval = (*j);
01196       float er = calcER(idim, splitval, instances, avgError, left, right, &leftError, &rightError);
01197 
01198       if (SPLITDEBUG){
01199         cout << id << " CUT split val " << splitval
01200              << " on dim: " << idim << " had er "
01201              << er << endl;
01202       }
01203 
01204       // see if this is the new best er
01205       compareSplits(er, idim, splitval, left, right, &nties, leftError, rightError,
01206                     bestER, bestDim, bestVal,bestLeft, bestRight, bestLeftError, bestRightError);
01207 
01208 
01209     } // j loop
01210   }
01211 }
01212 
01213 
01214 
01216 void LinearSplitsTree::compareSplits(float er, int dim, float val, 
01217                                      const std::vector<tree_experience*> &left, 
01218                                      const std::vector<tree_experience*> &right,
01219                                      int *nties, float leftError, float rightError,
01220                                      float *bestER, int *bestDim,
01221                                      float *bestVal, 
01222                                      std::vector<tree_experience*> *bestLeft,
01223                                      std::vector<tree_experience*> *bestRight,
01224                                      float *bestLeftError, float *bestRightError){
01225   if (DTDEBUG) cout << "compareSplits er=" << er << ",dim=" << dim
01226                     << ",val=" << val  <<endl;
01227 
01228 
01229   bool newBest = false;
01230 
01231   // if its a virtual tie, break it randomly
01232   if (fabs(*bestER - er) < SPLIT_MARGIN){
01233     //cout << "Split tie, making random decision" << endl;
01234 
01235     (*nties)++;
01236     float randomval = rng.uniform(0,1);
01237     float newsplitprob = (1.0 / (float)*nties);
01238 
01239     if (randomval < newsplitprob){
01240       newBest = true;
01241       if (SPLITDEBUG) cout << "   Tie on split. DT: " << id << " rand: " << randomval
01242                            << " splitProb: " << newsplitprob << ", selecting new split " << endl;
01243     }
01244     else
01245       if (SPLITDEBUG) cout << "   Tie on split. DT: " << id << " rand: " << randomval
01246                            << " splitProb: " << newsplitprob << ", staying with old split " << endl;
01247   }
01248 
01249   // if its clearly better, set this as the best split
01250   else if (er > *bestER){
01251     newBest = true;
01252     *nties = 1;
01253   }
01254 
01255 
01256   // set the split features
01257   if (newBest){
01258     *bestER = er;
01259     *bestDim = dim;
01260     *bestVal = val;
01261     *bestLeft = left;
01262     *bestRight = right;
01263     *bestLeftError = leftError;
01264     *bestRightError = rightError;
01265     if (SPLITDEBUG){
01266       cout << "  New best er: " << *bestER
01267            << " with val " << *bestVal
01268            << " on dim " << *bestDim << endl;
01269     }
01270   } // newbest
01271 }
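Accepting the k-th tied candidate with probability 1/k (the nties counter above) is reservoir sampling of a single item: when n candidates tie, each ends up selected with equal probability, since

    P(\text{candidate } k \text{ survives}) = \frac{1}{k}\prod_{j=k+1}^{n}\Bigl(1 - \frac{1}{j}\Bigr)
      = \frac{1}{k}\cdot\frac{k}{n} = \frac{1}{n}.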
01272 
01274 float LinearSplitsTree::calcER(int dim, float val,
01275                                const std::vector<tree_experience*> &instances,
01276                                float avgError,
01277                                std::vector<tree_experience*> &left,
01278                                std::vector<tree_experience*> &right,
01279                                float *leftError, float *rightError){
01280   if (DTDEBUG || SPLITDEBUG) cout << "calcER, dim=" << dim
01281                     << " val=" << val
01282                     << " err=" << avgError
01283                     << " nInstances= " << instances.size() << endl;
01284 
01285   left.clear();
01286   right.clear();
01287 
01288   // split into two sides
01289   for (unsigned i = 0; i < instances.size(); i++){
01290     if (DTDEBUG) cout << " calcER - Classify instance " << i << " on new split " << endl;
01291 
01292     if (passTest(dim, val, instances[i]->input)){
01293       left.push_back(instances[i]);
01294     }
01295     else{
01296       right.push_back(instances[i]);
01297     }
01298   }
01299 
01300   if (DTDEBUG) cout << "Left has " << left.size()
01301                     << ", right has " << right.size() << endl;
01302 
01303   // get sd for both sides
01304   *leftError = calcAvgErrorforSet(left);
01305   *rightError = calcAvgErrorforSet(right);
01306 
01307   float leftRatio = (float)left.size() / (float)instances.size();
01308   float rightRatio = (float)right.size() / (float)instances.size();
01309   float newError = (leftRatio * (*leftError) + rightRatio * (*rightError));
01310 
01311   float er = avgError - newError;
01312 
01313   if (DTDEBUG || SPLITDEBUG){
01314     cout << "LeftError: " << *leftError
01315          << " RightError: " << *rightError
01316          << " NewError: " << newError
01317          << " NodeError: " << avgError
01318          << " ER: " << er
01319          << endl;
01320   }
01321 
01322   return er;
01323 
01324 }
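So the expected error reduction of a candidate split is the node's current average model error minus the size-weighted average error of linear models fit to the two sides:

    ER = E_{\mathrm{node}} - \Bigl(\frac{|L|}{n} E_L + \frac{|R|}{n} E_R\Bigr)

where E_L and E_R are the mean absolute errors returned by calcAvgErrorforSet for the left and right subsets; implementSplit only keeps a split whose ER is at least MIN_ER.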
01325 
01327 float LinearSplitsTree::calcAvgErrorforSet(const std::vector<tree_experience*> &instances){
01328   if (DTDEBUG) cout << "calcAvgErrorforSet" << endl;
01329 
01330   int n = instances.size();
01331 
01332   if (n == 0)
01333     return 0;
01334 
01335   // fit a linear model to instances
01336   // and figure out avg error of it
01337   float constant;
01338   float avgError = 0.0;
01339   std::vector<float> coeff;
01340   if (SIMPLE) 
01341     avgError = fitSimpleLinearModel(instances, &constant, &coeff);
01342   else 
01343     avgError = fitMultiLinearModel(instances, &constant, &coeff);
01344 
01345   return avgError;
01346 }
01347 
01348 
01350 std::set<float> LinearSplitsTree::getUniques(int dim, const std::vector<tree_experience*> &instances, float& minVal, float& maxVal){
01351   if (DTDEBUG) cout << "getUniques,dim = " << dim;
01352 
01353   std::set<float> uniques;
01354 
01355   for (int i = 0; i < (int)instances.size(); i++){
01356     if (i == 0 || instances[i]->input[dim] < minVal)
01357       minVal = instances[i]->input[dim];
01358     if (i == 0 || instances[i]->input[dim] > maxVal)
01359       maxVal = instances[i]->input[dim];
01360 
01361     uniques.insert(instances[i]->input[dim]);
01362   }
01363 
01364   
01365   // let's not try more than 100 possible splits per dimension
01366   if (uniques.size() > 100){
01367     float rangeInc = (maxVal - minVal) / 100.0;
01368     uniques.clear();
01369     for (float i = minVal; i < maxVal; i+= rangeInc){
01370       uniques.insert(i);
01371     }
01372   }
01373   uniques.insert(maxVal);
01374   
01375 
01376   if (DTDEBUG) cout << " #: " << uniques.size() << endl;
01377   return uniques;
01378 }
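When a dimension has more than 100 distinct values, the candidate thresholds are reduced to roughly 100 evenly spaced cut points plus the maximum:

    v_k = \mathrm{minVal} + k\,\frac{\mathrm{maxVal} - \mathrm{minVal}}{100}, \quad k = 0, 1, \dots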
01379 
01380 
01383 float* LinearSplitsTree::sortOnDim(int dim, const std::vector<tree_experience*> &instances){
01384   if (DTDEBUG) cout << "sortOnDim,dim = " << dim << endl;
01385 
01386   float* values = new float[instances.size()];
01387 
01388   for (int i = 0; i < (int)instances.size(); i++){
01389     //cout << "Instance " << i << endl;
01390 
01391     float val = instances[i]->input[dim];
01392     //cout << " val: " << val << endl;
01393 
01394     // find where this should go
01395     for (int j = 0; j <= i; j++){
01396       //cout << " j: " << j << endl;
01397 
01398       // get to i, this is the spot then
01399       if (j==i){
01400         values[j] = val;
01401         //cout << "  At i, putting value in slot j: " << j << endl;
01402       }
01403 
01404       // if this is the spot
01405       else if (val < values[j]){
01406         //cout << "  Found slot at j: " << j << endl;
01407 
01408         // slide everything forward to make room
01409         for (int k = i; k > j; k--){
01410           //cout << "   k = " << k << " Sliding value from k-1 to k" << endl;
01411           values[k] = values[k-1];
01412         }
01413 
01414         // put value in its spot at j
01415         //cout << "  Putting value at slot j: " << j << endl;
01416         values[j] = val;
01417 
01418         // break
01419         break;
01420       }
01421 
01422     }
01423   }
01424 
01425   if (DTDEBUG){
01426     cout << "Sorted array: " << values[0];
01427     for (int i = 1; i < (int)instances.size(); i++){
01428       cout << ", " << values[i];
01429     }
01430     cout << endl;
01431   }
01432 
01433   return values;
01434 
01435 }
01436 
01437 
01439 void LinearSplitsTree::printTree(tree_node *t, int level){
01440 
01441   for (int i = 0; i < level; i++){
01442     cout << ".";
01443   }
01444 
01445   cout << "Node " << t->id  << " nInstances: " << t->nInstances ;
01446 
01447   // Leaf, print regression stuff
01448   if (t->leaf){
01449     cout << " Constant: " << t->constant << " coeff: ";
01450     for (unsigned j = 0; j < t->coefficients.size(); j++){
01451       if (!SIMPLE)
01452         cout << t->coefficients[j] << ", ";
01453       if (SIMPLE && t->coefficients[j] != 0)
01454         cout << " on feat " << j << ": " << t->coefficients[j];
01455     }
01456     cout << endl;
01457     return;
01458   }
01459 
01460   // otherwise, print split info, next nodes
01461 
01462   cout << " Type: CUT";
01463   cout << " Dim: " << t->dim << " Val: " << t->val;
01464   cout << " Left: " << t->l->id << " Right: " << t->r->id << endl;
01465 
01466   // print children
01467   if (t->dim != -1 && !t->leaf){
01468     printTree(t->l, level+1);
01469     printTree(t->r, level+1);
01470   }
01471 
01472 }
01473 
01474 
01475 // output a map of outcomes and their probabilities for this leaf node
01476 void LinearSplitsTree::leafPrediction(tree_node* leaf, const std::vector<float> &input,
01477                                       std::map<float, float>* retval){
01478   if (DTDEBUG)
01479     cout << "Calculating output for leaf " << leaf->id << endl;
01480 
01481   float prediction = leaf->constant;
01482   if (DTDEBUG) cout << "leaf constant: " << leaf->constant << endl;
01483 
01484   // plus each coefficient
01485   for (unsigned i = 0; i < input.size(); i++){
01486     prediction += leaf->coefficients[i] * input[i];
01487     if (DTDEBUG) {
01488       cout << "feat " << i << " coeff: " << leaf->coefficients[i]
01489            << " on input " << input[i] << endl;
01490     }
01491   }
01492 
01493   if (DTDEBUG) cout << " prediction: " << prediction << endl;
01494 
01495   (*retval)[prediction] = 1.0;
01496  
01497 }
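A leaf therefore returns a single deterministic prediction from its linear model, mapped to probability one:

    \hat{y} = c + \sum_i \beta_i x_i, \qquad (*retval)[\hat{y}] = 1.0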
01498 
01499 
01500 void LinearSplitsTree::initNodes(){
01501 
01502   for (int i = 0; i < N_LS_NODES; i++){
01503     initTreeNode(&(allNodes[i]));
01504     freeNodes.push_back(i);
01505     if (NODEDEBUG) 
01506       cout << "init node " << i << " with id " << allNodes[i].id 
01507            << ", now " << freeNodes.size() << " free nodes." << endl;
01508   }
01509 
01510 }
01511 
01512 LinearSplitsTree::tree_node* LinearSplitsTree::allocateNode(){
01513   if (freeNodes.empty()){
01514     tree_node* newNode = new tree_node;
01515     initTreeNode(newNode);
01516     if (NODEDEBUG) 
01517       cout << id << " PROBLEM: No more pre-allocated nodes!!!" << endl
01518            << "return new node " << newNode->id 
01519            << ", now " << freeNodes.size() << " free nodes." << endl;
01520     return newNode;
01521   }
01522 
01523   int i = freeNodes.back();
01524   freeNodes.pop_back();
01525   if (NODEDEBUG) 
01526     cout << id << " allocate node " << i << " with id " << allNodes[i].id 
01527          << ", now " << freeNodes.size() << " free nodes." << endl;
01528   return &(allNodes[i]);
01529 }
01530 
01531 void LinearSplitsTree::deallocateNode(tree_node* node){
01532   if (node->id >= N_LS_NODES){
01533     if (NODEDEBUG) 
01534       cout << id << " dealloc extra node id " << node->id 
01535            << ", now " << freeNodes.size() << " free nodes." << endl;
01536     delete node;
01537     return;
01538   }
01539 
01540   freeNodes.push_back(node->id);
01541   if (NODEDEBUG) 
01542     cout << id << " dealloc node " << node->id 
01543          << ", now " << freeNodes.size() << " free nodes." << endl;
01544 }

