ValueIteration.c

Go to the documentation of this file.
00001 #include "ValueIteration.h"
00002 //#include <avr/io.h>
00003  
00004 
00008 void ValueIteration::init() {
00009 
00010      fb.init();
00011      ctr.init();
00012 
00013      // seed for PRNG
00014      srand (get_seed() + 1103515245);
00015 
00016 
00017      // set initial gamma
00018      if (ctr.getSwitchState(SWITCH_GAMMA)) {
00019           this->gamma = 0.98;
00020      } else {
00021           this->gamma = 0.9;
00022      }
00023 
00024      // set initial exploration mode
00025      this->expMode = ctr.getSwitchState(SWITCH_VDBE_EGREEDY);
00026 
00027      this->iteration = 0;
00028      this->currentExpProbability = 1.0;
00029 
00030      if (this->expMode == EXPLORATION_EGREEDY) {
00031           this->expParameter = 0.075;
00032      } else if (this->expMode == EXPLORATION_VALUE_BASED) {
00033           this->expParameter = 1;
00034      }
00035 
00036      oldValue = newValue = 0.0;
00037      this->distanceCovered = 0;
00038 }
00039 
00044 unsigned long int ValueIteration::getIterationCount() {
00045      return this->iteration;
00046 }
00047 
00051 signed long int ValueIteration::getDistanceCovered() {
00052      return this->distanceCovered;
00053 }
00054 
00058 void ValueIteration::resetDistanceCounter() {
00059      this->distanceCovered = 0;
00060 }
00061 
00065 unsigned char ValueIteration::getGreedyAction(unsigned char _row, unsigned char _column) {
00066 
00067      float value, maximum = -100;
00068      unsigned short multipleBestActions = 0;
00069      unsigned char _action, bestAction;
00070      bool haveAction = false;
00071 
00072 
00073      // 1) calculate values for all valid actions
00074      for(_action=0; _action <NUM_ACTIONS; _action++) {
00075 
00076           if (isValidAction(_row, _column, _action)) {
00077 
00078                // calculate value
00079                value = evaluateAction (_row, _column, _action);
00080      
00081                // check for maximum
00082                if(value > maximum) {
00083      
00084                     maximum = value;
00085                     haveAction = true;
00086                     
00087                     // memory the best action
00088                     bestAction = _action;
00089      
00090                     multipleBestActions = 0;
00091      
00092                } else  if (fabs(value - maximum) < 0.001) {
00093                     multipleBestActions++;
00094                }
00095           }
00096      }
00097 
00098      // if there are multiple best actions, chose a random one 
00099      if (multipleBestActions) {
00100 
00101           unsigned char found=0;
00102           unsigned char randAction = rand()%(multipleBestActions+1);
00103      
00104           for (_action=0; _action < NUM_ACTIONS; _action++) {
00105                if (isValidAction(_row, _column, _action)) {
00106                     if (fabs((value=evaluateAction(_row, _column, _action)) - maximum) < 0.001) {
00107                          if (found < randAction) {
00108                               found++;
00109                          } else {
00110                               bestAction = _action;
00111                               _action = NUM_ACTIONS; // terminates the for-loop
00112                          }
00113                     }
00114                }
00115           }
00116      }
00117      
00118 
00119      // 2) if there was no best action found, take random action
00120      if (!haveAction) {  
00121           bestAction= this->getRandomAction();
00122           ctr.blinkLED((1<<LED_EXPLORATION), 100, 50);
00123      }
00124 
00125      return bestAction;
00126 }
00127 
00128 
00132 void ValueIteration::valueIterateAllStates() {
00133 
00134      float value, maximum = -100;
00135      unsigned char bestAction, action, _row, _column;
00136      unsigned short multipleBestActions = 0;
00137      bool haveAction = false;
00138      STATE _tmpState;
00139      
00140      // increment iteration number
00141      this->iteration++;
00142 
00143      // switch on LEARN LED
00144      ctr.powerOnLED(LED_LEARN, true);
00145      
00146      // for each state s in S: 
00147      for(_row=0; _row<ctr.states.scalY; _row++) {
00148           for(_column=0; _column<ctr.states.scalX; _column++) {
00149 
00150                haveAction = false;
00151                maximum = -100;
00152                multipleBestActions=0;
00153 
00154                // 1) for each valid successor state
00155                for(action=0; action<NUM_ACTIONS; action++) {
00156 
00157                     if (isValidAction(_row, _column, action)) {
00158 
00159                          value = evaluateAction (_row, _column, action);
00160 
00161                          // memorize maximum value
00162                          if(value > maximum) {
00163 
00164                               maximum = value;
00165                               haveAction = true;
00166 
00167                               // update maximum value in state-space
00168                               ctr.states.setValue(_row, _column, value);
00169                          }    
00170                     }
00171                }
00172           }
00173      }
00174 
00175      // switch off LEARN LED
00176      ctr.powerOnLED(LED_LEARN, false);
00177 }
00178 
00179 
00184 float ValueIteration::evaluateAction(unsigned char _row, unsigned char _column, unsigned char _action) {
00185 
00186      float neighbourValue, newValue;
00187      int actionFeedback;
00188      
00189      unsigned char _newRow = _row;
00190      unsigned char _newColumn = _column;
00191 
00192      if (_action == UP)   _newRow--;
00193      if (_action == DOWN) _newRow++;
00194      if (_action == RIGHT) _newColumn++;
00195      if (_action == LEFT)  _newColumn--;
00196 
00197      // get reward for action a in s
00198      actionFeedback = ctr.states.getFeedback(_row, _column, _action);
00199 
00200      // get value of successor state
00201      neighbourValue = ctr.states.getValue(_newRow, _newColumn);
00202 
00203      // compute new value
00204      newValue = (gamma * (float)neighbourValue) + (float)actionFeedback;
00205 
00206      return newValue;
00207 }
00208 
00209 
00213 void ValueIteration::exploreAllFeedbacks(bool wait5Seconds) {
00214 
00215      bool direction;
00216 
00217      unsigned char i, j;
00218      unsigned char steps = 0;
00219 
00220      // switch on EXPLORATION LED
00221      ctr.powerOnLED(LED_EXPLORATION, true);
00222 
00223      // go to state (0,0)
00224      ctr.gotoStateWithoutSavingFeedback(0,0);
00225      direction = true;
00226 
00227      // delay five seconds
00228      if (wait5Seconds) ctr.msDelay(5000);
00229 
00230      // explore horizontal until (scalX, scalY)
00231      while (steps < (ctr.states.scalY*(ctr.states.scalX-1))) {
00232           
00233           // go right
00234           if (direction) {
00235      
00236                moveAndSaveFeedback(RIGHT, false);
00237 
00238                // go down at the right end
00239                if (!isValidAction(ctr.currentY, ctr.currentX, RIGHT)) {
00240                     direction = false;
00241                     if (isValidAction(ctr.currentY, ctr.currentX, DOWN)) {
00242                          moveAndSaveFeedback(DOWN, false);
00243                     }
00244                }
00245 
00246           // go left
00247           } else {
00248      
00249                moveAndSaveFeedback(LEFT, false);
00250 
00251                // go down on the left end
00252                if (!isValidAction(ctr.currentY, ctr.currentX, LEFT)) {
00253                     direction = true;
00254                     if (isValidAction(ctr.currentY, ctr.currentX, DOWN)) {
00255                          moveAndSaveFeedback(DOWN, false);
00256                     }
00257                }
00258           }
00259           steps++;
00260      }
00261 
00262      // go back horizontally from (scalX, scalY) to (0,0);
00263      steps=0;
00264      while (steps < (ctr.states.scalY*(ctr.states.scalX-1))) {
00265           
00266           // go right 
00267           if (direction) {
00268      
00269                moveAndSaveFeedback(RIGHT, false);
00270      
00271                // go up on the right end
00272                if (!isValidAction(ctr.currentY, ctr.currentX, RIGHT)) {
00273                     direction = false;
00274                     if (isValidAction(ctr.currentY, ctr.currentX, UP)){ 
00275                          moveAndSaveFeedback(UP, false);
00276                     }
00277                }
00278 
00279           // go left
00280           } else {
00281      
00282                moveAndSaveFeedback(LEFT, false);
00283                
00284                // go up on the left end
00285                if (!isValidAction(ctr.currentY, ctr.currentX, LEFT)) {
00286                     direction = true;
00287                     if (isValidAction(ctr.currentY, ctr.currentX, UP)) {
00288                          moveAndSaveFeedback(UP, false);
00289                     }
00290                }
00291           }
00292           steps++;
00293 
00294      }
00295 
00296      // explore vertically
00297      direction = true;
00298      steps = 0;
00299      while (steps < (ctr.states.scalY*(ctr.states.scalX-1))) {
00300           
00301           // go down 
00302           if (direction) {
00303      
00304                moveAndSaveFeedback(DOWN, false);
00305 
00306                // go left on the lower end
00307                if (!isValidAction(ctr.currentY, ctr.currentX, DOWN)) {
00308                     direction = false;
00309                     if (isValidAction(ctr.currentY, ctr.currentX, RIGHT)) {
00310                          moveAndSaveFeedback(RIGHT, false);
00311                     }
00312                }
00313 
00314           // go up
00315           } else {
00316      
00317                moveAndSaveFeedback(UP, false);
00318 
00319                // go right on the upper end
00320                if (!isValidAction(ctr.currentY, ctr.currentX, UP)) {
00321                     direction = true;
00322                     if (isValidAction(ctr.currentY, ctr.currentX, RIGHT)) {
00323                          moveAndSaveFeedback(RIGHT, false);
00324                     }
00325                }
00326           }
00327           steps++;
00328      }
00329 
00330      // go vertically backwards to (0,0);
00331      steps=0;
00332      while (steps < (ctr.states.scalY*(ctr.states.scalX-1))) {
00333           
00334           // go down
00335           if (direction) {
00336      
00337                moveAndSaveFeedback(DOWN, false);
00338      
00339                // go left on the lower end
00340                if (!isValidAction(ctr.currentY, ctr.currentX, DOWN)) {
00341                     direction = false;
00342                     if (isValidAction(ctr.currentY, ctr.currentX, LEFT)) {
00343                          moveAndSaveFeedback(LEFT, false);
00344                     }
00345                }
00346 
00347           // go left
00348           } else {
00349      
00350                moveAndSaveFeedback(UP, false);
00351                
00352                // go left on the upper end
00353                if (!isValidAction(ctr.currentY, ctr.currentX, UP)) {
00354                     direction = true;
00355                     if (isValidAction(ctr.currentY, ctr.currentX, LEFT)) {
00356                          moveAndSaveFeedback(LEFT, false);
00357                     }
00358                }
00359           }
00360           steps++;
00361 
00362      }
00363 
00364      // turn off FEEDBACK LED
00365      ctr.powerOnLED(LED_EXPLORATION, false);
00366 }
00367 
00371 unsigned char ValueIteration::getRandomAction () {
00372 
00373      bool found = false;
00374      unsigned char randomAction;
00375      static unsigned char increment = 0;
00376 
00377      do {
00378           randomAction = (rand()+increment)%NUM_ACTIONS;
00379           if (isValidAction(ctr.currentY, ctr.currentX, randomAction)) { 
00380                found = true;
00381           }
00382 
00383           increment++;
00384 
00385      } while (!found);
00386 
00387      return randomAction;
00388 }
00389 
00393 void ValueIteration::gotoRandomState() {
00394 
00395      unsigned char newX = rand()%ctr.states.scalX;
00396      unsigned char newY = rand()%ctr.states.scalY;
00397 
00398      unsigned char oldX = ctr.currentX;
00399      unsigned char oldY = ctr.currentY;
00400 
00401 
00402      // position new state on Y-axis
00403      while (oldY != newY) {
00404           if (oldY < newY) {
00405                oldY++;
00406                this->moveAndSaveFeedback(DOWN, 0);
00407           }
00408           else {
00409                oldY--;
00410                this->moveAndSaveFeedback(UP, 0);
00411           }
00412      }
00413      // position new state on X-axis
00414      while (oldX != newX) {
00415           if (oldX < newX) {
00416                oldX++;
00417                this->moveAndSaveFeedback(RIGHT, 0);
00418           }
00419           else {
00420                oldX--;
00421                this->moveAndSaveFeedback(LEFT, 0);
00422           }
00423      }
00424 }
00425 
00427 bool ValueIteration::doExploration() {
00428      
00429      float explorationProbability = 1;
00430      float dV;
00431 
00432      if (this->expMode == EXPLORATION_VALUE_BASED) {
00433           
00434           // calculate dV
00435           dV = fabs ((oldValue-newValue) / (fabs(oldValue)+fabs(newValue)));
00436           explorationProbability = pow(dV, 1.0/this->expParameter);
00437 
00438      } else if (this->expMode == EXPLORATION_EGREEDY) {
00439           explorationProbability = this->expParameter;
00440      }
00441 
00442      this->currentExpProbability = explorationProbability;
00443 
00444      // return whether we should explore or exploit
00445      if ((rand()%100) > (explorationProbability*100)) {
00446           return false; // exploitation
00447      } else {
00448           return true; // exploration
00449      }
00450 }
00451 
00455 char ValueIteration::moveAndSaveFeedback (unsigned char _action, bool _activateRandomMoving) {
00456 
00457      char _feedback;
00458      unsigned char oldX = ctr.currentX;
00459      unsigned char oldY = ctr.currentY;
00460      unsigned char randomAction=0;
00461 
00462      unsigned char nextAction = _action;
00463      
00464      STATE curState;
00465      ctr.states.getState(ctr.currentY, ctr.currentX, &curState);
00466 
00467      ctr.powerOnLED(LED_WALK, true);
00468      
00469      // check for reward-signal invertion
00470      if (ctr.getSwitchState(SWITCH_INV_FB_0)==0) {
00471           fb.invertSensor(FEEDBACK_SENSOR_0, true);
00472      } else {
00473           fb.invertSensor(FEEDBACK_SENSOR_0, false);
00474      }
00475 
00476 
00477      // check whether we should explore
00478      if (_activateRandomMoving && doExploration()) {
00479 
00480           ctr.powerOnLED(LED_EXPLORATION, true);
00481           ctr.powerOnLED(LED_WALK, true);
00482 
00483           if (ctr.getSwitchState(SWITCH_RANDOM_STATE)==0) {
00484                nextAction = getRandomAction();
00485                this->moveAndSaveFeedback(nextAction, 0);
00486           } else {
00487                this->gotoRandomState();
00488           }
00489 
00490           ctr.powerOnLED(LED_EXPLORATION, false);
00491           ctr.powerOnLED(LED_WALK, false);
00492           return 0;
00493      }
00494 
00495      
00496      //  if _action is invalid, take a (valid) random action
00497      while (!isValidAction(ctr.currentY, ctr.currentX, nextAction)) {
00498           nextAction = getRandomAction();
00499           //ctr.blinkLED((1<<LED_WALK), 1, 50);
00500      }
00501 
00502 
00503      // compute new state in grid
00504      if (nextAction == UP) {
00505           ctr.currentY--;
00506 
00507      } else if (nextAction == DOWN) {
00508           ctr.currentY++;
00509 
00510      } else if (nextAction == LEFT) {
00511           ctr.currentX--;
00512 
00513      } else if (nextAction == RIGHT) {
00514           ctr.currentX++;
00515 
00516      } else {
00517           ctr.blinkLED((1<<LED_WALK)|(1<<LED_POWER), 100, 250);
00518      }
00519 
00520      // enable reward sensor and disable serial interface
00521      ctr.disableV24Interrupt();
00522      fb.enableSensor();
00523 
00524      // go to new state
00525      if (oldX != ctr.currentX) ctr.servo_x.setPosition(ctr.currentX, ctr.states.scalX);
00526      if (oldY != ctr.currentY) ctr.servo_y.setPosition(ctr.currentY, ctr.states.scalY);
00527 
00528      // short delay
00529      if (nextAction == UP  || nextAction == DOWN) { 
00530           ctr.msDelay (
00531                (ctr.getSwitchState(SWITCH_RAM) ?  
00532                     (((int)(DELAY_TIME*0.66)) / (int)ctr.states.scalY) : 
00533                     (((int)DELAY_TIME) / (int)ctr.states.scalY))
00534           );
00535      }
00536      else if (nextAction == LEFT || nextAction == RIGHT) { 
00537           ctr.msDelay (
00538                (ctr.getSwitchState(SWITCH_RAM) ?  
00539                     (((int)(DELAY_TIME*0.66)) / (int)ctr.states.scalX) : 
00540                     (((int)DELAY_TIME) / (int)ctr.states.scalX))
00541           );
00542      }
00543 
00544      // receive reward, deactive reward sensor and enable serial interface
00545      // Feedback-Sensor wieder deaktivieren (und seriellen interrupt wieder aktivieren)
00546      fb.disableSensor();
00547      ctr.enableV24Interrupt();
00548      _feedback = fb.getFeedback();
00549      
00550      // reset feed (with noise from the environment :-) 
00551      srand (rand()+1+_feedback);
00552 
00553      this->distanceCovered += _feedback;
00554      ctr.states.setFeedback(oldY, oldX, nextAction, _feedback);
00555 
00556      // turn of EXPLORATION and WALK LEDs
00557      ctr.powerOnLED(LED_EXPLORATION, false);
00558      ctr.powerOnLED(LED_WALK, false);
00559 
00560      return _feedback;
00561 }
00562 
00563 
00567 bool ValueIteration::isValidAction(unsigned char _row, unsigned char _column, unsigned char _action) {
00568 
00569      if (_action != UP && _action != DOWN && _action != LEFT && _action != RIGHT) return false;
00570      else if (_action == UP   && _row == 0) return false;
00571      else if (_action == DOWN && _row >= (ctr.states.scalY-1)) return false;
00572      else if (_action == LEFT  && _column == 0) return false;
00573      else if (_action == RIGHT && _column >= (ctr.states.scalX-1)) return false;
00574      
00575      return true;
00576 }
00577 
00578 unsigned short ValueIteration::get_seed()
00579 {
00580    unsigned short seed = 0;
00581    unsigned short *p = (unsigned short*) (RAMEND+1);
00582    extern unsigned short __heap_start;
00583 
00584    while (p >= &__heap_start + 1)
00585       seed ^= * (--p);
00586 
00587    return seed;
00588 }
00589 

Generated on Wed Mar 25 12:58:40 2009 for Crawling Robot Microcontroller Software by  doxygen 1.5.5