00001 #include "ValueIteration.h"
00002
00003
00004
00008 void ValueIteration::init() {
00009
00010 fb.init();
00011 ctr.init();
00012
00013
00014 srand (get_seed() + 1103515245);
00015
00016
00017
00018 if (ctr.getSwitchState(SWITCH_GAMMA)) {
00019 this->gamma = 0.98;
00020 } else {
00021 this->gamma = 0.9;
00022 }
00023
00024
00025 this->expMode = ctr.getSwitchState(SWITCH_VDBE_EGREEDY);
00026
00027 this->iteration = 0;
00028 this->currentExpProbability = 1.0;
00029
00030 if (this->expMode == EXPLORATION_EGREEDY) {
00031 this->expParameter = 0.075;
00032 } else if (this->expMode == EXPLORATION_VALUE_BASED) {
00033 this->expParameter = 1;
00034 }
00035
00036 oldValue = newValue = 0.0;
00037 this->distanceCovered = 0;
00038 }
00039
00044 unsigned long int ValueIteration::getIterationCount() {
00045 return this->iteration;
00046 }
00047
00051 signed long int ValueIteration::getDistanceCovered() {
00052 return this->distanceCovered;
00053 }
00054
00058 void ValueIteration::resetDistanceCounter() {
00059 this->distanceCovered = 0;
00060 }
00061
00065 unsigned char ValueIteration::getGreedyAction(unsigned char _row, unsigned char _column) {
00066
00067 float value, maximum = -100;
00068 unsigned short multipleBestActions = 0;
00069 unsigned char _action, bestAction;
00070 bool haveAction = false;
00071
00072
00073
00074 for(_action=0; _action <NUM_ACTIONS; _action++) {
00075
00076 if (isValidAction(_row, _column, _action)) {
00077
00078
00079 value = evaluateAction (_row, _column, _action);
00080
00081
00082 if(value > maximum) {
00083
00084 maximum = value;
00085 haveAction = true;
00086
00087
00088 bestAction = _action;
00089
00090 multipleBestActions = 0;
00091
00092 } else if (fabs(value - maximum) < 0.001) {
00093 multipleBestActions++;
00094 }
00095 }
00096 }
00097
00098
00099 if (multipleBestActions) {
00100
00101 unsigned char found=0;
00102 unsigned char randAction = rand()%(multipleBestActions+1);
00103
00104 for (_action=0; _action < NUM_ACTIONS; _action++) {
00105 if (isValidAction(_row, _column, _action)) {
00106 if (fabs((value=evaluateAction(_row, _column, _action)) - maximum) < 0.001) {
00107 if (found < randAction) {
00108 found++;
00109 } else {
00110 bestAction = _action;
00111 _action = NUM_ACTIONS;
00112 }
00113 }
00114 }
00115 }
00116 }
00117
00118
00119
00120 if (!haveAction) {
00121 bestAction= this->getRandomAction();
00122 ctr.blinkLED((1<<LED_EXPLORATION), 100, 50);
00123 }
00124
00125 return bestAction;
00126 }
00127
00128
00132 void ValueIteration::valueIterateAllStates() {
00133
00134 float value, maximum = -100;
00135 unsigned char bestAction, action, _row, _column;
00136 unsigned short multipleBestActions = 0;
00137 bool haveAction = false;
00138 STATE _tmpState;
00139
00140
00141 this->iteration++;
00142
00143
00144 ctr.powerOnLED(LED_LEARN, true);
00145
00146
00147 for(_row=0; _row<ctr.states.scalY; _row++) {
00148 for(_column=0; _column<ctr.states.scalX; _column++) {
00149
00150 haveAction = false;
00151 maximum = -100;
00152 multipleBestActions=0;
00153
00154
00155 for(action=0; action<NUM_ACTIONS; action++) {
00156
00157 if (isValidAction(_row, _column, action)) {
00158
00159 value = evaluateAction (_row, _column, action);
00160
00161
00162 if(value > maximum) {
00163
00164 maximum = value;
00165 haveAction = true;
00166
00167
00168 ctr.states.setValue(_row, _column, value);
00169 }
00170 }
00171 }
00172 }
00173 }
00174
00175
00176 ctr.powerOnLED(LED_LEARN, false);
00177 }
00178
00179
00184 float ValueIteration::evaluateAction(unsigned char _row, unsigned char _column, unsigned char _action) {
00185
00186 float neighbourValue, newValue;
00187 int actionFeedback;
00188
00189 unsigned char _newRow = _row;
00190 unsigned char _newColumn = _column;
00191
00192 if (_action == UP) _newRow--;
00193 if (_action == DOWN) _newRow++;
00194 if (_action == RIGHT) _newColumn++;
00195 if (_action == LEFT) _newColumn--;
00196
00197
00198 actionFeedback = ctr.states.getFeedback(_row, _column, _action);
00199
00200
00201 neighbourValue = ctr.states.getValue(_newRow, _newColumn);
00202
00203
00204 newValue = (gamma * (float)neighbourValue) + (float)actionFeedback;
00205
00206 return newValue;
00207 }
00208
00209
00213 void ValueIteration::exploreAllFeedbacks(bool wait5Seconds) {
00214
00215 bool direction;
00216
00217 unsigned char i, j;
00218 unsigned char steps = 0;
00219
00220
00221 ctr.powerOnLED(LED_EXPLORATION, true);
00222
00223
00224 ctr.gotoStateWithoutSavingFeedback(0,0);
00225 direction = true;
00226
00227
00228 if (wait5Seconds) ctr.msDelay(5000);
00229
00230
00231 while (steps < (ctr.states.scalY*(ctr.states.scalX-1))) {
00232
00233
00234 if (direction) {
00235
00236 moveAndSaveFeedback(RIGHT, false);
00237
00238
00239 if (!isValidAction(ctr.currentY, ctr.currentX, RIGHT)) {
00240 direction = false;
00241 if (isValidAction(ctr.currentY, ctr.currentX, DOWN)) {
00242 moveAndSaveFeedback(DOWN, false);
00243 }
00244 }
00245
00246
00247 } else {
00248
00249 moveAndSaveFeedback(LEFT, false);
00250
00251
00252 if (!isValidAction(ctr.currentY, ctr.currentX, LEFT)) {
00253 direction = true;
00254 if (isValidAction(ctr.currentY, ctr.currentX, DOWN)) {
00255 moveAndSaveFeedback(DOWN, false);
00256 }
00257 }
00258 }
00259 steps++;
00260 }
00261
00262
00263 steps=0;
00264 while (steps < (ctr.states.scalY*(ctr.states.scalX-1))) {
00265
00266
00267 if (direction) {
00268
00269 moveAndSaveFeedback(RIGHT, false);
00270
00271
00272 if (!isValidAction(ctr.currentY, ctr.currentX, RIGHT)) {
00273 direction = false;
00274 if (isValidAction(ctr.currentY, ctr.currentX, UP)){
00275 moveAndSaveFeedback(UP, false);
00276 }
00277 }
00278
00279
00280 } else {
00281
00282 moveAndSaveFeedback(LEFT, false);
00283
00284
00285 if (!isValidAction(ctr.currentY, ctr.currentX, LEFT)) {
00286 direction = true;
00287 if (isValidAction(ctr.currentY, ctr.currentX, UP)) {
00288 moveAndSaveFeedback(UP, false);
00289 }
00290 }
00291 }
00292 steps++;
00293
00294 }
00295
00296
00297 direction = true;
00298 steps = 0;
00299 while (steps < (ctr.states.scalY*(ctr.states.scalX-1))) {
00300
00301
00302 if (direction) {
00303
00304 moveAndSaveFeedback(DOWN, false);
00305
00306
00307 if (!isValidAction(ctr.currentY, ctr.currentX, DOWN)) {
00308 direction = false;
00309 if (isValidAction(ctr.currentY, ctr.currentX, RIGHT)) {
00310 moveAndSaveFeedback(RIGHT, false);
00311 }
00312 }
00313
00314
00315 } else {
00316
00317 moveAndSaveFeedback(UP, false);
00318
00319
00320 if (!isValidAction(ctr.currentY, ctr.currentX, UP)) {
00321 direction = true;
00322 if (isValidAction(ctr.currentY, ctr.currentX, RIGHT)) {
00323 moveAndSaveFeedback(RIGHT, false);
00324 }
00325 }
00326 }
00327 steps++;
00328 }
00329
00330
00331 steps=0;
00332 while (steps < (ctr.states.scalY*(ctr.states.scalX-1))) {
00333
00334
00335 if (direction) {
00336
00337 moveAndSaveFeedback(DOWN, false);
00338
00339
00340 if (!isValidAction(ctr.currentY, ctr.currentX, DOWN)) {
00341 direction = false;
00342 if (isValidAction(ctr.currentY, ctr.currentX, LEFT)) {
00343 moveAndSaveFeedback(LEFT, false);
00344 }
00345 }
00346
00347
00348 } else {
00349
00350 moveAndSaveFeedback(UP, false);
00351
00352
00353 if (!isValidAction(ctr.currentY, ctr.currentX, UP)) {
00354 direction = true;
00355 if (isValidAction(ctr.currentY, ctr.currentX, LEFT)) {
00356 moveAndSaveFeedback(LEFT, false);
00357 }
00358 }
00359 }
00360 steps++;
00361
00362 }
00363
00364
00365 ctr.powerOnLED(LED_EXPLORATION, false);
00366 }
00367
00371 unsigned char ValueIteration::getRandomAction () {
00372
00373 bool found = false;
00374 unsigned char randomAction;
00375 static unsigned char increment = 0;
00376
00377 do {
00378 randomAction = (rand()+increment)%NUM_ACTIONS;
00379 if (isValidAction(ctr.currentY, ctr.currentX, randomAction)) {
00380 found = true;
00381 }
00382
00383 increment++;
00384
00385 } while (!found);
00386
00387 return randomAction;
00388 }
00389
00393 void ValueIteration::gotoRandomState() {
00394
00395 unsigned char newX = rand()%ctr.states.scalX;
00396 unsigned char newY = rand()%ctr.states.scalY;
00397
00398 unsigned char oldX = ctr.currentX;
00399 unsigned char oldY = ctr.currentY;
00400
00401
00402
00403 while (oldY != newY) {
00404 if (oldY < newY) {
00405 oldY++;
00406 this->moveAndSaveFeedback(DOWN, 0);
00407 }
00408 else {
00409 oldY--;
00410 this->moveAndSaveFeedback(UP, 0);
00411 }
00412 }
00413
00414 while (oldX != newX) {
00415 if (oldX < newX) {
00416 oldX++;
00417 this->moveAndSaveFeedback(RIGHT, 0);
00418 }
00419 else {
00420 oldX--;
00421 this->moveAndSaveFeedback(LEFT, 0);
00422 }
00423 }
00424 }
00425
00427 bool ValueIteration::doExploration() {
00428
00429 float explorationProbability = 1;
00430 float dV;
00431
00432 if (this->expMode == EXPLORATION_VALUE_BASED) {
00433
00434
00435 dV = fabs ((oldValue-newValue) / (fabs(oldValue)+fabs(newValue)));
00436 explorationProbability = pow(dV, 1.0/this->expParameter);
00437
00438 } else if (this->expMode == EXPLORATION_EGREEDY) {
00439 explorationProbability = this->expParameter;
00440 }
00441
00442 this->currentExpProbability = explorationProbability;
00443
00444
00445 if ((rand()%100) > (explorationProbability*100)) {
00446 return false;
00447 } else {
00448 return true;
00449 }
00450 }
00451
00455 char ValueIteration::moveAndSaveFeedback (unsigned char _action, bool _activateRandomMoving) {
00456
00457 char _feedback;
00458 unsigned char oldX = ctr.currentX;
00459 unsigned char oldY = ctr.currentY;
00460 unsigned char randomAction=0;
00461
00462 unsigned char nextAction = _action;
00463
00464 STATE curState;
00465 ctr.states.getState(ctr.currentY, ctr.currentX, &curState);
00466
00467 ctr.powerOnLED(LED_WALK, true);
00468
00469
00470 if (ctr.getSwitchState(SWITCH_INV_FB_0)==0) {
00471 fb.invertSensor(FEEDBACK_SENSOR_0, true);
00472 } else {
00473 fb.invertSensor(FEEDBACK_SENSOR_0, false);
00474 }
00475
00476
00477
00478 if (_activateRandomMoving && doExploration()) {
00479
00480 ctr.powerOnLED(LED_EXPLORATION, true);
00481 ctr.powerOnLED(LED_WALK, true);
00482
00483 if (ctr.getSwitchState(SWITCH_RANDOM_STATE)==0) {
00484 nextAction = getRandomAction();
00485 this->moveAndSaveFeedback(nextAction, 0);
00486 } else {
00487 this->gotoRandomState();
00488 }
00489
00490 ctr.powerOnLED(LED_EXPLORATION, false);
00491 ctr.powerOnLED(LED_WALK, false);
00492 return 0;
00493 }
00494
00495
00496
00497 while (!isValidAction(ctr.currentY, ctr.currentX, nextAction)) {
00498 nextAction = getRandomAction();
00499
00500 }
00501
00502
00503
00504 if (nextAction == UP) {
00505 ctr.currentY--;
00506
00507 } else if (nextAction == DOWN) {
00508 ctr.currentY++;
00509
00510 } else if (nextAction == LEFT) {
00511 ctr.currentX--;
00512
00513 } else if (nextAction == RIGHT) {
00514 ctr.currentX++;
00515
00516 } else {
00517 ctr.blinkLED((1<<LED_WALK)|(1<<LED_POWER), 100, 250);
00518 }
00519
00520
00521 ctr.disableV24Interrupt();
00522 fb.enableSensor();
00523
00524
00525 if (oldX != ctr.currentX) ctr.servo_x.setPosition(ctr.currentX, ctr.states.scalX);
00526 if (oldY != ctr.currentY) ctr.servo_y.setPosition(ctr.currentY, ctr.states.scalY);
00527
00528
00529 if (nextAction == UP || nextAction == DOWN) {
00530 ctr.msDelay (
00531 (ctr.getSwitchState(SWITCH_RAM) ?
00532 (((int)(DELAY_TIME*0.66)) / (int)ctr.states.scalY) :
00533 (((int)DELAY_TIME) / (int)ctr.states.scalY))
00534 );
00535 }
00536 else if (nextAction == LEFT || nextAction == RIGHT) {
00537 ctr.msDelay (
00538 (ctr.getSwitchState(SWITCH_RAM) ?
00539 (((int)(DELAY_TIME*0.66)) / (int)ctr.states.scalX) :
00540 (((int)DELAY_TIME) / (int)ctr.states.scalX))
00541 );
00542 }
00543
00544
00545
00546 fb.disableSensor();
00547 ctr.enableV24Interrupt();
00548 _feedback = fb.getFeedback();
00549
00550
00551 srand (rand()+1+_feedback);
00552
00553 this->distanceCovered += _feedback;
00554 ctr.states.setFeedback(oldY, oldX, nextAction, _feedback);
00555
00556
00557 ctr.powerOnLED(LED_EXPLORATION, false);
00558 ctr.powerOnLED(LED_WALK, false);
00559
00560 return _feedback;
00561 }
00562
00563
00567 bool ValueIteration::isValidAction(unsigned char _row, unsigned char _column, unsigned char _action) {
00568
00569 if (_action != UP && _action != DOWN && _action != LEFT && _action != RIGHT) return false;
00570 else if (_action == UP && _row == 0) return false;
00571 else if (_action == DOWN && _row >= (ctr.states.scalY-1)) return false;
00572 else if (_action == LEFT && _column == 0) return false;
00573 else if (_action == RIGHT && _column >= (ctr.states.scalX-1)) return false;
00574
00575 return true;
00576 }
00577
00578 unsigned short ValueIteration::get_seed()
00579 {
00580 unsigned short seed = 0;
00581 unsigned short *p = (unsigned short*) (RAMEND+1);
00582 extern unsigned short __heap_start;
00583
00584 while (p >= &__heap_start + 1)
00585 seed ^= * (--p);
00586
00587 return seed;
00588 }
00589