00001 #include "ValueIteration.h"
00002
00003
00004
00008 ValueIteration::ValueIteration (ENV_POS* d) {
00009
00010
00011
00012
00013
00014
00015 this->env = new Environment(d);
00016
00017
00018 srand (get_seed() + 1103515245);
00019
00020
00021 if (BoardController::getSwitchState(SWITCH_GAMMA)) {
00022 this->gamma = 0.98;
00023 } else {
00024 this->gamma = 0.9;
00025 }
00026
00027
00028 this->expMode = BoardController::getSwitchState(SWITCH_VDBE_EGREEDY);
00029
00030 this->iteration = 0;
00031 this->currentExpProbability = 1.0;
00032
00033 if (this->expMode == EXPLORATION_EGREEDY) {
00034 this->expParameter = 0.075;
00035 } else if (this->expMode == EXPLORATION_VALUE_BASED) {
00036 this->expParameter = 1;
00037 }
00038
00039 oldValue = newValue = 0.0;
00040 this->distanceCovered = 0;
00041
00042
00043 this->states = new StateSpace(env);
00044
00045
00046 }
00047
00049 ValueIteration::~ValueIteration() {
00050 delete this->states;
00051 delete this->env;
00052 }
00053
00058 unsigned long int ValueIteration::getIterationCount() {
00059 return this->iteration;
00060 }
00061
00065 signed long int ValueIteration::getDistanceCovered() {
00066 return this->distanceCovered;
00067 }
00068
00072 void ValueIteration::resetDistanceCounter() {
00073 this->distanceCovered = 0;
00074 }
00075
00079 unsigned char ValueIteration::getGreedyAction(ENV_POS* pos) {
00080
00081 float value, maximum = -100;
00082 unsigned short multipleBestActions = 0;
00083 unsigned char _action, bestAction;
00084 bool haveAction = false;
00085
00086
00087
00088 for(_action=0; _action <NUM_ACTIONS; _action++) {
00089
00090 if (this->env->isValidAction(pos, _action)) {
00091
00092
00093 value = evaluateAction (pos, _action);
00094
00095
00096 if(value > maximum) {
00097
00098 maximum = value;
00099 haveAction = true;
00100
00101
00102 bestAction = _action;
00103
00104 multipleBestActions = 0;
00105
00106 } else if (fabs(value - maximum) < 0.001) {
00107 multipleBestActions++;
00108 }
00109 }
00110 }
00111
00112
00113 if (multipleBestActions) {
00114
00115 unsigned char found=0;
00116 unsigned char randAction = rand()%(multipleBestActions+1);
00117
00118 for (_action=0; _action < NUM_ACTIONS; _action++) {
00119 if (this->env->isValidAction(pos, _action)) {
00120 if (fabs((value=evaluateAction(pos, _action)) - maximum) < 0.001) {
00121 if (found < randAction) {
00122 found++;
00123 } else {
00124 bestAction = _action;
00125 _action = NUM_ACTIONS;
00126 }
00127 }
00128 }
00129 }
00130 }
00131
00132
00133
00134 if (!haveAction) {
00135 bestAction = this->getRandomAction();
00136 BoardController::blinkLED((1<<LED_EXPLORATION), 50, 200);
00137 }
00138
00139 return bestAction;
00140 }
00141
00142
00146 void ValueIteration::valueIterateAllStates() {
00147
00148 float value, maximum = -100;
00149 unsigned char bestAction, action, _row, _column;
00150 unsigned short multipleBestActions = 0;
00151 bool haveAction = false;
00152
00153
00154 this->iteration++;
00155
00156
00157 BoardController::powerOnLED(LED_LEARN, true);
00158
00159 ENV_POS position, d;
00160 env->getDiscretization(&d);
00161
00162
00163
00164
00165
00166 for(_row=0; _row<d.y; _row++) {
00167 for(_column=0; _column<d.x; _column++) {
00168
00169 position.x = _column;
00170 position.y = _row;
00171
00172 haveAction = false;
00173 maximum = -100;
00174 multipleBestActions=0;
00175
00176
00177 for(action=0; action<NUM_ACTIONS; action++) {
00178
00179 if (this->env->isValidAction(&position, action)) {
00180
00181 value = evaluateAction (&position, action);
00182
00183
00184 if(value > maximum) {
00185
00186 maximum = value;
00187 haveAction = true;
00188
00189
00190
00191 states->setValue(&position, value);
00192
00193
00194 }
00195 }
00196 }
00197 }
00198 }
00199
00200
00201
00202
00203 BoardController::powerOnLED(LED_LEARN, false);
00204 }
00205
00206
00211 float ValueIteration::evaluateAction(ENV_POS* pos, unsigned char _action) {
00212
00213 float neighbourValue, newValue;
00214 int actionFeedback;
00215
00216 ENV_POS newPos;
00217 newPos.x = pos->x;
00218 newPos.y = pos->y;
00219
00220 if (_action == UP) newPos.y--;
00221 if (_action == DOWN) newPos.y++;
00222 if (_action == RIGHT) newPos.x++;
00223 if (_action == LEFT) newPos.x--;
00224
00225
00226 actionFeedback = states->getFeedback(pos, _action);
00227
00228
00229 neighbourValue = states->getValue(&newPos);
00230
00231
00232 newValue = (gamma * (float)neighbourValue) + (float)actionFeedback;
00233
00234 return newValue;
00235 }
00236
00237
00241 void ValueIteration::exploreAllFeedbacks(bool wait5Seconds) {
00242
00243 unsigned char i, j;
00244 unsigned char steps = 0;
00245
00246
00247
00248
00249
00250 ENV_POS pos(0, 0);
00251 env->beamRobotToState(&pos);
00252
00253 ENV_POS d;
00254 env->getDiscretization(&d);
00255
00256 bool turnFlag = true;
00257
00258
00259 if (wait5Seconds) BoardController::msDelay(5000);
00260
00261
00262
00263
00264 while (steps < ((d.y*d.x)-1)) {
00265
00266
00267
00268 if (turnFlag) {
00269
00270 moveAndSaveFeedback(RIGHT, false, &pos);
00271 steps++;
00272
00273
00274 if (!this->env->isValidAction(&pos, RIGHT)) {
00275 turnFlag = false;
00276 if (this->env->isValidAction(&pos, DOWN)) {
00277 moveAndSaveFeedback(DOWN, false, &pos);
00278 steps++;
00279 }
00280 }
00281
00282
00283 } else {
00284
00285 moveAndSaveFeedback(LEFT, false, &pos);
00286 steps++;
00287
00288
00289 if (!this->env->isValidAction(&pos, LEFT)) {
00290 turnFlag = true;
00291 if (this->env->isValidAction(&pos, DOWN)) {
00292 moveAndSaveFeedback(DOWN, false, &pos);
00293 steps++;
00294 }
00295 }
00296 }
00297 }
00298
00299
00300
00301 steps=0;
00302
00303 while (steps < ((d.y*d.x)-1)) {
00304
00305
00306
00307 if (turnFlag) {
00308
00309 moveAndSaveFeedback(RIGHT, false, &pos);
00310 steps++;
00311
00312
00313 if (!this->env->isValidAction(&pos, RIGHT)) {
00314 turnFlag = false;
00315 if (this->env->isValidAction(&pos, UP)){
00316 moveAndSaveFeedback(UP, false, &pos);
00317 steps++;
00318 }
00319 }
00320
00321
00322 } else {
00323
00324 moveAndSaveFeedback(LEFT, false, &pos);
00325 steps++;
00326
00327
00328 if (!this->env->isValidAction(&pos, LEFT)) {
00329 turnFlag = true;
00330 if (this->env->isValidAction(&pos, UP)) {
00331 moveAndSaveFeedback(UP, false, &pos);
00332 steps++;
00333 }
00334 }
00335 }
00336
00337 }
00338
00339
00340
00341
00342 turnFlag = true;
00343 steps = 0;
00344 while (steps < ((d.y*d.x)-1)) {
00345
00346
00347
00348
00349 if (turnFlag) {
00350
00351 moveAndSaveFeedback(DOWN, false, &pos);
00352 steps++;
00353
00354
00355 if (!this->env->isValidAction(&pos, DOWN)) {
00356 turnFlag = false;
00357 if (this->env->isValidAction(&pos, RIGHT)) {
00358 moveAndSaveFeedback(RIGHT, false, &pos);
00359 steps++;
00360 }
00361 }
00362
00363
00364 } else {
00365
00366 moveAndSaveFeedback(UP, false, &pos);
00367 steps++;
00368
00369
00370 if (!this->env->isValidAction(&pos, UP)) {
00371 turnFlag = true;
00372 if (this->env->isValidAction(&pos, RIGHT)) {
00373 moveAndSaveFeedback(RIGHT, false, &pos);
00374 steps++;
00375 }
00376 }
00377 }
00378 }
00379
00380
00381
00382
00383 steps=0;
00384 while (steps < ((d.y*d.x)-1)) {
00385
00386
00387
00388
00389 if (turnFlag) {
00390
00391 moveAndSaveFeedback(DOWN, false, &pos);
00392 steps++;
00393
00394
00395 if (!this->env->isValidAction(&pos, DOWN)) {
00396 turnFlag = false;
00397 if (this->env->isValidAction(&pos, LEFT)) {
00398 moveAndSaveFeedback(LEFT, false, &pos);
00399 steps++;
00400 }
00401 }
00402
00403
00404 } else {
00405
00406 moveAndSaveFeedback(UP, false, &pos);
00407 steps++;
00408
00409
00410 if (!this->env->isValidAction(&pos, UP)) {
00411 turnFlag = true;
00412 if (this->env->isValidAction(&pos, LEFT)) {
00413 moveAndSaveFeedback(LEFT, false, &pos);
00414 steps++;
00415 }
00416 }
00417 }
00418 }
00419
00420
00421
00422
00423 BoardController::powerOnLED(LED_EXPLORATION, false);
00424 }
00425
00429 unsigned char ValueIteration::getRandomAction () {
00430
00431 bool found = false;
00432 unsigned char randomAction;
00433 static unsigned char increment = 0;
00434
00435 ENV_POS pos;
00436 env->getCurrentState(&pos);
00437
00438 do {
00439 randomAction = (rand()+increment)%NUM_ACTIONS;
00440 if (this->env-> isValidAction(&pos, randomAction)) {
00441 found = true;
00442 }
00443
00444 increment++;
00445
00446 } while (!found);
00447
00448 return randomAction;
00449 }
00450
00454 void ValueIteration::gotoRandomState() {
00455
00456 ENV_POS d;
00457 env->getDiscretization(&d);
00458
00459 ENV_POS pos;
00460 env->getCurrentState(&pos);
00461
00462 unsigned char newX = rand()%d.x;
00463 unsigned char newY = rand()%d.y;
00464
00465
00466 while (pos.y != newY) {
00467 if (pos.y < newY) {
00468 this->moveAndSaveFeedback(DOWN, 0, &pos);
00469 }
00470 else {
00471 this->moveAndSaveFeedback(UP, 0, &pos);
00472 }
00473 }
00474
00475 while (pos.x!= newX) {
00476 if (pos.x < newX) {
00477 this->moveAndSaveFeedback(RIGHT, 0, &pos);
00478 }
00479 else {
00480 this->moveAndSaveFeedback(LEFT, 0, &pos);
00481 }
00482 }
00483 }
00484
00486 bool ValueIteration::doExploration() {
00487
00488 float explorationProbability = 1;
00489 float dV;
00490
00491 if (this->expMode == EXPLORATION_VALUE_BASED) {
00492
00493
00494 dV = fabs ((oldValue-newValue) / (fabs(oldValue)+fabs(newValue)));
00495 explorationProbability = pow(dV, 1.0/this->expParameter);
00496
00497 } else if (this->expMode == EXPLORATION_EGREEDY) {
00498 explorationProbability = this->expParameter;
00499 }
00500
00501 this->currentExpProbability = explorationProbability;
00502
00503
00504 if ((rand()%100) > (explorationProbability*100)) {
00505 return false;
00506 } else {
00507 return true;
00508 }
00509 }
00510
00511
00515 void ValueIteration::doNextAction() {
00516
00517 ENV_POS pos, oldPos;
00518
00519 env->getCurrentState(&pos);
00520 env->getCurrentState(&oldPos);
00521
00522
00523 this->moveAndSaveFeedback(
00524
00525 this->getGreedyAction(&pos),
00526
00527 BoardController::getSwitchState(SWITCH_ENABLE_EXPLORATION),
00528
00529 &pos
00530 );
00531
00532
00533 this->oldValue = this->states->getValue(&oldPos);
00534
00535
00536 this->valueIterateAllStates();
00537
00538
00539 this->newValue = this->states->getValue(&oldPos);
00540
00541 }
00542
00543
00547 char ValueIteration::moveAndSaveFeedback (unsigned char _action, bool _activateRandomMoving, ENV_POS* successorState) {
00548
00549 ENV_POS oldPos, d;
00550 env->getCurrentState(&oldPos);
00551 env->getDiscretization(&d);
00552
00553
00554
00555 unsigned char randomAction=0;
00556 unsigned char nextAction = _action;
00557
00558 BoardController::powerOnLED(LED_WALK, true);
00559
00560
00561 if (_activateRandomMoving && doExploration()) {
00562
00563 BoardController::powerOnLED(LED_EXPLORATION, true);
00564 BoardController::powerOnLED(LED_WALK, true);
00565
00566 if (BoardController::getSwitchState(SWITCH_RANDOM_STATE)==0) {
00567 nextAction = getRandomAction();
00568 this->moveAndSaveFeedback(nextAction, 0, successorState);
00569 } else {
00570 this->gotoRandomState();
00571 }
00572
00573 BoardController::powerOnLED(LED_EXPLORATION, false);
00574 BoardController::powerOnLED(LED_WALK, false);
00575 return 0;
00576 }
00577
00578
00579
00580 while (!this->env->isValidAction(&oldPos, nextAction)) {
00581
00582 BoardController::blinkLED((1<<LED_WALK), 10, 100);
00583 nextAction = getRandomAction();
00584 }
00585
00586
00587
00588
00589
00590 int milliDelay = 0;
00591 if (nextAction == UP || nextAction == DOWN) {
00592 milliDelay = (int)DELAY_TIME / (int)d.y;
00593 } else if (nextAction == LEFT || nextAction == RIGHT) {
00594 milliDelay = (int)DELAY_TIME / (int)d.x;
00595 }
00596
00597
00598 if (BoardController::getSwitchState(SWITCH_RAM)) {
00599 milliDelay = (int)((double)milliDelay * 0.66);
00600 }
00601
00602
00603 ACTION_RETURN actionReturn;
00604 this->env->doAction(nextAction, milliDelay, &actionReturn);
00605 successorState->x = actionReturn.state.x;
00606 successorState->y = actionReturn.state.y;
00607
00608
00609
00610
00611 srand (rand()+1+actionReturn.reward);
00612
00613 this->distanceCovered += actionReturn.reward;
00614 states->setFeedback(&oldPos, nextAction, actionReturn.reward);
00615
00616
00617 BoardController::powerOnLED(LED_EXPLORATION, false);
00618 BoardController::powerOnLED(LED_WALK, false);
00619
00620 return actionReturn.reward;
00621 }
00622
00623 unsigned short ValueIteration::get_seed()
00624 {
00625 unsigned short seed = 0;
00626 unsigned short *p = (unsigned short*) (RAMEND+1);
00627 extern unsigned short __heap_start;
00628
00629 while (p >= &__heap_start + 1)
00630 seed ^= * (--p);
00631
00632 return seed;
00633 }
00634