#include <ValueIteration.h>
Public Member Functions | |
void | doNextAction () |
void | exploreAllFeedbacks (bool wait5Seconds) |
signed long int | getDistanceCovered () |
unsigned long int | getIterationCount () |
char | moveAndSaveFeedback (unsigned char _action, bool _activateRandomMoving, ENV_POS *successorState) |
void | resetDistanceCounter () |
void | valueIterateAllStates () |
ValueIteration (ENV_POS *d) | |
~ValueIteration () | |
Data Fields | |
float | currentExpProbability |
Environment * | env |
unsigned char | expMode |
float | expParameter |
float | gamma |
StateSpace * | states |
Private Member Functions | |
bool | doExploration (void) |
float | evaluateAction (ENV_POS *pos, unsigned char _action) |
unsigned short | get_seed () |
unsigned char | getGreedyAction (ENV_POS *pos) |
unsigned char | getRandomAction () |
void | gotoRandomState () |
Private Attributes | |
signed long int | distanceCovered |
unsigned long int | iteration |
double | newValue |
double | oldValue |
unsigned char | random_byte |
Definition at line 15 of file ValueIteration.h.
ValueIteration::ValueIteration | ( | ENV_POS * | d | ) |
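Initializes ValueIteration: creates the Environment from the discretization d, seeds the PRNG, sets the initial gamma and exploration mode from the board switches, and creates the StateSpace.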
Definition at line 8 of file ValueIteration.c.
References currentExpProbability, distanceCovered, env, EXPLORATION_EGREEDY, EXPLORATION_VALUE_BASED, expMode, expParameter, gamma, get_seed(), BoardController::getSwitchState(), iteration, newValue, oldValue, states, SWITCH_GAMMA, and SWITCH_VDBE_EGREEDY.
00008 { 00009 00010 00011 //#ifdef TESTMODE 00012 //#warning VI: skipped initalization of the StateSpace 00013 //#else 00014 // create new MxN environment 00015 this->env = new Environment(d); 00016 00017 // seed for PRNG 00018 srand (get_seed() + 1103515245); 00019 00020 // set initial gamma 00021 if (BoardController::getSwitchState(SWITCH_GAMMA)) { 00022 this->gamma = 0.98; 00023 } else { 00024 this->gamma = 0.9; 00025 } 00026 00027 // set initial exploration mode 00028 this->expMode = BoardController::getSwitchState(SWITCH_VDBE_EGREEDY); 00029 00030 this->iteration = 0; 00031 this->currentExpProbability = 1.0; 00032 00033 if (this->expMode == EXPLORATION_EGREEDY) { 00034 this->expParameter = 0.075; 00035 } else if (this->expMode == EXPLORATION_VALUE_BASED) { 00036 this->expParameter = 1; 00037 } 00038 00039 oldValue = newValue = 0.0; 00040 this->distanceCovered = 0; 00041 00042 // initialize StateSpace 00043 this->states = new StateSpace(env);//(StateSpace*) malloc( sizeof(StateSpace) ); 00044 //this->states->StateSpace::StateSpace(env); 00045 //#endif 00046 }
ValueIteration::~ValueIteration | ( | ) |
bool ValueIteration::doExploration | ( | void | ) | [private] |
Decides, with respect to the exploration parameters, whether the next step should be exploration (returns true) or exploitation (returns false).
Definition at line 486 of file ValueIteration.c.
References currentExpProbability, EXPLORATION_EGREEDY, EXPLORATION_VALUE_BASED, expMode, expParameter, newValue, and oldValue.
Referenced by moveAndSaveFeedback().
00486 { 00487 00488 float explorationProbability = 1; 00489 float dV; 00490 00491 if (this->expMode == EXPLORATION_VALUE_BASED) { 00492 00493 // calculate dV 00494 dV = fabs ((oldValue-newValue) / (fabs(oldValue)+fabs(newValue))); 00495 explorationProbability = pow(dV, 1.0/this->expParameter); 00496 00497 } else if (this->expMode == EXPLORATION_EGREEDY) { 00498 explorationProbability = this->expParameter; 00499 } 00500 00501 this->currentExpProbability = explorationProbability; 00502 00503 // return whether we should explore or exploit 00504 if ((rand()%100) > (explorationProbability*100)) { 00505 return false; // exploitation 00506 } else { 00507 return true; // exploration 00508 } 00509 }
void ValueIteration::doNextAction | ( | ) |
Selects the next action with respect to the current policy, performs it, and then value-iterates all states.
Definition at line 515 of file ValueIteration.c.
References env, Environment::getCurrentState(), getGreedyAction(), BoardController::getSwitchState(), StateSpace::getValue(), moveAndSaveFeedback(), newValue, oldValue, states, SWITCH_ENABLE_EXPLORATION, and valueIterateAllStates().
Referenced by main().
00515 { 00516 00517 ENV_POS pos, oldPos; 00518 00519 env->getCurrentState(&pos); 00520 env->getCurrentState(&oldPos); 00521 00522 // select next action - with respect on SWTICH_ENABLE_EXPLORATION 00523 this->moveAndSaveFeedback( 00524 // choose best action 00525 this->getGreedyAction(&pos), 00526 // pure greedy or exploitation/exploration 00527 BoardController::getSwitchState(SWITCH_ENABLE_EXPLORATION), 00528 // memory for new position 00529 &pos 00530 ); 00531 00532 // save old value(currentState) (for VDBE) 00533 this->oldValue = this->states->getValue(&oldPos); 00534 00535 // value iterate all states 00536 this->valueIterateAllStates(); 00537 00538 // save new value(currentState) (for VDBE) 00539 this->newValue = this->states->getValue(&oldPos); 00540 00541 }
float ValueIteration::evaluateAction | ( | ENV_POS * | pos, | |
unsigned char | _action | |||
) | [private] |
Computes the value of performing action _action in state pos from the value of the successor state: v' = feedback(pos, _action) + gamma * v(successor)
Definition at line 211 of file ValueIteration.c.
References DOWN, gamma, StateSpace::getFeedback(), StateSpace::getValue(), LEFT, newValue, RIGHT, states, UP, envPosition::x, and envPosition::y.
Referenced by getGreedyAction(), and valueIterateAllStates().
00211 { 00212 00213 float neighbourValue, newValue; 00214 int actionFeedback; 00215 00216 ENV_POS newPos; 00217 newPos.x = pos->x; 00218 newPos.y = pos->y; 00219 00220 if (_action == UP) newPos.y--; 00221 if (_action == DOWN) newPos.y++; 00222 if (_action == RIGHT) newPos.x++; 00223 if (_action == LEFT) newPos.x--; 00224 00225 // get reward for action a in s 00226 actionFeedback = states->getFeedback(pos, _action); 00227 00228 // get value of successor state 00229 neighbourValue = states->getValue(&newPos); 00230 00231 // compute new value 00232 newValue = (gamma * (float)neighbourValue) + (float)actionFeedback; 00233 00234 return newValue; 00235 }
void ValueIteration::exploreAllFeedbacks | ( | bool | wait5Seconds | ) |
Performs a complete exploration of the environment (all state transitions).
Definition at line 241 of file ValueIteration.c.
References Environment::beamRobotToState(), DOWN, env, Environment::getDiscretization(), Environment::isValidAction(), LED_EXPLORATION, LEFT, moveAndSaveFeedback(), BoardController::msDelay(), BoardController::powerOnLED(), RIGHT, UP, envPosition::x, and envPosition::y.
Referenced by main().
00241 { 00242 00243 unsigned char i, j; 00244 unsigned char steps = 0; 00245 00246 // switch on EXPLORATION LED 00247 //BoardController::powerOnLED(LED_EXPLORATION, true); 00248 00249 // go to state (0,0) 00250 ENV_POS pos(0, 0); 00251 env->beamRobotToState(&pos); 00252 00253 ENV_POS d; // discretization 00254 env->getDiscretization(&d); 00255 00256 bool turnFlag = true; 00257 00258 // delay five seconds 00259 if (wait5Seconds) BoardController::msDelay(5000); 00260 00261 //BoardController::blinkLED((1<<LED_LEARN)|(1<<LED_EXPLORATION), 4, 400); 00262 00263 // explore horizontal until (scalX, scalY) 00264 while (steps < ((d.y*d.x)-1)) { 00265 00266 //BoardController::blinkLED((1<<LED_EXPLORATION), 1, 200); 00267 // go right 00268 if (turnFlag) { 00269 00270 moveAndSaveFeedback(RIGHT, false, &pos); 00271 steps++; 00272 00273 // go down at the right end 00274 if (!this->env->isValidAction(&pos, RIGHT)) { 00275 turnFlag = false; 00276 if (this->env->isValidAction(&pos, DOWN)) { 00277 moveAndSaveFeedback(DOWN, false, &pos); 00278 steps++; 00279 } 00280 } 00281 00282 // go left 00283 } else { 00284 00285 moveAndSaveFeedback(LEFT, false, &pos); 00286 steps++; 00287 00288 // go down on the left end 00289 if (!this->env->isValidAction(&pos, LEFT)) { 00290 turnFlag = true; 00291 if (this->env->isValidAction(&pos, DOWN)) { 00292 moveAndSaveFeedback(DOWN, false, &pos); 00293 steps++; 00294 } 00295 } 00296 } 00297 } 00298 00299 //BoardController::blinkLED((1<<LED_LEARN)|(1<<LED_EXPLORATION), 4, 400); 00300 // go back horizontally from (scalX, scalY) to (0,0); 00301 steps=0; 00302 //turnFlag = !turnFlag; 00303 while (steps < ((d.y*d.x)-1)) { 00304 00305 //BoardController::blinkLED((1<<LED_EXPLORATION), 1, 200); 00306 // go right 00307 if (turnFlag) { 00308 00309 moveAndSaveFeedback(RIGHT, false, &pos); 00310 steps++; 00311 00312 // go up on the right end 00313 if (!this->env->isValidAction(&pos, RIGHT)) { 00314 turnFlag = false; 00315 if (this->env->isValidAction(&pos, UP)){ 
00316 moveAndSaveFeedback(UP, false, &pos); 00317 steps++; 00318 } 00319 } 00320 00321 // go left 00322 } else { 00323 00324 moveAndSaveFeedback(LEFT, false, &pos); 00325 steps++; 00326 00327 // go up on the left end 00328 if (!this->env->isValidAction(&pos, LEFT)) { 00329 turnFlag = true; 00330 if (this->env->isValidAction(&pos, UP)) { 00331 moveAndSaveFeedback(UP, false, &pos); 00332 steps++; 00333 } 00334 } 00335 } 00336 00337 } 00338 00339 //BoardController::blinkLED((1<<LED_LEARN)|(1<<LED_EXPLORATION), 4, 400); 00340 //BoardController::blinkLED((1<<LED_LEARN)|(1<<LED_EXPLORATION), 10, 400); 00341 // explore vertically 00342 turnFlag = true; 00343 steps = 0; 00344 while (steps < ((d.y*d.x)-1)) { 00345 00346 //BoardController::blinkLED((1<<LED_EXPLORATION), 1, 200); 00347 00348 // go down 00349 if (turnFlag) { 00350 00351 moveAndSaveFeedback(DOWN, false, &pos); 00352 steps++; 00353 00354 // go left on the lower end 00355 if (!this->env->isValidAction(&pos, DOWN)) { 00356 turnFlag = false; 00357 if (this->env->isValidAction(&pos, RIGHT)) { 00358 moveAndSaveFeedback(RIGHT, false, &pos); 00359 steps++; 00360 } 00361 } 00362 00363 // go up 00364 } else { 00365 00366 moveAndSaveFeedback(UP, false, &pos); 00367 steps++; 00368 00369 // go right on the upper end 00370 if (!this->env->isValidAction(&pos, UP)) { 00371 turnFlag = true; 00372 if (this->env->isValidAction(&pos, RIGHT)) { 00373 moveAndSaveFeedback(RIGHT, false, &pos); 00374 steps++; 00375 } 00376 } 00377 } 00378 } 00379 00380 //BoardController::blinkLED((1<<LED_LEARN)|(1<<LED_EXPLORATION), 4, 400); 00381 //BoardController::blinkLED((1<<LED_LEARN)|(1<<LED_EXPLORATION), 10, 400); 00382 // go vertically backwards to (0,0); 00383 steps=0; 00384 while (steps < ((d.y*d.x)-1)) { 00385 00386 //BoardController::blinkLED((1<<LED_EXPLORATION), 1, 200); 00387 00388 // go down 00389 if (turnFlag) { 00390 00391 moveAndSaveFeedback(DOWN, false, &pos); 00392 steps++; 00393 00394 // go left on the lower end 00395 if 
(!this->env->isValidAction(&pos, DOWN)) { 00396 turnFlag = false; 00397 if (this->env->isValidAction(&pos, LEFT)) { 00398 moveAndSaveFeedback(LEFT, false, &pos); 00399 steps++; 00400 } 00401 } 00402 00403 // go left 00404 } else { 00405 00406 moveAndSaveFeedback(UP, false, &pos); 00407 steps++; 00408 00409 // go left on the upper end 00410 if (!this->env->isValidAction(&pos, UP)) { 00411 turnFlag = true; 00412 if (this->env->isValidAction(&pos, LEFT)) { 00413 moveAndSaveFeedback(LEFT, false, &pos); 00414 steps++; 00415 } 00416 } 00417 } 00418 } 00419 //BoardController::blinkLED((1<<LED_LEARN)|(1<<LED_EXPLORATION), 4, 400); 00420 00421 00422 // turn off FEEDBACK LED 00423 BoardController::powerOnLED(LED_EXPLORATION, false); 00424 }
unsigned short ValueIteration::get_seed | ( | ) | [private] |
Definition at line 623 of file ValueIteration.c.
Referenced by ValueIteration().
00624 { 00625 unsigned short seed = 0; 00626 unsigned short *p = (unsigned short*) (RAMEND+1); 00627 extern unsigned short __heap_start; 00628 00629 while (p >= &__heap_start + 1) 00630 seed ^= * (--p); 00631 00632 return seed; 00633 }
signed long int ValueIteration::getDistanceCovered | ( | ) |
returns the traveled distance of the crawling robot
Definition at line 65 of file ValueIteration.c.
References distanceCovered.
Referenced by V24Control::processCommand().
00065 { 00066 return this->distanceCovered; 00067 }
unsigned char ValueIteration::getGreedyAction | ( | ENV_POS * | pos | ) | [private] |
Returns the greedy action in state pos. If several actions share the best value, one of them is chosen at random.
Definition at line 79 of file ValueIteration.c.
References BoardController::blinkLED(), env, evaluateAction(), getRandomAction(), Environment::isValidAction(), LED_EXPLORATION, and NUM_ACTIONS.
Referenced by doNextAction().
00079 { 00080 00081 float value, maximum = -100; 00082 unsigned short multipleBestActions = 0; 00083 unsigned char _action, bestAction; 00084 bool haveAction = false; 00085 00086 00087 // 1) calculate values for all valid actions 00088 for(_action=0; _action <NUM_ACTIONS; _action++) { 00089 00090 if (this->env->isValidAction(pos, _action)) { 00091 00092 // calculate value 00093 value = evaluateAction (pos, _action); 00094 00095 // check for maximum 00096 if(value > maximum) { 00097 00098 maximum = value; 00099 haveAction = true; 00100 00101 // memory the best action 00102 bestAction = _action; 00103 00104 multipleBestActions = 0; 00105 00106 } else if (fabs(value - maximum) < 0.001) { 00107 multipleBestActions++; 00108 } 00109 } 00110 } 00111 00112 // if there are multiple best actions, chose a random one 00113 if (multipleBestActions) { 00114 00115 unsigned char found=0; 00116 unsigned char randAction = rand()%(multipleBestActions+1); 00117 00118 for (_action=0; _action < NUM_ACTIONS; _action++) { 00119 if (this->env->isValidAction(pos, _action)) { 00120 if (fabs((value=evaluateAction(pos, _action)) - maximum) < 0.001) { 00121 if (found < randAction) { 00122 found++; 00123 } else { 00124 bestAction = _action; 00125 _action = NUM_ACTIONS; // terminates the for-loop 00126 } 00127 } 00128 } 00129 } 00130 } 00131 00132 00133 // 2) if there was no best action found, take random action 00134 if (!haveAction) { 00135 bestAction = this->getRandomAction(); 00136 BoardController::blinkLED((1<<LED_EXPLORATION), 50, 200); 00137 } 00138 00139 return bestAction; 00140 }
unsigned long int ValueIteration::getIterationCount | ( | ) |
Returns the number of value iterations performed so far.
Definition at line 58 of file ValueIteration.c.
References iteration.
Referenced by V24Control::processCommand().
00058 { 00059 return this->iteration; 00060 }
unsigned char ValueIteration::getRandomAction | ( | ) | [private] |
returns a valid random action
Definition at line 429 of file ValueIteration.c.
References env, Environment::getCurrentState(), and NUM_ACTIONS.
Referenced by getGreedyAction(), and moveAndSaveFeedback().
00429 { 00430 00431 bool found = false; 00432 unsigned char randomAction; 00433 static unsigned char increment = 0; 00434 00435 ENV_POS pos; 00436 env->getCurrentState(&pos); 00437 00438 do { 00439 randomAction = (rand()+increment)%NUM_ACTIONS; 00440 if (this->env-> isValidAction(&pos, randomAction)) { 00441 found = true; 00442 } 00443 00444 increment++; 00445 00446 } while (!found); 00447 00448 return randomAction; 00449 }
void ValueIteration::gotoRandomState | ( | ) | [private] |
Go to a random state and update reward table
Definition at line 454 of file ValueIteration.c.
References DOWN, env, Environment::getCurrentState(), Environment::getDiscretization(), LEFT, moveAndSaveFeedback(), RIGHT, UP, envPosition::x, and envPosition::y.
Referenced by moveAndSaveFeedback().
00454 { 00455 00456 ENV_POS d; // discretization 00457 env->getDiscretization(&d); 00458 00459 ENV_POS pos; 00460 env->getCurrentState(&pos); 00461 00462 unsigned char newX = rand()%d.x; 00463 unsigned char newY = rand()%d.y; 00464 00465 // position new state on Y-axis 00466 while (pos.y != newY) { 00467 if (pos.y < newY) { 00468 this->moveAndSaveFeedback(DOWN, 0, &pos); 00469 } 00470 else { 00471 this->moveAndSaveFeedback(UP, 0, &pos); 00472 } 00473 } 00474 // position new state on X-axis 00475 while (pos.x!= newX) { 00476 if (pos.x < newX) { 00477 this->moveAndSaveFeedback(RIGHT, 0, &pos); 00478 } 00479 else { 00480 this->moveAndSaveFeedback(LEFT, 0, &pos); 00481 } 00482 } 00483 }
char ValueIteration::moveAndSaveFeedback | ( | unsigned char | _action, | |
bool | _activateRandomMoving, | |||
ENV_POS * | successorState | |||
) |
perform _action in currentState. _activateRandomMoving defines if exploration/exploitation should be balanced by eGreedy or VDBE. Otherwise, pure greedy exploitation is performed.
Definition at line 547 of file ValueIteration.c.
References BoardController::blinkLED(), DELAY_TIME, distanceCovered, Environment::doAction(), doExploration(), DOWN, env, Environment::getCurrentState(), Environment::getDiscretization(), getRandomAction(), BoardController::getSwitchState(), gotoRandomState(), Environment::isValidAction(), LED_EXPLORATION, LED_WALK, LEFT, BoardController::powerOnLED(), actionReturn::reward, RIGHT, StateSpace::setFeedback(), actionReturn::state, states, SWITCH_RAM, SWITCH_RANDOM_STATE, UP, envPosition::x, and envPosition::y.
Referenced by doNextAction(), exploreAllFeedbacks(), and gotoRandomState().
00547 { 00548 00549 ENV_POS oldPos, d; 00550 env->getCurrentState(&oldPos); 00551 env->getDiscretization(&d); 00552 00553 //unsigned char oldX = pos.x; 00554 //unsigned char oldY = pos.y; 00555 unsigned char randomAction=0; 00556 unsigned char nextAction = _action; 00557 00558 BoardController::powerOnLED(LED_WALK, true); 00559 00560 // check whether we should explore 00561 if (_activateRandomMoving && doExploration()) { 00562 00563 BoardController::powerOnLED(LED_EXPLORATION, true); 00564 BoardController::powerOnLED(LED_WALK, true); 00565 00566 if (BoardController::getSwitchState(SWITCH_RANDOM_STATE)==0) { 00567 nextAction = getRandomAction(); 00568 this->moveAndSaveFeedback(nextAction, 0, successorState); 00569 } else { 00570 this->gotoRandomState(); 00571 } 00572 00573 BoardController::powerOnLED(LED_EXPLORATION, false); 00574 BoardController::powerOnLED(LED_WALK, false); 00575 return 0; 00576 } 00577 00578 00579 // if _action is invalid, take a (valid) random action 00580 while (!this->env->isValidAction(&oldPos, nextAction)) { 00581 00582 BoardController::blinkLED((1<<LED_WALK), 10, 100); 00583 nextAction = getRandomAction(); 00584 } 00585 00586 // disable serial interrupt 00587 //BoardController::disableV24Interrupt(); 00588 00589 // compute delay 00590 int milliDelay = 0; 00591 if (nextAction == UP || nextAction == DOWN) { 00592 milliDelay = (int)DELAY_TIME / (int)d.y; 00593 } else if (nextAction == LEFT || nextAction == RIGHT) { 00594 milliDelay = (int)DELAY_TIME / (int)d.x; 00595 } 00596 00597 // if external memory is used, delay-time can be reduced due to bus latency 00598 if (BoardController::getSwitchState(SWITCH_RAM)) { 00599 milliDelay = (int)((double)milliDelay * 0.66); 00600 } 00601 00602 // perform action 00603 ACTION_RETURN actionReturn; 00604 this->env->doAction(nextAction, milliDelay, &actionReturn); 00605 successorState->x = actionReturn.state.x; 00606 successorState->y = actionReturn.state.y; 00607 00608 //BoardController::enableV24Interrupt(); 
00609 00610 // re set seed (with noise from the environment) 00611 srand (rand()+1+actionReturn.reward); 00612 00613 this->distanceCovered += actionReturn.reward; 00614 states->setFeedback(&oldPos, nextAction, actionReturn.reward); 00615 00616 // turn of EXPLORATION and WALK LEDs 00617 BoardController::powerOnLED(LED_EXPLORATION, false); 00618 BoardController::powerOnLED(LED_WALK, false); 00619 00620 return actionReturn.reward; 00621 }
void ValueIteration::resetDistanceCounter | ( | ) |
resets the distance counter
Definition at line 72 of file ValueIteration.c.
References distanceCovered.
Referenced by V24Control::processCommand().
00072 { 00073 this->distanceCovered = 0; 00074 }
void ValueIteration::valueIterateAllStates | ( | ) |
This function performs the value-iteration algorithm for all states.
Definition at line 146 of file ValueIteration.c.
References env, evaluateAction(), Environment::getDiscretization(), Environment::isValidAction(), iteration, LED_LEARN, NUM_ACTIONS, BoardController::powerOnLED(), StateSpace::setValue(), states, envPosition::x, and envPosition::y.
Referenced by doNextAction(), and main().
00146 { 00147 00148 float value, maximum = -100; 00149 unsigned char bestAction, action, _row, _column; 00150 unsigned short multipleBestActions = 0; 00151 bool haveAction = false; 00152 00153 // increment iteration number 00154 this->iteration++; 00155 00156 // switch on LEARN LED 00157 BoardController::powerOnLED(LED_LEARN, true); 00158 00159 ENV_POS position, d; 00160 env->getDiscretization(&d); // discretization 00161 00162 00163 //BoardController::blinkLED((1<<LED_LEARN), 5, 100); 00164 00165 // for each state s in S: 00166 for(_row=0; _row<d.y; _row++) { 00167 for(_column=0; _column<d.x; _column++) { 00168 00169 position.x = _column; 00170 position.y = _row; 00171 00172 haveAction = false; 00173 maximum = -100; 00174 multipleBestActions=0; 00175 00176 // 1) for each valid successor state 00177 for(action=0; action<NUM_ACTIONS; action++) { 00178 00179 if (this->env->isValidAction(&position, action)) { 00180 00181 value = evaluateAction (&position, action); 00182 00183 // memorize maximum value 00184 if(value > maximum) { 00185 00186 maximum = value; 00187 haveAction = true; 00188 00189 // update maximum value in state-space 00190 //BoardController::blinkLED((1<<LED_LEARN)|(1<<LED_EXPLORATION), 2, 300); 00191 states->setValue(&position, value); 00192 //BoardController::blinkLED((1<<LED_LEARN)|(1<<LED_WALK), 2, 300); 00193 //BoardController::powerOnLED((1<<LED_EXPLORATION)|(1<<LED_WALK), false); 00194 } 00195 } 00196 } 00197 } 00198 } 00199 00200 00201 // switch off LEARN LED 00202 //BoardController::msDelay(100); 00203 BoardController::powerOnLED(LED_LEARN, false); 00204 }
Definition at line 32 of file ValueIteration.h.
Referenced by doExploration(), V24Control::processGetDebug(), and ValueIteration().
signed long int ValueIteration::distanceCovered [private] |
Definition at line 51 of file ValueIteration.h.
Referenced by getDistanceCovered(), moveAndSaveFeedback(), resetDistanceCounter(), and ValueIteration().
Environment Object
Definition at line 35 of file ValueIteration.h.
Referenced by doNextAction(), exploreAllFeedbacks(), getGreedyAction(), getRandomAction(), gotoRandomState(), main(), moveAndSaveFeedback(), V24Control::processCommand(), V24Control::processGetGridsize(), V24Control::processGetState(), V24Control::processSetState(), valueIterateAllStates(), ValueIteration(), and ~ValueIteration().
unsigned char ValueIteration::expMode |
Definition at line 31 of file ValueIteration.h.
Referenced by doExploration(), V24Control::processCommand(), V24Control::processGetExplorationConfig(), and ValueIteration().
Definition at line 30 of file ValueIteration.h.
Referenced by doExploration(), V24Control::processCommand(), V24Control::processGetExplorationConfig(), and ValueIteration().
float ValueIteration::gamma |
Definition at line 29 of file ValueIteration.h.
Referenced by evaluateAction(), V24Control::processGetGamma(), V24Control::processSetGamma(), and ValueIteration().
unsigned long int ValueIteration::iteration [private] |
Definition at line 50 of file ValueIteration.h.
Referenced by getIterationCount(), valueIterateAllStates(), and ValueIteration().
double ValueIteration::newValue [private] |
Definition at line 54 of file ValueIteration.h.
Referenced by doExploration(), doNextAction(), evaluateAction(), and ValueIteration().
double ValueIteration::oldValue [private] |
Definition at line 53 of file ValueIteration.h.
Referenced by doExploration(), doNextAction(), and ValueIteration().
unsigned char ValueIteration::random_byte [private] |
Definition at line 49 of file ValueIteration.h.
The StateSpace Object
Definition at line 38 of file ValueIteration.h.
Referenced by doNextAction(), evaluateAction(), moveAndSaveFeedback(), V24Control::processCommand(), V24Control::processGetState(), V24Control::processSetState(), valueIterateAllStates(), ValueIteration(), and ~ValueIteration().