00001 #include "valueIteration.h"
00002 #include "valueIterationUmgebung.h"
00003 #include "gui_debug.h"
00004 #include <stdio.h>
00005 #include <stdlib.h>
00006 #include <math.h>
00007
00008 extern int xPos, yPos;
00009 double value[XMAX][YMAX];
00010 double feedback[XMAX][YMAX][NUMBEROFACTIONS];
00011
00012
void ValueIterationThread::printMatrix() {

    int x=0, y=0;
    char cur_mode = this->getModus();
    // getModus() consumes a pending external mode request; writing the result
    // back via setModus() keeps the internal mode in sync (and logs it).
    this->setModus(cur_mode);
    double in[2], out;
    double minValue=100000, maxValue=0;

    // Repaint policy: always repaint in interactive ('i') / print ('p') mode
    // or while the network is active; in loop mode ('l') repaints are
    // throttled by the configured speed (every 1000th / 100th iteration for
    // slow settings, every iteration for speed >= 10).
    if ((cur_mode == 'l' && (
    speed >= 10 ||
    (speed==0 && (iteration%1000)==0)||
    (speed>0 && speed<10 && (iteration%100)==0)))
    || cur_mode=='i' || cur_mode=='p' || this->netOn==true ) {

        for(y=0; y < scalY; y++) {
            for(x=0; x < scalX; x++) {
                // Handshake with the GUI thread: lock before each emit; the
                // receiving slot is expected to call unlockMutex() once it has
                // consumed the emitted cell value (see unlockMutex()).
                this->printMutex->lock();

                if(netOn) {
                    // Display the network's approximation of V(x,y).
                    in[0] = x;
                    in[1] = y;
                    network->getNetValue(in, &out);
                    if (out < minValue) minValue=out;
                    if (out > maxValue) maxValue=out;
                    emit emitValue(y, x, out, getBestDirection(x,y));
                }
                else {
                    // Display the tabular value function.
                    if (value[x][y] < minValue) minValue=value[x][y];
                    if (value[x][y] > maxValue) maxValue=value[x][y];
                    emit emitValue(y, x, value[x][y], getBestDirection(x,y));
                }

            }
        }


        // NOTE(review): if maxValue remains 0 (all displayed values <= 0) the
        // percentage term divides by zero and prints inf/nan -- confirm
        // whether that case can occur in practice.
        cout << "minValue=" <<minValue <<", maxValue=" <<maxValue
        <<", difference=" <<maxValue-minValue <<" (" <<(100*(maxValue-minValue))/maxValue <<"%)"<<endl;


        if (!netOn) {
            emit debugOutput (QString("displaying VI-table - iteration #") + QString::number(iteration));


        } else {
            emit debugOutput (
            "displaying network \tvi-count=" + QString::number(iteration) +
            ", bp-count=" + QString::number (this->network_training_result->total_epochs) +
            ", last error=" + QString::number(this->network_training_result->error) );
        }
    }
}
00068
00075 bool ValueIterationThread::testBorders(int action, int _x, int _y) {
00076 switch(action) {
00077 case DOWN: {
00078 if(_y >= (scalY-1)) return false;
00079 break;
00080 }
00081 case UP: {
00082 if(_y <= 0) return false;
00083 break;
00084 }
00085 case RIGHT: {
00086 if(_x >= (scalX-1)) return false;
00087 break;
00088 }
00089 case LEFT: {
00090 if(_x <= 0) return false;
00091 break;
00092 }
00093 }
00094 return true;
00095 }
00096
00103 int ValueIterationThread::newX(int action, int xOld) {
00104 switch(action) {
00105 case RIGHT: {
00106 return (xOld + 1);
00107 break;
00108 }
00109 case LEFT: {
00110 return (xOld - 1);
00111 break;
00112 }
00113 }
00114 return xOld;
00115 }
00116
00123 int ValueIterationThread::newY(int action, int yOld) {
00124 switch(action) {
00125 case DOWN: {
00126 return (yOld + 1);
00127 break;
00128 }
00129 case UP: {
00130 return (yOld - 1);
00131 break;
00132 }
00133 }
00134 return yOld;
00135 }
00136
00144 long double ValueIterationThread::evaluate(int action, int col, int row) {
00145 int xNew, yNew;
00146 long double dbl_value;
00147 double in[2], Feedback, out;
00148
00149 xNew = newX(action, col);
00150 yNew = newY(action, row);
00151
00152 Feedback = feedback[col][row][action];
00153
00154
00155 if(netOn) {
00156 in[0] = xNew;
00157 in[1] = yNew;
00158 network->getNetValue(in, &out);
00159 dbl_value = Feedback + (long double)(this->gamma * out);
00160 } else {
00161 dbl_value = Feedback + (long double)(this->gamma * value[xNew][yNew]);
00162 }
00163
00164
00165 if (netOn) {
00166 if (dbl_value > (this->maxReward * (1.101/(1.0-this->gamma)))) {
00167 printf ("ADJUSTING OVERSIZED NET VALUE (y=%d, x=%d) FROM %f TO %f\n", yNew, xNew, (double)dbl_value, (this->maxReward * (1/(1-this->gamma))));
00168 dbl_value=(this->maxReward * (1/(1-this->gamma)))*0.9;
00169 }
00170 }
00171 return dbl_value;
00172 }
00173
00180 int ValueIterationThread::getBestDirection(int _x, int _y) {
00181
00182
00183 long double value, maximum = -100;
00184 bool improved = false;
00185 int best = 4;
00186
00187 for(int action=0; action < NUMBEROFACTIONS; action++) {
00188 if( !testBorders(action, _x, _y) ) continue;
00189 value = evaluate(action, _x, _y);
00190 if(value > maximum) {
00191 maximum = value;
00192 best = action;
00193 improved = true;
00194 }
00195 }
00196
00197 return best;
00198 }
00199
00203 void ValueIterationThread::setValues() {
00204 long double dbl_value, maximum = 0;
00205
00206
00207
00208 if (this->randomStates!=NULL) delete this->randomStates;
00209 this->randomStates = new int[this->trainRandomValues];
00210 int uniqueStatesCount=0, iTest;
00211 bool found=false;
00212 while (uniqueStatesCount < this->trainRandomValues) {
00213 found = false;
00214 this->randomStates[uniqueStatesCount] = (int)((float)((float)rand() / RAND_MAX) * (float)(this->scalX * this->scalY));
00215
00216
00217 for (iTest=0; iTest<uniqueStatesCount; iTest++) {
00218 if (this->randomStates[iTest] == this->randomStates[uniqueStatesCount]) {
00219 found=true;
00220 iTest = uniqueStatesCount;
00221 }
00222 }
00223
00224 if (!found) {
00225 uniqueStatesCount++;
00226 }
00227 }
00228 cout <<endl <<endl;
00229
00230
00231
00232 int x, y;
00233
00234
00235 for (int s=0; s<this->trainRandomValues; s++) {
00236 y=(this->randomStates[s]/scalX);
00237 x=this->randomStates[s] -(y*scalY);
00238 for(int action=0; action<NUMBEROFACTIONS; action++) {
00239 if( !testBorders(action, x, y) ) continue;
00240 dbl_value = evaluate(action, x, y);
00241 if(dbl_value > maximum) maximum = dbl_value;
00242 }
00243
00244 value[x][y] = (long double) maximum;
00245
00246 maximum = 0;
00247 steps++;
00248 }
00249
00250 }
00251
void ValueIterationThread::learn(int _speed, long double _gamma) {

    this->setSpeed (_speed);

    this->gamma = _gamma;
    char _input;
    double in[2];

    // Main worker loop: poll the current mode every pass until 'q' (quit).
    while( (_input = this->getModus()) != 'q' ) {

        switch (_input) {

        // 'l' = loop tabular value iteration continuously,
        // 'i' = run exactly one tabular iteration, then stop.
        case 'l':

        case 'i':
            this->netOn = false;

            printf ("performing value iteration\n");
            setValues();

            iteration++;
            printMatrix();

            if (_input == 'l') {
                // Throttle the loop according to the configured speed.
                usleep(speed * 10000);

            } else {
                // Single-step mode: request quit after one iteration.
                setModus('q');
            }

            break;

        // 'B' = loop value iteration followed by network training,
        // 'b' = one iteration + training pass, then fall back to print mode.
        case 'B':

        case 'b':

            this->netOn = true;
            printf ("performing value iteration\n");
            setValues();
            iteration++;
            printf ("start training network ...\n");
            this->coachNet();

            if (_input == 'B') {
                usleep(speed * 10000);
                this->printMatrix();

            } else {
                cout <<"abbrechen" <<endl;
                setModus('p');
            }

            break;

        // 'c' = clear the value table and re-initialize the network.
        case 'c':

            printf ("clearing table ...\n");
            for(int y=0; y<scalY; y++) {
                for(int x=0; x<scalX; x++) {
                    value[x][y] = 0.00000;
                    emit emitValue (y, x, value[x][y], 5);
                }
            }
            printf ("setting random net-values\n");

            // NOTE(review): the network is rebuilt twice -- first with
            // iHiddenLayers+3 layers, then with iHiddenLayers+2 --
            // presumably to re-randomize the weights; confirm the double
            // rebuild is intentional and not leftover debugging.
            delete (network);

            network = new Network(2, 1, this->iHiddenLayers+3, this->iHiddenUnits, "valueIteration");
            this->initNet();
            delete (network);


            network = new Network(2, 1, this->iHiddenLayers+2, this->iHiddenUnits, "valueIteration");

            this->initNet();

            iteration = 0;
            printf ("table cleared\n");
            printMatrix();


            setModus('q');

            calculatetrainValues = false;
            break;

        // 'k' = copy the network's output into the value table and display it.
        case 'k':

            for(int y=0; y<scalY; y++) {
                for(int x=0; x<scalX; x++) {
                    in[0] = x;
                    in[1] = y;
                    network->getNetValue(in, &value[x][y]);
                    emit emitValue (y, x, value[x][y], getBestDirection(x,y));
                }
            }


            setModus('q');
            break;

        // 'p' = print the current matrix once, then idle.
        case 'p':
            printMatrix();


            setModus('q');

            sleep(1);
            break;

        // Unknown mode: display the matrix, log the input, and idle briefly.
        default:
            printMatrix();
            DBG();
            printf ("INPUT=%c\n", _input);
            sleep(1);
            break;
        }
    }

    printf ("Value-Iteration - learn() - finished()\n");
}
00398
00399
00400
00401
00402
00406 ValueIterationThread::~ValueIterationThread() {
00407 delete (network);
00408 }
00409
00414 void ValueIterationThread::setTrainRandomValues(int _trainRandomValues) {
00415 this->trainRandomValues = _trainRandomValues;
00416 }
00417
00427 ValueIterationThread::ValueIterationThread(int _x, int _y, double _r, int _g, int _iHiddenLayers, int _iHiddenUnits) {
00428
00429
00430 srand( (unsigned int) time(NULL));
00431
00432 this->auto_adjust_error=false;
00433 this->skipNextPrintMatrix = false;
00434 this->scalX = _x;
00435 this->scalY = _y;
00436 this->trainRandomValues = _x*_y;
00437
00438 this->maxReward = 1;
00439 this->minReward = -1;
00440 this->randomStates = NULL;
00441
00442
00443 for (int x=0; x<this->scalX; x++) {
00444 for (int y=0; y<this->scalY; y++) {
00445 value[x][y] = 0;
00446 }
00447 }
00448
00449 this->printMutex = new QMutex;
00450 this->inputMutex = new QMutex;
00451 this->explored = false;
00452 this->iteration = 0;
00453 this->netOn = calculatetrainValues = false;
00454
00455 this->setSpeed (_g);
00456 this->setModusFromExtern(0);
00457 this->setGamma (_r);
00458 printf ("scalX=%d, scalY=%d\n", scalX, scalY);
00459
00460 this->iHiddenUnits = _iHiddenUnits;
00461 this->iHiddenLayers = _iHiddenLayers;
00462
00463 network = new Network(2, 1, _iHiddenLayers+2, _iHiddenUnits, "valueIteration");
00464 this->initNet();
00465 }
00466
00479 void ValueIterationThread::setNetworkParameters (int _max_epochs, int _iterations_between_report, double _desired_error, double _dEta, bool _auto_adjust_error, double _activation_stepness_hidden, double _activation_stepness_output, int _activation_function_hidden, int _activation_function_output ) {
00480 this->network->setParameters (_max_epochs, _iterations_between_report, _desired_error, _dEta, _activation_stepness_hidden, _activation_stepness_output, _activation_function_hidden, _activation_function_output);
00481 this->auto_adjust_error = _auto_adjust_error;
00482 }
00483
00484
00488 void ValueIterationThread::run() {
00489 learn (this->speed, this->gamma);
00490 }
00491
00496 void ValueIterationThread::setGamma (double _gamma) {
00497 printf ("Value-Iteration - new gamma (%f)\n", _gamma);
00498 this->gamma = _gamma;
00499 if (netOn) {
00500 network->setMinMaxInputOutputValues(0, scalX > scalY ? (scalX-1) : (scalY-1), this->minReward/(1-this->gamma), this->maxReward/(1-this->gamma));
00501 }
00502 }
00503
00510 void ValueIterationThread::setViValue (int _row, int _col, double _value) {
00511 printf ("Value-Iteration - new Value {%d, %d} = %f\n", _row, _col, _value);
00512 value[_col][_row] = _value;
00513 }
00514
00519 void ValueIterationThread::setSpeed (int _g) {
00520 printf ("Value-Iteration - new speed (%d)\n", _g);
00521 speed = _g;
00522 }
00523
00528 void ValueIterationThread::setModus (char _x) {
00529 printf ("Value-Iteration - intenal modus changed to: '%c'\n", _x);
00530 this->in = _x;
00531 }
00532
00537 void ValueIterationThread::setModusFromExtern (char _x) {
00538 printf ("Value-Iteration - extern modus: '%c'\n", _x);
00539 this->in_ext = _x;
00540 }
00541
00546 char ValueIterationThread::getModus () {
00547
00548 char in_tmp;
00549
00550 if (this->in_ext != 0) {
00551 in_tmp = this->in_ext;
00552 this->in = this->in_ext;
00553
00554 this->in_ext = 0;
00555 printf ("returning extern modus: '%c'\n", in_tmp);
00556 return in_tmp;
00557 } else {
00558 printf ("returning internal modus: '%c'\n", this->in);
00559 return this->in;
00560 }
00561 }
00562
00566 void ValueIterationThread::unlockMutex() {
00567 this->printMutex->unlock();
00568 }
00569
00579 void ValueIterationThread::setFeedback(int row, int _col, double _up, double _down, double _left, double _right) {
00580 feedback[_col][row][UP] = _up;
00581 feedback[_col][row][DOWN] = _down;
00582 feedback[_col][row][LEFT] = _left;
00583 feedback[_col][row][RIGHT] = _right;
00584 }
00585
00586
00587
00591 void ValueIterationThread::deactivateNet() {
00592 this->netOn = false;
00593 printf ("Value Iteration - neural network is temporarily deactivated\n");
00594 }
00595
00599 void ValueIterationThread::activateNet() {
00600 this->netOn = true;
00601 printf ("Value Iteration - neural network is activated\n");
00602 }
00603
void ValueIterationThread::initNet() {

    // Inputs are grid coordinates in [0, max(scalX,scalY)-1]; outputs are
    // bounded by the discounted-reward limits reward/(1-gamma).
    this->network->setMinMaxInputOutputValues(
    0,
    scalX > scalY ? (scalX-1) : (scalY-1),
    this->minReward/(1-this->gamma),
    this->maxReward/(1-this->gamma));

    // Reset/randomize the network weights.
    this->network->clear();

    // NOTE(review): a fresh result struct is allocated on every call and the
    // previous one is never freed -- this leaks whenever initNet() runs more
    // than once (constructor, coachNet(), learn() 'c' case). It cannot be
    // deleted here safely because the member is not guaranteed to be
    // initialized before the first call; fix ownership at the declaration.
    this->network_training_result = new NETWORKTRAININGRESULTS();

}
00622
00626 void ValueIterationThread::coachNet() {
00627 double in[2];
00628 int newMaxReward=0, newMinReward=0;
00629 int x, y;
00630
00631
00632 network->createNewTrainingDataFile();
00633
00634
00635 cout <<"checking for (minReward, maxReward) = (" <<minReward <<", " <<maxReward <<")" <<endl;
00636 for(int y=0; y<scalY; y++) {
00637 for(int x=0; x<scalX; x++) {
00638 for(int aktion=0; aktion<NUMBEROFACTIONS; aktion++) {
00639
00640 if (feedback[x][y][aktion] > newMaxReward) {
00641 newMaxReward = (int)feedback[x][y][aktion];
00642
00643
00644 } else if (feedback[x][y][aktion] < newMinReward) {
00645 newMinReward = (int)feedback[x][y][aktion];
00646 }
00647 }
00648 }
00649 }
00650
00651
00652 if ((newMaxReward != this->maxReward) || (newMinReward != this->minReward)) {
00653 cout <<"adjusting (minReward, maxReward) to: ("
00654 <<newMinReward <<", " <<newMaxReward <<")" <<endl;
00655 this->maxReward = newMaxReward;
00656 this->minReward = newMinReward;
00657 this->initNet();
00658 }
00659 for (int s=0; s<this->trainRandomValues; s++) {
00660
00661 y=(this->randomStates[s]/scalX);
00662 x=this->randomStates[s] -(y*scalY);
00663
00664
00665 in[0] = x;
00666 in[1] = y;
00667 network->addTrainingData(in, &value[x][y]);
00668 }
00669
00670
00671 network->trainNet(network_training_result);
00672
00673 emit debugOutput (
00674 "train net \tvi-count=" + QString::number(iteration) +
00675 ", bp-count=" + QString::number(this->network_training_result->total_epochs) +
00676 ", error=" + QString::number(this->network_training_result->error));
00677
00678 if (this->auto_adjust_error) {
00679
00680
00681
00682
00683
00684
00685 if (this->network_training_result->last_epochs == this->network_training_result->max_epochs) {
00686 emit emitNewNetworkError ( this->network_training_result->error );
00687 network->setDesiredError ( this->network_training_result->error );
00688
00689 } else if (this->network_training_result->last_epochs <= (this->network_training_result->max_epochs*(5.0/100.0))) {
00690 emit emitNewNetworkError ( this->network_training_result->trainError/2.0 );
00691 network->setDesiredError ( this->network_training_result->trainError/2.0 );
00692 }
00693 }
00694 }
00695