00001 #ifndef __LERNEN_H__ 00002 #define __LERNEN_H__ 00003 00004 //#include "Environment.h" 00005 #include "StateSpace.h" 00006 00007 // functions and constants for PRNG 00008 #define PMRAND_MIN 1 // Min. output 00009 #define PMRAND_MAX 2147483647 // Max. output 00010 #define a 48271 00011 #define m 2147483647 00012 #define q (m / a) 00013 #define r (m % a) 00014 00015 class ValueIteration { 00016 00017 public: 00018 ValueIteration (ENV_POS* d); 00019 ~ValueIteration(); 00020 00021 void doNextAction(); 00022 void exploreAllFeedbacks(bool wait5Seconds); 00023 signed long int getDistanceCovered(); 00024 void resetDistanceCounter(); 00025 unsigned long int getIterationCount(); 00026 char moveAndSaveFeedback (unsigned char _action, bool _activateRandomMoving, ENV_POS* successorState); 00027 void valueIterateAllStates(); 00028 00029 float gamma; 00030 float expParameter; 00031 unsigned char expMode; 00032 float currentExpProbability; 00033 00035 Environment* env; 00036 00038 StateSpace* states; 00039 00040 private: 00041 unsigned char getGreedyAction(ENV_POS* pos); 00042 bool doExploration(void); 00043 float evaluateAction (ENV_POS* pos, unsigned char _action); 00044 unsigned short get_seed(); 00045 unsigned char getRandomAction(); 00046 void gotoRandomState(); 00047 //void init(); 00048 00049 unsigned char random_byte; 00050 unsigned long int iteration; 00051 signed long int distanceCovered; 00052 00053 double oldValue; 00054 double newValue; 00055 }; 00056 00057 #endif