/*
 * Policy iteration over a tiny, hard-coded decision process with STATES
 * states and ACTIONS actions.
 *
 * NOTE(review): this file arrived with its text damaged. The header name
 * after "#include" was missing, all preprocessor directives shared one
 * line, and everything between the start of the printing loop in
 * showvalues() and the "val > bestval" comparison in the improvement step
 * was lost. The parts marked "reconstructed" below were rebuilt from the
 * surviving globals and the surviving tail of the loop; confirm them
 * against the original source.
 */
#include <stdio.h>

#define STATES 3
#define ACTIONS 2

/*
 * actions[a][s][t]: weight of going from state s to state t under action a.
 * Every row sums to 1.0, so the rows read as transition probabilities.
 */
double actions[ACTIONS][STATES][STATES] = {
    {
        { 0.0, 0.9, 0.1 },
        { 1.0, 0.0, 0.0 },
        { 1.0, 0.0, 0.0 }
    },
    {
        { 0.0, 0.1, 0.9 },
        { 0.0, 0.8, 0.2 },
        { 0.0, 0.2, 0.8 }
    }
};

/*
 * reward[a][s]: reward term for action a in state s.
 * NOTE(review): whether this is the reward for leaving or for entering s
 * is not visible in the surviving text; it is used here as the immediate
 * reward of taking action a in state s.
 */
double reward[ACTIONS][STATES] = {
    { 0.0, 10.0, 1.0 },
    { 0.0, 10.0, 1.0 }
};

/*
 * policy[s]: action currently selected for state s.
 * It is indexed by state, so it needs STATES entries. It was declared with
 * ACTIONS entries, which made policy[2] an out-of-bounds access.
 */
int policy[STATES];

#define ITERATIONS 20
#define DISCOUNT 0.5

/* Two value rows so that each sweep reads one row while writing the other. */
double values[2][STATES];

/*
 * Reconstructed helper: one-step lookahead value of taking `action` in
 * `state`, given the state values in v (STATES entries).
 */
static double action_value(int action, int state, const double *v)
{
    double expected = 0.0;

    for (int next = 0; next < STATES; next++) {
        expected += actions[action][state][next] * v[next];
    }
    return reward[action][state] + DISCOUNT * expected;
}

/*
 * Print the sweep number followed by one value per state.
 *
 * ite: sweep number, expected to be non-negative.
 *
 * NOTE(review): only the "%i:" prefix and the start of the loop survived.
 * Printing row (ite % 2) with " %f" and a trailing newline is reconstructed.
 */
void showvalues(int ite)
{
    int i;

    printf("%i:", ite);
    for (i = 0; i < STATES; i++) {
        printf(" %f", values[ite % 2][i]);
    }
    printf("\n");
}

/*
 * Alternate policy evaluation and greedy policy improvement until an
 * improvement pass leaves every state's action unchanged.
 */
int main(void)
{
    int change;

    do {
        change = 0;

        /*
         * Policy evaluation (reconstructed): ITERATIONS sweeps under the
         * current policy. Sweep `ite` writes row (ite % 2) and reads the
         * other row. With an even ITERATIONS the row read by the first
         * sweep is the one the previous round finished on.
         */
        for (int ite = 0; ite < ITERATIONS; ite++) {
            const double *prev = values[(ite + 1) % 2];
            double *cur = values[ite % 2];

            for (int i = 0; i < STATES; i++) {
                cur[i] = action_value(policy[i], i, prev);
            }
            showvalues(ite);
        }

        /*
         * Policy improvement: pick, for every state, the action with the
         * best lookahead value. Only the comparison and everything after
         * it survived; the seeding from action 0 is reconstructed. Ties
         * keep the lower-numbered action because the test is strict.
         */
        const double *v = values[(ITERATIONS - 1) % 2];

        for (int i = 0; i < STATES; i++) {
            int bestaction = 0;
            double bestval = action_value(0, i, v);

            for (int j = 1; j < ACTIONS; j++) {
                double val = action_value(j, i, v);

                if (val > bestval) {
                    bestval = val;
                    bestaction = j;
                }
            }
            if (policy[i] != bestaction) {
                change = 1; /* Policy got improved. */
            }
            policy[i] = bestaction;
            printf("state %i has action %i.\n", i, bestaction);
        }
    } while (change);

    return 0;
}