/*
 * Value iteration for a small, fully specified Markov decision process.
 *
 * NOTE(review): the listing this was recovered from had lost both #include
 * header names and every span of text between a '<' and the next '>'.  The
 * loop bounds, the backup in action_value(), the per-state print in
 * showvalues() and the final policy print-out were reconstructed and should
 * be checked against an undamaged copy of the file.
 */
#include <math.h>
#include <stdio.h>

#define STATES 5
#define ACTIONS 2

/*
 * Transition model, used below as actions[a][s][t]: the probability of
 * ending in state t after taking action a in state s.  Every innermost row
 * sums to 1.
 * NOTE(review): the index order is inferred from the array dimensions and
 * the row sums -- confirm.
 */
double actions[ACTIONS][STATES][STATES] = {
    { { 0.0, 0.0, 1.0, 0.0, 0.0 },
      { 0.1, 0.0, 0.0, 0.9, 0.0 },
      { 1.0, 0.0, 0.0, 0.0, 0.0 },
      { 0.0, 0.0, 0.0, 0.0, 1.0 },
      { 1.0, 0.0, 0.0, 0.0, 0.0 } },
    { { 0.0, 1.0, 0.0, 0.0, 0.0 },
      { 1.0, 0.0, 0.0, 0.0, 0.0 },
      { 0.0, 0.0, 0.0, 0.0, 1.0 },
      { 0.0, 0.0, 1.0, 0.0, 0.0 },
      { 0.0, 0.0, 1.0, 0.0, 0.0 } }
};

/* Immediate reward, indexed reward[a][s]. */
double reward[ACTIONS][STATES] = {
    { 1.0, 0.0, 0.0, 5.0, 0.0 },
    { 0.0, 0.0, 0.0, 0.0, 0.0 }
};

#define DISCOUNT 0.6
#define EPSILON 0.01

/* Store the value function for MAXITERATIONS iterations. */
#define MAXITERATIONS 10000

/* values[n][s]: value estimate of state s after n sweeps (row 0 is all 0). */
double values[MAXITERATIONS][STATES];

/*
 * Print sweep number `ite` followed by the value estimate of every state.
 * NOTE(review): only the "%i:" prefix survived in the listing; the
 * per-state format and the trailing newline are reconstructed.
 */
void showvalues(int ite)
{
    int i;

    printf("%i:", ite);
    for (i = 0; i < STATES; i++)
        printf(" %f", values[ite][i]);
    printf("\n");
}

/*
 * One-step lookahead: immediate reward of `action` in `state` plus the
 * discounted expected value of the successor state under `prev`.
 */
static double action_value(const double *prev, int state, int action)
{
    double val = reward[action][state];
    int next;

    for (next = 0; next < STATES; next++)
        val += DISCOUNT * actions[action][state][next] * prev[next];
    return val;
}

/*
 * Return the action with the largest one-step lookahead value in `state`;
 * ties go to the lowest-numbered action.  When `best_value` is not NULL the
 * winning value is stored through it.
 */
static int best_action(const double *prev, int state, double *best_value)
{
    int action = 0;
    double maxval = action_value(prev, state, 0);
    int j;

    for (j = 1; j < ACTIONS; j++) {
        double val = action_value(prev, state, j);

        if (val > maxval) {
            action = j;
            maxval = val;
        }
    }
    if (best_value != NULL)
        *best_value = maxval;
    return action;
}

/*
 * Run value iteration from the all-zero value function, printing every
 * sweep, then print the greedy action for each state.
 */
int main(void)
{
    /*
     * Stop once the largest per-state change is at most
     * EPSILON * (1 - DISCOUNT) / (2 * DISCOUNT).  The previous expression,
     * EPSILON*(1-DISCOUNT)/2*DISCOUNT, multiplied by DISCOUNT instead of
     * dividing by it because '/' and '*' associate left to right.
     */
    const double threshold = EPSILON * (1 - DISCOUNT) / (2 * DISCOUNT);
    int iteration = 0;
    double delta;
    int i;

    /* values[0] needs no set-up: objects with static storage start at 0. */
    do {
        iteration++;
        delta = 0.0;
        for (i = 0; i < STATES; i++) {
            double change;

            best_action(values[iteration - 1], i, &values[iteration][i]);
            change = fabs(values[iteration][i] - values[iteration - 1][i]);
            if (change > delta)
                delta = change;
        }
        showvalues(iteration);
        /* The second test keeps `iteration` inside values[] if the sweep
         * converges slowly. */
    } while (delta > threshold && iteration < MAXITERATIONS - 1);

    /*
     * See which action produces the policy.
     * NOTE(review): the original print-out was lost; the format is taken
     * from the printf that was commented out inside the sweep.
     */
    for (i = 0; i < STATES; i++)
        printf("state %i action %i.\n", i,
               best_action(values[iteration], i, NULL));

    return 0;
}