/*
 * Small tabular dynamic-programming demo over STATES (3) states and
 * ACTIONS (2) actions.
 *
 * Data visible in this file:
 *   actions[a][s][t] - ACTIONS x STATES x STATES table of doubles. Every
 *                      inner row sums to 1.0, so it is presumably the
 *                      probability of moving from state s to state t under
 *                      action a -- TODO confirm against the original.
 *   reward[a][s]     - ACTIONS x STATES table of doubles; both action rows
 *                      hold the same values (0.0, 10.0, 1.0).
 *   values[it][s]    - ITERATIONS x STATES table, one row per iteration.
 *   DISCOUNT (0.5)   - defined here; its use is not visible in the
 *                      surviving text.
 *
 * showvalues(ite) prints the iteration number followed by ':'; the rest
 * of its body is missing.
 *
 * The surviving fragment of the driver keeps the largest candidate `val`
 * seen so far in `maxval` together with its action index j, stores
 * maxval into values[iteration][i], and calls showvalues(iteration).
 *
 * NOTE(review): this text looks like it lost everything between '<' and
 * '>' characters during extraction: both #include directives have no
 * header name, the loop in showvalues() breaks off at "for(i=0;i" and
 * resumes at " maxval) {", and the file ends mid-statement. The whole
 * program is also collapsed onto one physical line, so the "//" comment
 * below swallows the rest of that line. Restore from the original source
 * before attempting to build.
 */
#include #include #define STATES 3 #define ACTIONS 2 double actions[ACTIONS][STATES][STATES] = { { { 0.0, 0.9, 0.1 }, { 1.0, 0.0, 0.0 }, { 1.0, 0.0, 0.0 } }, { { 0.0, 0.1, 0.9 }, { 0.0, 0.8, 0.2 }, { 0.0, 0.2, 0.8 } } }; double reward[ACTIONS][STATES] = { { 0.0, 10.0, 1.0 }, { 0.0, 10.0, 1.0 } }; #define ITERATIONS 20 #define DISCOUNT 0.5 double values[ITERATIONS][STATES]; void showvalues(int ite) { int i; printf("%i:",ite); for(i=0;i maxval) { action = j; maxval = val; } } // printf("state %i action %i.\n",i,action); values[iteration][i] = maxval; } showvalues(iteration); } /* See which action produce the policy. */ for(i=0;i