/*
 * Two-armed binary bandit experiment.
 *
 * Runs the same task with two learning schemes and reports, over NUM_RUNS
 * independent runs of STEPS_PER_RUN plays each, how often each action ended
 * up preferred:
 *   - binaryBanditSL: "supervised" scheme that credits the other action
 *     whenever the chosen one fails.
 *   - binaryBanditRL: epsilon-greedy scheme driven by average reward.
 *
 * Usage: ./bandit pA pB epsilon   (all three values in [0, 1])
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

enum {
    NUM_RUNS = 100,     /* independent learning runs per scheme */
    STEPS_PER_RUN = 50  /* plays per run */
};

int chooseGreedyAction(double a0, double a1);
int chooseRandomAction(void);
double myRand(void);
void binaryBanditSL(double pA, double pB);
void binaryBanditRL(double pA, double pB, double epsilon);

/*
 * Parse `text` as a probability in [0, 1] and store it in `*out`.
 * `name` is only used in the error message.
 * Returns 0 on success; prints a diagnostic to stderr and returns -1 when
 * the text is not a complete number or lies outside [0, 1] (NaN included).
 */
static int parseProbability(const char *text, const char *name, double *out)
{
    char *end = NULL;
    double value = strtod(text, &end);

    if (end == text || *end != '\0') {
        fprintf(stderr, "%s: '%s' is not a number\n", name, text);
        return -1;
    }
    /* Written as a negated range test so that NaN is rejected as well. */
    if (!(value >= 0.0 && value <= 1.0)) {
        fprintf(stderr, "%s: %s is outside [0, 1]\n", name, text);
        return -1;
    }
    *out = value;
    return 0;
}

/*
 * Bump the tally slot matching the comparison of the two scores:
 * learned[0] when A is higher, learned[1] when B is higher,
 * learned[2] when they are equal.
 */
static void recordOutcome(int learned[3], double scoreA, double scoreB)
{
    if (scoreA > scoreB) {
        learned[0]++;
    } else if (scoreB > scoreA) {
        learned[1]++;
    } else {
        learned[2]++;
    }
}

/*
 * Entry point: reads pA, pB and epsilon from the command line, seeds the
 * random generator from the clock and runs both learning schemes.
 * Returns 1 on missing or invalid arguments, 0 otherwise.
 */
int main(int argc, char **argv)
{
    if (argc < 4) {
        fprintf(stderr, "./bandit pA pB epsilon\n");
        return 1;
    }

    double pA = 0.0;
    double pB = 0.0;
    double epsilon = 0.0;
    if (parseProbability(argv[1], "pA", &pA) != 0 ||
        parseProbability(argv[2], "pB", &pB) != 0 ||
        parseProbability(argv[3], "epsilon", &epsilon) != 0) {
        return 1;
    }

    srand((unsigned)time(NULL));

    printf("Input: pA: %f, pB: %f, epsilon: %f\n", pA, pB, epsilon);

    binaryBanditSL(pA, pB);          // supervised learning
    binaryBanditRL(pA, pB, epsilon); // reinforcement learning
    return 0;
}

/*
 * Supervised-style learner.
 *
 * Each play picks the action with the higher count so far (ties broken at
 * random). On success (probability pA / pB for action 0 / 1) the chosen
 * action's count is incremented; on failure the OTHER action's count is
 * incremented instead. After each run the action with the higher count is
 * recorded as the learned one, and the totals are printed at the end.
 */
void binaryBanditSL(double pA, double pB)
{
    const double probSuccess[2] = {pA, pB};
    int learned[3] = {0, 0, 0}; // {actionA, actionB, draw}

    for (int run = 0; run < NUM_RUNS; run++) {
        int actionCount[2] = {0, 0};

        for (int step = 0; step < STEPS_PER_RUN; step++) {
            int takeAction = chooseGreedyAction((double)actionCount[0],
                                                (double)actionCount[1]);

            if (myRand() < probSuccess[takeAction]) {
                actionCount[takeAction]++;
            } else {
                /* Failure is treated as evidence for the other action. */
                actionCount[1 - takeAction]++;
            }
        }

        recordOutcome(learned, (double)actionCount[0], (double)actionCount[1]);
    }

    printf("Supervised Learning:\n");
    printf("action a: %d, action b: %d, draw: %d\n",
           learned[0], learned[1], learned[2]);
}

/*
 * Epsilon-greedy learner.
 *
 * Each play explores (uniform random action) with probability epsilon and
 * otherwise picks the action with the higher value estimate. The reward is
 * 1 with probability pA / pB for action 0 / 1, else 0. After each run the
 * action with the higher estimate is recorded as the learned one, and the
 * totals are printed at the end.
 */
void binaryBanditRL(double pA, double pB, double epsilon)
{
    const double probSuccess[2] = {pA, pB};
    int learned[3] = {0, 0, 0}; // {actionA, actionB, draw}

    for (int run = 0; run < NUM_RUNS; run++) {
        int actionSum[2] = {0, 0};
        /*
         * Counts start at 1 so the division below is defined before an
         * action has been tried; the estimate is thus sum / (plays + 1).
         */
        int actionCount[2] = {1, 1};

        for (int step = 0; step < STEPS_PER_RUN; step++) {
            int takeAction;

            if (myRand() < epsilon) {
                takeAction = chooseRandomAction();
            } else {
                takeAction = chooseGreedyAction(
                    (double)actionSum[0] / actionCount[0],
                    (double)actionSum[1] / actionCount[1]);
            }

            actionCount[takeAction]++;
            if (myRand() < probSuccess[takeAction]) { // reward = 1 else 0
                actionSum[takeAction]++;
            }
        }

        double vA = (double)actionSum[0] / actionCount[0];
        double vB = (double)actionSum[1] / actionCount[1];
        recordOutcome(learned, vA, vB);
    }

    printf("Reinforcement Learning:\n");
    printf("action a: %d, action b: %d, draw: %d\n",
           learned[0], learned[1], learned[2]);
}

/*
 * Return the index (0 or 1) of the larger of the two scores; when they are
 * equal the choice is made at random.
 */
int chooseGreedyAction(double a0, double a1)
{
    if (a0 > a1) {
        return 0;
    }
    if (a1 > a0) {
        return 1;
    }
    return chooseRandomAction();
}

/* Return 0 or 1 with equal probability. */
int chooseRandomAction(void)
{
    return (myRand() < 0.5) ? 0 : 1;
}

/*
 * Random number in [0, 1).
 *
 * The upper bound is exclusive on purpose: callers test `myRand() < p`, so
 * a result of exactly 1.0 would make an event with p == 1.0 fail.
 */
double myRand(void)
{
    return (double)rand() / ((double)RAND_MAX + 1.0);
}