# Code from Chapter 11 of Machine Learning: An Algorithmic Perspective (2nd Edition) # by Stephen Marsland (http://stephenmonika.net) # You are free to use, change, or redistribute the code in any way you wish for # non-commercial purposes, but please maintain the name of the original author. # This code comes with no warranty of any kind. # Stephen Marsland, 2008, 2014 # The basic TD(0) algorithm with the Cliff example import numpy as np def TDZero_cliff(): R = -np.ones((4,7,4)) R[0,:,0] = -np.inf R[:,0,3] = -np.inf R[3,:,2] = -np.inf R[:,6,1] = -np.inf R[1,1:6,0] = -100 R[0,0,1] = -100 R[0,6,3] = -100 t = np.zeros((4,7,4,2)) for i in range(4): for j in range(7): for k in range(4): if k==2: if i<3: t[i,j,k,0] = i+1 t[i,j,k,1] = j else: t[i,j,k,0] = i t[i,j,k,1] = j elif k==1: if j<6: t[i,j,k,0] = i t[i,j,k,1] = j+1 else: t[i,j,k,0] = i t[i,j,k,1] = j if i==0 and j==0: t[i,j,k,0] = 0 t[i,j,k,1] = 0 elif k==0: if i==0 and j==6: # Finished t[i,j,k,0] = 0 t[i,j,k,1] = 0 if i>0: t[i,j,k,0] = i-1 t[i,j,k,1] = j else: t[i,j,k,0] = i t[i,j,k,1] = j if i==1 and 1<=j<=5: t[i,j,k,0] = 0 t[i,j,k,1] = 0 else: if j>0: t[i,j,k,0] = i t[i,j,k,1] = j-1 else: t[i,j,k,0] = i t[i,j,k,1] = j if i==0 and j==6: t[i,j,k,0] = 0 t[i,j,k,1] = 0 #print t[:,:,3,0] ,t[:,:,3,1] #Q = np.random.random_sample(np.shape(R))*0.1-0.05 Q = np.zeros(np.shape(R)) mu = 0.7 gamma = 0.4 epsilon = 0.05 nits = 0 while nits < 1000: # Pick initial state s = np.array([0,0]) #np.array([np.random.randint(4),np.random.randint(7)]) #print s, np.shape(s) #print np.shape(Q), np.shape(Q[s[0],s[1],:]) inEpisode = 1 # Stop when the accepting state is reached while inEpisode: r=-np.inf while r==-np.inf: # epsilon-greedy if (np.random.rand()