# Code from Chapter 13 of Machine Learning: An Algorithmic Perspective (2nd Edition) # by Stephen Marsland (http://stephenmonika.net) # You are free to use, change, or redistribute the code in any way you wish for # non-commercial purposes, but please maintain the name of the original author. # This code comes with no warranty of any kind. # Stephen Marsland, 2008, 2014 # An example of bagging on the Car Safety dataset import numpy as np import dtree import bagging import randomforest tree = dtree.dtree() bagger = bagging.bagger() forest = randomforest.randomforest() data,classes,features = tree.read_data('car.data') train = data[::2][:] test = data[1::2][:] trainc = classes[::2] testc = classes[1::2] t=tree.make_tree(train,trainc,features) out = tree.classifyAll(t,test) tree.printTree(t,' ') a = np.zeros(len(out)) b = np.zeros(len(out)) d = np.zeros(len(out)) for i in range(len(out)): if testc[i] == 'good' or testc[i]== 'v-good': b[i] = 1 if out[i] == testc[i]: d[i] = 1 if out[i] == testc[i]: a[i] = 1 print "Tree" print "Number correctly predicted",np.sum(a) print "Number of testpoints ",len(a) print "Percentage Accuracy ",np.sum(a)/len(a)*100.0 print "" print "Number of cars rated as good or very good", np.sum(b) print "Number correctly identified as good or very good",np.sum(d) print "Percentage Accuracy",np.sum(d)/np.sum(b)*100.0 c=bagger.bag(train,trainc,features,100) out = bagger.bagclass(c,test) a = np.zeros(len(out)) b = np.zeros(len(out)) d = np.zeros(len(out)) for i in range(len(out)): if testc[i] == 'good' or testc[i]== 'v-good': b[i] = 1 if out[i] == testc[i]: d[i] = 1 if out[i] == testc[i]: a[i] = 1 print "-----" print "Bagger" print "Number correctly predicted",np.sum(a) print "Number of testpoints ",len(a) print "Percentage Accuracy ",np.sum(a)/len(a)*100.0 print "" print "Number of cars rated as good or very good", np.sum(b) print "Number correctly identified as good or very good",np.sum(d) print "Percentage Accuracy",np.sum(d)/np.sum(b)*100.0 f=f = forest.rf(train,trainc,features,100,200,2) out = forest.rfclass(f,test) a = np.zeros(len(out)) b = np.zeros(len(out)) d = np.zeros(len(out)) for i in range(len(out)): if testc[i] == 'good' or testc[i]== 'v-good': b[i] = 1 if out[i] == testc[i]: d[i] = 1 if out[i] == testc[i]: a[i] = 1 print "-----" print "Forest" print "Number correctly predicted",np.sum(a) print "Number of testpoints ",len(a) print "Percentage Accuracy ",np.sum(a)/len(a)*100.0 print "" print "Number of cars rated as good or very good", np.sum(b) print "Number correctly identified as good or very good",np.sum(d) print "Percentage Accuracy",np.sum(d)/np.sum(b)*100.0