# Code from Chapter 13 of Machine Learning: An Algorithmic Perspective (2nd Edition)
# by Stephen Marsland (http://stephenmonika.net)

# You are free to use, change, or redistribute the code in any way you wish for
# non-commercial purposes, but please maintain the name of the original author.
# This code comes with no warranty of any kind.

# Stephen Marsland, 2014

import numpy as np
import dtree


class randomforest:

    """The random forest algorithm based on the decision tree of Chapter 6"""

    def __init__(self):
        """ Constructor """
        # A single dtree instance is reused both to build every tree and to
        # classify with the trees it built.
        self.tree = dtree.dtree()

    def rf(self, data, targets, features, nTrees, nSamples, nFeatures, maxlevel=5):
        """Train a forest of decision trees on bootstrap resamples of the data.

        Parameters:
            data      -- indexable collection of training points; its first
                         dimension (np.shape(data)[0]) is the number of points.
            targets   -- indexable collection of labels, aligned with data.
            features  -- passed through unchanged to dtree.make_tree.
            nTrees    -- number of trees to train (one per outer iteration).
            nSamples  -- number of bootstrap index columns drawn per tree; must
                         be at least 1.  Only the last column is used to build
                         the training sample (see the note in the loop).
            nFeatures -- passed to dtree.make_tree as its `forest` argument.
            maxlevel  -- passed through unchanged to dtree.make_tree.

        Returns:
            A list with one entry per tree, each being whatever
            dtree.make_tree returned.

        Raises:
            ValueError -- if nSamples is smaller than 1.

        Side effects:
            Stores nSamples and nTrees on the instance and prints the index of
            each tree as it is trained.
        """
        if nSamples < 1:
            # Previously this surfaced as a NameError on an undefined sample.
            raise ValueError("nSamples must be at least 1")

        nPoints = np.shape(data)[0]
        self.nSamples = nSamples
        self.nTrees = nTrees

        classifiers = []

        for i in range(nTrees):
            # Progress output; the call form prints the same on Python 2 and 3.
            print(i)

            # Compute bootstrap samples.
            # The full (nPoints, nSamples) draw is kept so that a seeded run
            # consumes the random stream exactly as before.  Every column but
            # the last was built and then overwritten before training, so only
            # the last column is materialised here.
            # NOTE(review): the flattened source does not show whether training
            # sat inside the per-sample loop; this follows the reading of one
            # tree per outer iteration -- confirm against the book's code.
            samplePoints = np.random.randint(0, nPoints, (nPoints, nSamples))
            chosen = samplePoints[:, -1]
            sample = [data[idx] for idx in chosen]
            sampleTarget = [targets[idx] for idx in chosen]

            # Train classifiers
            classifiers.append(
                self.tree.make_tree(sample, sampleTarget, features, maxlevel, forest=nFeatures)
            )

        return classifiers

    def rfclass(self, classifiers, data):
        """Classify each point in data by majority vote over the given trees.

        Parameters:
            classifiers -- iterable of trees as returned by rf(); every entry
                           is consulted, so no prior call to rf() on this
                           instance is required.
            data        -- iterable of points to classify.

        Returns:
            A list with one decision per point: the output produced by the
            most trees (the earliest-seen output wins a tie), or None when
            every tree returned None for that point.
        """
        decision = []

        # Majority voting
        for point in data:
            votes = []
            for tree in classifiers:
                out = self.tree.classify(tree, point)
                # A tree that cannot classify the point abstains.
                if out is not None:
                    votes.append(out)

            if not votes:
                decision.append(None)
                continue

            # List the possible outputs in first-seen order.  Equality-based
            # membership is used so outputs need not be hashable.
            candidates = []
            for vote in votes:
                if vote not in candidates:
                    candidates.append(vote)

            # max() returns the first maximal item, so ties go to the output
            # that was seen first.
            decision.append(max(candidates, key=votes.count))

        return decision