1Cademy - Python Code for Classification and Decision Tree

Learn Before

Classification Tree

Code

Python Code for Classification and Decision Tree

# sklearn.externals.six was removed from scikit-learn; io.StringIO is the
# standard-library equivalent.
from io import StringIO

import pydotplus
from IPython.display import Image
from sklearn.tree import export_graphviz

# Split into train and test sets, holding out 30% of the rows for testing.
# Each part is copied so that adding the prediction column to the test frame
# further down modifies an independent DataFrame rather than a slice of
# df_iris (which would raise pandas' SettingWithCopyWarning).
df_iris_train, df_iris_test = [
    part.copy() for part in train_test_split(df_iris, test_size=0.3)
]

# Build and train the classifier. max_depth=1 restricts the tree to a single
# split; criterion='entropy' scores candidate splits by information gain.
dt_model = sktree.DecisionTreeClassifier(max_depth=1,
                                         criterion='entropy')
# Given the first 4 columns (selected by position), learn the species.
# .iloc replaces the .ix indexer, which was removed in pandas 1.0.
dt_model.fit(df_iris_train.iloc[:, :4], df_iris_train.species)

# Render the fitted tree: export it as Graphviz DOT text into an in-memory
# buffer, then turn that text into a PNG image.
dot_data = StringIO()
export_graphviz(dt_model, out_file=dot_data,
                filled=True, rounded=True,
                special_characters=True)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
# NOTE(review): as a bare expression this only displays when it is the last
# statement of a notebook cell - confirm how this snippet is run.
Image(graph.create_png())

# Deploying a decision tree
# This is testing the model: predict the species of the held-out rows from
# the same 4 feature columns used for training.
predicted_labels = dt_model.predict(df_iris_test.iloc[:, :4])
df_iris_test['predicted_label_tree'] = predicted_labels
df_iris_test.sample(10)

# Determine the overall accuracy: fraction of test rows whose predicted
# label equals the true species.
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(df_iris_test.species, predicted_labels)
print("Accuracy: {0:.2%}".format(accuracy))

# Utility function to compare the predictions versus ground truth.
def comparePlot(input_frame, real_column, predicted_column,
                x_column='sepal_length', y_column='sepal_width'):
    """Plot true labels and predicted labels in two side-by-side scatter panels.

    The frame is duplicated: one copy is tagged 'Species' and labelled with
    the values of ``real_column``, the other is tagged 'Classifier' and
    labelled with the values of ``predicted_column`` prefixed by 'Predict '.
    The two copies are stacked and drawn with ``sns.lmplot``, one panel per
    tag, coloured by label. ``input_frame`` itself is not modified.

    Args:
        input_frame: DataFrame holding the feature columns and both label
            columns.
        real_column: Name of the column with the ground-truth labels.
        predicted_column: Name of the column with the classifier's labels.
        x_column: Column plotted on the x axis (default 'sepal_length').
        y_column: Column plotted on the y axis (default 'sepal_width').

    Returns:
        None. The figure is drawn as a side effect.
    """
    df_a = input_frame.copy()
    df_b = input_frame.copy()
    df_a['label_source'] = 'Species'
    df_b['label_source'] = 'Classifier'
    df_a['label'] = df_a[real_column]
    # Prefix predictions so the two panels get distinct hue categories.
    df_b['label'] = df_b[predicted_column].apply('Predict {}'.format)
    df_c = pd.concat((df_a, df_b), axis=0, ignore_index=True)
    # `height` replaces the `size` keyword, which seaborn renamed in 0.9 and
    # later removed; fit_reg=False leaves a plain scatter plot.
    sns.lmplot(x=x_column, y=y_column, col='label_source',
               hue='label', data=df_c, fit_reg=False, height=4)

# Show the true species and the tree's predictions side by side.
comparePlot(
    input_frame=df_iris_test,
    real_column="species",
    predicted_column="predicted_label_tree",
)

Updated 2020-03-05

Contributors are:

Who are from:

University of Michigan - Ann Arbor

🏆 6

Learn Before

Related