Learn Before
Code
Python Code for Classification and Decision Tree
# Train, visualise and evaluate a decision-tree classifier on the iris data.
#
# NOTE(review): this notebook-style snippet uses several names it never
# defines: df_iris, train_test_split, sktree, pd and sns. Presumably an
# earlier cell provides them (sktree looks like an alias for sklearn.tree,
# sns for seaborn, pd for pandas) -- confirm before running it standalone.
from io import StringIO  # sklearn.externals.six was removed from scikit-learn

import pydotplus
from IPython.display import Image
from sklearn.metrics import accuracy_score
from sklearn.tree import export_graphviz

# Split into train and test.
# The original call passed `petal_width > 5.5` as the second argument; that
# name is undefined here and a boolean is not a valid split size. The hold-out
# fraction is now explicit (0.25 is the library default). Pass
# random_state=<int> as well if a reproducible split is needed.
df_iris_train, df_iris_test = train_test_split(df_iris, test_size=0.25)
# Work on an independent copy so that adding the prediction column below does
# not write into a slice of df_iris.
df_iris_test = df_iris_test.copy()

# Build and train the classifier.
# NOTE(review): max_depth=1 yields a single split, i.e. at most two leaves --
# confirm this is intentional if `species` has more than two classes.
dt_model = sktree.DecisionTreeClassifier(max_depth=1, criterion='entropy')

# Given the first 4 columns, learn the species.
# `.ix` was removed from pandas; `.iloc` is the positional equivalent.
# Assumes the first four columns are the numeric features -- TODO confirm.
dt_model.fit(df_iris_train.iloc[:, :4], df_iris_train.species)

# Render the fitted tree: export to Graphviz DOT text, then convert to PNG.
dot_data = StringIO()
export_graphviz(
    dt_model,
    out_file=dot_data,
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
# Bare expression: a notebook shows it only when it ends a cell.
Image(graph.create_png())

# Deploying the decision tree: predict labels for the held-out rows.
predicted_labels = dt_model.predict(df_iris_test.iloc[:, :4])
df_iris_test['predicted_label_tree'] = predicted_labels
# Bare expression: random 10-row preview when run as the last line of a cell.
df_iris_test.sample(10)

# Determine the overall accuracy.
accuracy = accuracy_score(df_iris_test.species, predicted_labels)
print(f"Accuracy: {accuracy:.2%}")


# Utility function to compare the predictions versus ground truth.
def comparePlot(input_frame, real_column, predicted_column):
    """Plot true and predicted labels side by side.

    Two copies of ``input_frame`` are stacked: one labelled with the values
    of ``real_column`` (``label_source == 'Species'``) and one labelled with
    the values of ``predicted_column`` prefixed by ``'Predict '``
    (``label_source == 'Classifier'``). The stacked frame is drawn as a
    scatter plot of ``sepal_length`` against ``sepal_width``, one panel per
    ``label_source``, coloured by ``label``.

    Args:
        input_frame: DataFrame containing ``sepal_length``, ``sepal_width``
            and the two label columns named below.
        real_column: Name of the column holding the ground-truth labels.
        predicted_column: Name of the column holding the predicted labels.
    """
    df_a = input_frame.copy()
    df_b = input_frame.copy()

    df_a['label_source'] = 'Species'
    df_b['label_source'] = 'Classifier'

    df_a['label'] = df_a[real_column]
    # Prefix predictions so the two panels never share a hue entry.
    df_b['label'] = df_b[predicted_column].apply(lambda x: f'Predict {x}')

    df_c = pd.concat((df_a, df_b), axis=0, ignore_index=True)

    # seaborn renamed lmplot's `size` keyword to `height`.
    sns.lmplot(
        x='sepal_length',
        y='sepal_width',
        col='label_source',
        hue='label',
        data=df_c,
        fit_reg=False,
        height=4,
    )


# See the difference.
comparePlot(df_iris_test, "species", "predicted_label_tree")

0
4
Updated 2020-03-05
Tags
Data Science