from sklearn import tree
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
import pandas as pd
wine = load_wine()
Xtrain, Xtest, Ytrain, Ytest = train_test_split(wine.data,wine.target,test_size=0.3)
clf = tree.DecisionTreeClassifier(criterion="entropy")
clf = clf.fit(Xtrain, Ytrain)
score = clf.score(Xtest, Ytest) #返回预测的准确度accuracy
score:0.94444444444444442
feature_name = ['酒精','苹果酸','灰','灰的碱性','镁','总酚','类黄酮','非黄烷类酚类','花青素','颜色强度','色调','od280/od315稀释葡萄酒','脯氨酸']
import graphviz
dot_data = tree.export_graphviz(clf, out_file=".\Tree.dot"
,feature_names = feature_name
,class_names=["琴酒","雪莉","贝尔摩德"]
,filled=True
,rounded=True
)
dot -Tjpg Tree.dot -o tree.jpg
查看保存在本地的 dot_data.dot 可发现,其默认字体 fontname=helvetica,只需将字体修改为支持的中文字体即可,通过正则表达式实现替换。
import re
# 打开 dot_data.dot,修改 fontname="支持的中文字体"
f = open("./Tree.dot", "r+", encoding="utf-8")
open('./Tree_utf8.dot', 'w', encoding="utf-8").write(re.sub(r'fontname=helvetica', 'fontname="Microsoft YaHei"', f.read()))
f.close()
dot -Tjpg Tree_utf8.dot -o tree1.jpg
dot -Tjpg Tree_utf8.dot -o tree2.pdf