随机森林n_estimators 学习曲线

科技资讯 投稿 6400 0 评论

随机森林n_estimators 学习曲线

随机森林

单棵树与随机森林的得分对比

# 导入包
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
# Load the wine dataset bundled with scikit-learn.
wine = load_wine()
# Split into training and test sets, holding out 30% of the samples for testing.
x_train, x_test, y_train, y_test = train_test_split(wine.data, wine.target, test_size=0.3)
# Instantiate a single decision tree and a random forest, both with random_state=0
# so that the models themselves are built deterministically.
clf = DecisionTreeClassifier(random_state=0)
rfc = RandomForestClassifier(random_state=0)
# Fit both models on the training set.
clf.fit(x_train, y_train)
rfc.fit(x_train, y_train)
RandomForestClassifier(random_state=0)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# Score both fitted models on the held-out test set.
clf_score = clf.score(x_test, y_test)
rfc_score = rfc.score(x_test, y_test)
# Print the single-tree score and the random-forest score, one per line.
print("sinle tree: {0}\nrandom tree: {1}".format(clf_score, rfc_score))
sinle tree: 0.9074074074074074
random tree: 0.9629629629629629

单棵树与随机森林在交叉验证下的对比图

# 导入交叉验证和画图工具
%matplotlib inline
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
# Instantiate a fresh decision tree and a random forest.
clf = DecisionTreeClassifier()
rfc = RandomForestClassifier(n_estimators=25)  # forest made of 25 trees
# Run cross-validation with cv=10 on the full dataset for each model;
# each call returns one score per fold.
clf_corss = cross_val_score(clf, wine.data, wine.target, cv=10)
rfc_corss = cross_val_score(rfc, wine.data, wine.target, cv=10)
# Report the mean cross-validation score of each model.
print("single tree mean socre: {}\nrandom tree mean socre {}".format(clf_corss.mean(), rfc_corss.mean()))
single tree mean socre: 0.8705882352941178
random tree mean socre 0.9722222222222221
# Plot the per-fold scores of the decision tree and the random forest.
# The x axis is the fold number (1..10), matching the 10 scores per model.
plt.plot(range(1, 11), clf_corss, label="single tree")
plt.plot(range(1, 11), rfc_corss, label="random tree")
plt.xticks(range(1, 11))
plt.legend()
<matplotlib.legend.Legend at 0x7ff6f4815d50>

# Re-run cross-validation (cv=10) for the decision tree and display the per-fold scores.
clf_corss = cross_val_score(clf, wine.data, wine.target, cv=10)
clf_corss
array([0.88888889, 0.88888889, 0.72222222, 0.88888889, 0.83333333,
       0.83333333, 1.        , 0.94444444, 0.94117647, 0.76470588])
# Re-run cross-validation (cv=10) for the random forest and display the per-fold scores.
rfc_corss = cross_val_score(rfc, wine.data, wine.target, cv=10)
rfc_corss
array([1.        , 1.        , 0.94444444, 0.94444444, 0.88888889,
       1.        , 1.        , 1.        , 1.        , 1.        ])

十次交叉验证下决策树和随机森林的对比

# Collect the mean cross-validation score of each model over ten repeated runs.
clf_list = []
rfc_list = []
for i in range(10):
    # Build fresh, unseeded models on every repetition so each run is independent.
    clf = DecisionTreeClassifier()
    rfc = RandomForestClassifier(n_estimators=25)
    # Mean of the per-fold scores from one cross-validation run (cv=10).
    clf_corss_mean = cross_val_score(clf, wine.data, wine.target, cv=10).mean()
    rfc_corss_mean = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    clf_list.append(clf_corss_mean)
    rfc_list.append(rfc_corss_mean)
# Plot the mean scores of the decision tree and the random forest;
# the x axis is the repetition number (1..10).
plt.plot(range(1, 11), clf_list, label="single tree")
plt.plot(range(1, 11), rfc_list, label="random tree")
plt.xticks(range(1, 11))
plt.legend()
<matplotlib.legend.Legend at 0x7ff6f490f670>

n_estimators 学习曲线

# Learning curve for n_estimators from 1 to 200 trees.
superpa = []
for i in range(200):
    # i runs 0..199, so the forest size is i + 1; n_jobs=-1 is passed to
    # the classifier to parallelise the work.
    rfc = RandomForestClassifier(n_estimators=i+1, n_jobs=-1)
    # Mean cross-validation score (cv=10) for this forest size.
    rfc_cross = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    superpa.append(rfc_cross)
# Print the best mean score and its 0-based position in the list;
# the corresponding n_estimators value is that index + 1.
best_score = max(superpa)
print(best_score, superpa.index(best_score))
# Plot the mean score against the number of trees (1..200).
plt.figure(figsize=(20, 8))
plt.plot(range(1, 201), superpa, label="rfc_cross_mean")
plt.legend()
0.9888888888888889 20





<matplotlib.legend.Legend at 0x7ff6f540f100>

编程笔记 » 随机森林n_estimators 学习曲线

赞同 (34) or 分享 (0)
游客 发表我的评论   换个身份
取消评论

表情
(0)个小伙伴在吐槽