背景介绍

# http://www.scikit-yb.org/zh/latest/quickstart.html
# http://www.scikit-yb.org/en/latest/quickstart.html
import pandas as pd
X = data[[
"season", "month", "hour", "holiday", "weekday", "workingday",
"weather", "temp", "feelslike", "humidity", "windspeed"
]]
y = data["riders"]
from yellowbrick.features import Rank2D
visualizer = Rank2D(algorithm="pearson")
visualizer.fit_transform(X.values)
visualizer.poof() # 在notebook显示
# visualizer.poof(outpath="pcoords.jpg",clear_figure=True)  # 输出为png、jpg格式

from yellowbrick.features import JointPlotVisualizer
visualizer = JointPlotVisualizer(feature='temp', target='feelslike')
visualizer.fit(X['temp'], X['feelslike'])
visualizer.poof()

JointPlotVisualizer 让我们能快速浏览有强相关性的特征，以及各个特征的范围和分布情况。需要注意的是图中的各个轴都已经标准化到0到1之间的值，这是机器学习中一中非常常用的减少一个特征对另一个影响的技术。

from yellowbrick.regressor import ResidualsPlot
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Create training and test sets
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.1
)

visualizer = ResidualsPlot(LinearRegression())
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()

import numpy as np

from sklearn.linear_model import RidgeCV
from yellowbrick.regressor import AlphaSelection
# RidgeCV:多个阿尔法，得出多个对应最佳的w,然后得到最佳的w及对应的阿尔法
alphas = np.logspace(-10, 1, 200)
visualizer = AlphaSelection(RidgeCV(alphas=alphas))
visualizer.fit(X, y)
visualizer.poof()

alpha = visualizer.alpha_
visualizer.alpha_
3.612342699709438

from sklearn.linear_model import Ridge
from yellowbrick.regressor import PredictionError

visualizer = PredictionError(Ridge(alpha=alpha))
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()