# 第2关:动手实现线性回归 (Level 2: Implement linear regression by hand)
#encoding=utf8
import numpy as np


# MSE (mean squared error)
def mse_score(y_predict, y_test):
    """Return the mean squared error between predictions and true labels.

    input:
        y_predict(ndarray): predicted values
        y_test(ndarray): true label values
    output:
        mse(float): mean of the squared element-wise differences
    """
    #********* Begin *********#
    # Accept plain sequences as well as ndarrays.
    y_predict = np.asarray(y_predict)
    y_test = np.asarray(y_test)
    # A column vector (n, 1) subtracted from a flat vector (n,) would
    # broadcast to an (n, n) matrix and yield a wrong mean. When dropping
    # the length-1 axes makes the shapes agree, compare the squeezed arrays.
    if y_predict.shape != y_test.shape:
        squeezed_predict = np.squeeze(y_predict)
        squeezed_test = np.squeeze(y_test)
        if squeezed_predict.shape == squeezed_test.shape:
            y_predict, y_test = squeezed_predict, squeezed_test
    # Element-wise error, squared to remove the sign, then averaged.
    mse = np.mean((y_predict - y_test) ** 2)
    #********* End *********#
    return mse


def lr(train_feature, train_label, test_feature):
    '''
    Fit a linear regression on the training data and predict the test labels.

    input:
        train_feature(ndarray): training sample features, one row per sample
        train_label(ndarray): training sample labels
        test_feature(ndarray): test sample features, one row per sample
    output:
        predict(ndarray): predicted labels for the test samples
    '''
    #********* Begin *********#
    # Prepend the bias term x0 = 1 to the training data.
    train_x = np.hstack([np.ones((len(train_feature), 1)), train_feature])
    # Solve the normal equation through the Moore-Penrose pseudo-inverse.
    # When train_x has full column rank this equals (X^T X)^-1 X^T y, and
    # it still returns a least-squares solution when X^T X is singular
    # (e.g. collinear or constant features), where an explicit inverse fails.
    theta = np.linalg.pinv(train_x).dot(train_label)
    # Prepend the bias term x0 = 1 to the test data.
    test_x = np.hstack([np.ones((len(test_feature), 1)), test_feature])
    # Predicted labels for the test set.
    predict = test_x.dot(theta)
    #********* End *********#
    return predict
# Import the official scikit-learn metric functions
from sklearn.metrics import mean_squared_error, r2_score as sklearn_r2_score


# MSE (mean squared error) computed with scikit-learn
def mse_score(y_predict, y_test):
    """
    Compute the mean squared error (MSE) with scikit-learn; it measures how
    far the model's predictions deviate from the true values.

    input:
        y_predict(ndarray): model predictions
        y_test(ndarray): true label values
    output:
        mse(float): mean squared error (smaller means a better fit)
    """
    # scikit-learn expects the true values first, then the predictions.
    return mean_squared_error(y_true=y_test, y_pred=y_predict)


# R² (coefficient of determination) computed with scikit-learn
def r2_score(y_predict, y_test):
    '''
    Compute the coefficient of determination (R²) with scikit-learn; it
    measures how much of the target's variation the model explains.

    input:
        y_predict(ndarray): predicted values
        y_test(ndarray): true values
    output:
        r2(float): R² value (range (-inf, 1]; closer to 1 means a better fit)
    '''
    #********* Begin *********#
    # Delegate to scikit-learn's r2_score (imported under an alias so this
    # wrapper can keep the name r2_score).
    return sklearn_r2_score(y_true=y_test, y_pred=y_predict)
    #********* End *********#