我正在尝试使用curve_fit、RANSAC 和尝试学习基础知识的东西,但有一件事我不明白。
为什么这里的 R2 得分为负?
import numpy as np
import warnings
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from sklearn.base import BaseEstimator
from sklearn.linear_model import RANSACRegressor
from scipy.optimize import OptimizeWarning
from scipy.optimize import curve_fit
class LogarithmicRegression(BaseEstimator):
def __init__(self, log_base=np.log):
self.__log_base = log_base
def __log_expr(self, x, a, b, c):
with warnings.catch_warnings():
warnings.simplefilter("ignore", RuntimeWarning)
return a * self.__log_base(x+c) + b
def get_params(self, deep=False):
# https://scikit-learn.org/stable/developers/develop.html#get-params-and-set-params
return {"log_base": self.__log_base}
def set_params(self, **parameters):
for parameter, value in parameters.items():
setattr(self, parameter, value)
return self
def fit(self, X, y):
self.coef, _ = curve_fit(self.__log_expr, X.flatten(), y, maxfev=10000,
bounds=( (-np.inf, -np.inf, -np.inf),(np.inf, np.inf, np.inf) ))
return self
def predict(self, X):
hypothesis = self.__log_expr(X, *self.coef)
return hypothesis.flatten()
def score(self, X_test, y_test):
from sklearn.metrics import r2_score
self.accuracy = r2_score(X_test, y_test)
return self.accuracy
np.random.seed(543)
n_sample = 100
dataX = np.array(range(1, n_sample+1))
dataY = 2.5 * np.log(dataX) + 7
noise = np.random.normal(np.mean(dataY), 2, n_sample)
add_noise = np.random.choice(a=[False, True], size=n_sample)
for i in range(n_sample):
if add_noise[i]:
dataY[i] = noise[i]
plt.style.use("dark_background")
plt.rcParams["figure.figsize"] = (8,6)
plt.grid(False)
#plt.scatter(dataX, dataY, color='white')
X = dataX.reshape(-1, 1)
y = dataY
ransac = RANSACRegressor(base_estimator=LogarithmicRegression(),
min_samples=int(n_sample/4),
residual_threshold=0.7)
ransac.fit(X, y)
inlier_mask = ransac.inlier_mask_
outlier_mask = np.logical_not(inlier_mask)
plt.scatter(X[inlier_mask], y[inlier_mask], color='yellowgreen', marker='.', label='Inliers')
plt.scatter(X[outlier_mask], y[outlier_mask], color='r', marker='.', label='Outliers')
lineX = np.arange(X.min(), X.max())[:, np.newaxis]
lineY = ransac.predict(lineX)
print("Estimated coefficients", ransac.estimator_.coef)
print("Accuracy", ransac.estimator_.accuracy)
plt.plot(lineX, lineY, color='yellow', linewidth=2, label='RANSAC regressor')
plt.show()