从头开始实施 SVM?

数据挖掘 支持向量机
2022-02-05 21:37:48

我正在尝试从头开始为 SVM 实现 rbf 内核,作为我即将到来的面试的练习。我尝试使用 cvxopt 来解决优化问题。但是,当我计算准确度并将其与 sklearn 上的实际 SVM 库进行比较时,存在非常大的差异。我试图找出问题,但我似乎无法解决它。任何帮助将不胜感激。下面贴出代码。如果有人可以让我知道我做错了什么或帮助提出替代方法,我将不胜感激。

import numpy as np
import cvxopt

def rbf_kernel(gamma, **kwargs):
    def f(x1, x2):
        distance = np.linalg.norm(x1 - x2) ** 2
        return np.exp(-gamma * distance)
    return f

class SupportVectorMachine(object):
    def __init__(self, C=1, kernel=rbf_kernel, power=4, gamma=None, coef=4):
        self.C = C
        self.kernel = kernel
        self.power = power
        self.gamma = gamma
        self.coef = coef
        self.lagr_multipliers = None
        self.support_vectors = None
        self.support_vector_labels = None
        self.intercept = None

    def fit(self, X, y):

        n_samples, n_features = np.shape(X)

        # Set gamma to 1/n_features by default
        if not self.gamma:
            self.gamma = 1 / n_features

        # Initialize kernel method with parameters
        self.kernel = self.kernel(
            power=self.power,
            gamma=self.gamma,
            coef=self.coef)

        # Calculate kernel matrix
        kernel_matrix = np.zeros((n_samples, n_samples))
        for i in range(n_samples):
            for j in range(n_samples):
                kernel_matrix[i, j] = self.kernel(X[i], X[j])

        # Define the quadratic optimization problem
        P = cvxopt.matrix(np.outer(y, y) * kernel_matrix, tc='d')
        q = cvxopt.matrix(np.ones(n_samples) * -1)
        A = cvxopt.matrix(y, (1, n_samples), tc='d')
        b = cvxopt.matrix(0, tc='d')

        if not self.C: #if its empty
            G = cvxopt.matrix(np.identity(n_samples) * -1)
            h = cvxopt.matrix(np.zeros(n_samples))
        else:
            G_max = np.identity(n_samples) * -1
            G_min = np.identity(n_samples)
            G = cvxopt.matrix(np.vstack((G_max, G_min)))
            h_max = cvxopt.matrix(np.zeros(n_samples))
            h_min = cvxopt.matrix(np.ones(n_samples) * self.C)
            h = cvxopt.matrix(np.vstack((h_max, h_min)))

        # Solve the quadratic optimization problem using cvxopt
        minimization = cvxopt.solvers.qp(P, q, G, h, A, b)

        # Lagrange multipliers
        lagr_mult = np.ravel(minimization['x'])
         
        # Extract support vectors
        # Get indexes of non-zero lagr. multipiers
        idx = lagr_mult > 1e-11
        # Get the corresponding lagr. multipliers
        self.lagr_multipliers = lagr_mult[idx]
        # Get the samples that will act as support vectors
        self.support_vectors = X[idx]
        # Get the corresponding labels
        self.support_vector_labels = y[idx]

    # Calculate intercept with first support vector
        self.intercept = self.support_vector_labels[0]
        for i in range(len(self.lagr_multipliers)):
          self.intercept -= self.lagr_multipliers[i] * self.support_vector_labels[
             i] * self.kernel(self.support_vectors[i], self.support_vectors[0])


    def predict(self, X):
        y_pred = []
    # Iterate through list of samples and make predictions
        for sample in X:
            prediction = 0
            # Determine the label of the sample by the support vectors
            for i in range(len(self.lagr_multipliers)):
                prediction += self.lagr_multipliers[i] * self.support_vector_labels[
                i] * self.kernel(self.support_vectors[i], sample)
            prediction += self.intercept
            y_pred.append(np.sign(prediction))
        return np.array(y_pred)



def main():
    print ("-- SVM Classifier --")

    data = load_iris()

    # previous error 
   #X = normalize(data.data)
   #y = data.target

    # correct version 
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = -1
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    clf = SupportVectorMachine(kernel=rbf_kernel, gamma = 1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print ("Accuracy (scratch):", accuracy)

    clf_sklearn = SVC(gamma = 'auto')
    clf_sklearn.fit(X_train, y_train)
    y_pred2 = clf_sklearn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred2)
    print ("Accuracy :", accuracy)

if __name__ == "__main__":
    main()

结果:准确度(从头开始):0.31666666666666665 准确度(使用 SVM 库):1.0

注意,我没有添加库以节省空间

1个回答

这实际上是正确的代码。它本身没有任何问题。

但是,请注意:这适用于 OVO(一对一)SVM。基本上,如果您要比较两个班级。这并不意味着两个以上的课程,因此您会获得较低的准确性。