使用cvxpy求解SVM对偶问题
- 作者: chaihahaha
- 时间:
- 分类: 人工智能笔记
- 评论
最优化问题:$$
\begin{align*}
&\max_a & & \sum_{i=1}^m a_i - \frac{1}{2} \sum_{i=1}^m\sum_{j=1}^m a_i a_j y_i y_j x_i^\top x_j \label{softsvmd}\tag{$\mathrm{SVM_{soft} dual}$}\\
&...
展开阅读
最优化问题:
$$ \begin{align*} &\max_a & & \sum_{i=1}^m a_i - \frac{1}{2} \sum_{i=1}^m\sum_{j=1}^m a_i a_j y_i y_j x_i^\top x_j \label{softsvmd}\tag{$\mathrm{SVM_{soft} dual}$}\\ &s.t. \ & & \sum_{i=1}^m a_i y_i=0 \\ & & & 0\leq a_i \leq C,\ i=1,\cdots,m \tag{Box Constraint} \end{align*} $$
求解代码:
import numpy as np
import cvxpy as cp
x = np.array([[0,0],[1,0],[0,-1],[-1,1],[-2,1],[-1,2]]) # training samples
y = np.array([[-1],[-1],[-1],[1],[1],[1]]) # training labels
m = len(y) # # of samples
d = x.shape[1] # dim of samples
a = cp.Variable((m,1)) # lagrange multiplier
C = 1 # trade-off parameter
G = np.matmul(y*x, (y*x).T) # Gram matrix
objective = cp.Maximize(cp.sum(a)-(1/2)*cp.quad_form(a, G))
constraints = [0 <= a, a <= C, cp.sum(cp.multiply(a,y)) == 0] # box constraint
prob = cp.Problem(objective, constraints)
result = prob.solve()
print(a.value)
多类SVM
import numpy as np
import cvxpy as cp
def rbf(sigma=1):
def rbf_kernel(x1,x2,sigma):
X12norm = np.sum(x1**2,1,keepdims=True)-2*x1@x2.T+np.sum(x2**2,1,keepdims=True).T
return np.exp(-X12norm/(2*sigma**2))
return lambda x1,x2: rbf_kernel(x1,x2,sigma)
def poly(n=3):
return lambda x1,x2: (x1 @ x2.T)**n
class svm_model_cvxpy:
def __init__(self, m,n_class):
self.n_svm = n_class * (n_class - 1)//2
self.m = m # number of samples
self.n_class = n_class
# multiplier
self.a = [cp.Variable(shape=(m,1),pos=True) for i in range(self.n_svm)]
# bias
self.b = np.zeros((self.n_svm,1))
# kernel function should input x [n,d] y [m,d] output [n,m]
# Example of kernels: rbf(1.0), poly(3)
self.kernel = rbf(1)
# Binary setting for every SVM,
# Mij says the SVMj should give
# Mij label to sample with class i
self.lookup_matrix=np.zeros((self.n_class, self.n_svm))
# The two classes SVMi concerns,
# lookup_class[i]=[pos, neg]
self.lookup_class=np.zeros((self.n_svm, 2))
k=0
for i in range(n_class-1):
for j in range(i+1,n_class):
self.lookup_class[k, 0]=i
self.lookup_class[k, 1]=j
k += 1
for i in range(n_class):
for j in range(self.n_svm):
if i == self.lookup_class[j,0] or i == self.lookup_class[j,1]:
if self.lookup_class[j, 0]==i:
self.lookup_matrix[i,j]=1.0
else:
self.lookup_matrix[i,j]=-1.0
def fit(self, x, y_multiclass, kernel=rbf(1), C=0.001):
y_multiclass=y_multiclass.reshape(-1)
self.x = x
self.y_multiclass = y_multiclass
self.kernel = kernel
self.C = C
self.y_matrix = np.stack([self.cast(y_multiclass, k) for k in range(self.n_svm)],0)
for k in range(self.n_svm):
print("training ",k,"th SVM in ",self.n_svm)
y = self.y_matrix[k, :].reshape((-1,1))
yx = y*x
G = kernel(yx, yx) # Gram matrix
objective = cp.Maximize(cp.sum(self.a[k])-(1/2)*cp.quad_form(self.a[k], G))
if not objective.is_dcp():
print("Not solvable!")
assert objective.is_dcp()
constraints = [self.a[k] <= C, cp.sum(cp.multiply(self.a[k],y)) == 0] # box constraint
prob = cp.Problem(objective, constraints)
result = prob.solve()
x_pos = x[y[:,0]==1,:]
x_neg = x[y[:,0]==-1,:]
b_min = -np.min(self.wTx(k,x_pos)) if x_pos.shape[0]!=0 else 0
b_max = -np.max(self.wTx(k,x_neg)) if x_neg.shape[0]!=0 else 0
self.b[k,0] = (1/2)*(b_min + b_max)
self.a_matrix = np.stack([i.value.reshape(-1) for i in self.a],0)
def predict(self,xp):
k_predicts = (self.y_matrix * self.a_matrix) @ self.kernel(xp,self.x).T + self.b
result = np.argmax(self.lookup_matrix @ k_predicts,axis=0)
return result
def cast(self, y, k):
# cast the multiclass label of dataset to
# the pos/neg (with 0) where pos/neg are what SVMk concerns
return (y==self.lookup_class[k, 0]).astype(np.float32) - (y==self.lookup_class[k, 1]).astype(np.float32)
def wTx(self,k,xi):
# The prediction of SVMk without bias, w^T @ xi
y = self.y_matrix[k, :].reshape((-1,1))
a = self.a[k].value.reshape(-1,1)
wTx0 = self.kernel(xi, self.x) @ (y * a)
return wTx0
def get_avg_pct_spt_vec(self):
# the average percentage of support vectors,
# test error shouldn't be greater than it if traing converge
return np.sum((0.0<self.a_matrix) & (self.a_matrix<self.C)).astype(np.float32)/(self.n_svm*self.m)