分类人工智能笔记下的文章

强化学习调参经验

作者: chaihahaha
时间: 2020-11-14
分类: 人工智能笔记
评论

指数加权平均(polyak averaging)：$\tau$是加权平均系数，它相当于一个超学习率，非常敏感，增大0.01就有可能会使训练发散。一般要保证$(1-\tau)^{n}$大约在10%左右，n是一个epoch的episodes数。总之要让Actor的Loss在100个epoch内爆炸到$(1-\gamma^T)/(1-\gamma)$的大小，其中$\gamma$是discount，...

展开阅读

使用cvxpy求解SVM对偶问题

作者: chaihahaha
时间: 2020-01-26
分类: 人工智能笔记
评论

最优化问题：$$ \begin{align*} &\max_a & & \sum_{i=1}^m a_i - \frac{1}{2} \sum_{i=1}^m\sum_{j=1}^m a_i a_j y_i y_j x_i^\top x_j \label{softsvmd}\tag{$\mathrm{SVM_{soft} dual}$}\\ &...

展开阅读

最优化问题：

$$ \begin{align*} &\max_a & & \sum_{i=1}^m a_i - \frac{1}{2} \sum_{i=1}^m\sum_{j=1}^m a_i a_j y_i y_j x_i^\top x_j \label{softsvmd}\tag{$\mathrm{SVM_{soft} dual}$}\\ &s.t. \ & & \sum_{i=1}^m a_i y_i=0 \\ & & & 0\leq a_i \leq C,\ i=1,\cdots,m \tag{Box Constraint} \end{align*} $$

求解代码：

import numpy as np
import cvxpy as cp
x = np.array([[0,0],[1,0],[0,-1],[-1,1],[-2,1],[-1,2]]) # training samples
y = np.array([[-1],[-1],[-1],[1],[1],[1]]) # training labels
m = len(y) # # of samples
d = x.shape[1] # dim of samples
a = cp.Variable((m,1)) # lagrange multiplier
C = 1 # trade-off parameter
G = np.matmul(y*x, (y*x).T) # Gram matrix
objective = cp.Maximize(cp.sum(a)-(1/2)*cp.quad_form(a, G))
constraints = [0 <= a, a <= C, cp.sum(cp.multiply(a,y)) == 0] # box constraint
prob = cp.Problem(objective, constraints)
result = prob.solve()
print(a.value)

多类SVM

import numpy as np
import cvxpy as cp
def rbf(sigma=1):
    def rbf_kernel(x1,x2,sigma):
        X12norm = np.sum(x1**2,1,keepdims=True)-2*x1@x2.T+np.sum(x2**2,1,keepdims=True).T
        return np.exp(-X12norm/(2*sigma**2))
    return lambda x1,x2: rbf_kernel(x1,x2,sigma)

def poly(n=3):
    return lambda x1,x2: (x1 @ x2.T)**n

class svm_model_cvxpy:
    def __init__(self, m,n_class):
        self.n_svm = n_class * (n_class - 1)//2
        self.m = m # number of samples
        self.n_class = n_class

        # multiplier
        self.a = [cp.Variable(shape=(m,1),pos=True) for i in range(self.n_svm)]
        # bias
        self.b = np.zeros((self.n_svm,1))

        # kernel function  should input x [n,d] y [m,d] output [n,m]
        # Example of kernels: rbf(1.0), poly(3)
        self.kernel = rbf(1)

        # Binary setting for every SVM,
        # Mij says the SVMj should give
        # Mij label to sample with class i
        self.lookup_matrix=np.zeros((self.n_class, self.n_svm))

        # The two classes SVMi concerns,
        # lookup_class[i]=[pos, neg]
        self.lookup_class=np.zeros((self.n_svm, 2))

        k=0
        for i in range(n_class-1):
            for j in range(i+1,n_class):
                self.lookup_class[k, 0]=i
                self.lookup_class[k, 1]=j
                k += 1

        for i in range(n_class):
            for j in range(self.n_svm):
                if i == self.lookup_class[j,0] or i == self.lookup_class[j,1]:
                    if self.lookup_class[j, 0]==i:
                        self.lookup_matrix[i,j]=1.0
                    else:
                        self.lookup_matrix[i,j]=-1.0
    def fit(self, x, y_multiclass, kernel=rbf(1), C=0.001):
        y_multiclass=y_multiclass.reshape(-1)
        self.x = x
        self.y_multiclass = y_multiclass
        self.kernel = kernel
        self.C = C
        self.y_matrix = np.stack([self.cast(y_multiclass, k) for k in range(self.n_svm)],0)
        for k in range(self.n_svm):
            print("training ",k,"th SVM in ",self.n_svm)
            y = self.y_matrix[k, :].reshape((-1,1))
            yx = y*x
            G = kernel(yx, yx) # Gram matrix
            objective = cp.Maximize(cp.sum(self.a[k])-(1/2)*cp.quad_form(self.a[k], G))
            if not objective.is_dcp():
                print("Not solvable!")
                assert objective.is_dcp()
            constraints = [self.a[k] <= C, cp.sum(cp.multiply(self.a[k],y)) == 0] # box constraint
            prob = cp.Problem(objective, constraints)
            result = prob.solve()
            x_pos = x[y[:,0]==1,:]
            x_neg = x[y[:,0]==-1,:]
            b_min = -np.min(self.wTx(k,x_pos)) if x_pos.shape[0]!=0 else 0
            b_max = -np.max(self.wTx(k,x_neg)) if x_neg.shape[0]!=0 else 0
            self.b[k,0] = (1/2)*(b_min + b_max)
        self.a_matrix = np.stack([i.value.reshape(-1) for i in self.a],0)

    def predict(self,xp):
        k_predicts = (self.y_matrix * self.a_matrix) @ self.kernel(xp,self.x).T  + self.b
        result = np.argmax(self.lookup_matrix @ k_predicts,axis=0)
        return result

    def cast(self, y, k):
        # cast the multiclass label of dataset to
        # the pos/neg (with 0) where pos/neg are what SVMk concerns
        return (y==self.lookup_class[k, 0]).astype(np.float32) - (y==self.lookup_class[k, 1]).astype(np.float32)

    def wTx(self,k,xi):
        # The prediction of SVMk without bias, w^T @ xi
        y = self.y_matrix[k, :].reshape((-1,1))
        a = self.a[k].value.reshape(-1,1)
        wTx0 =  self.kernel(xi, self.x) @ (y * a)
        return wTx0

    def get_avg_pct_spt_vec(self):
        # the average percentage of support vectors,
        # test error shouldn't be greater than it if traing converge
        return np.sum((0.0<self.a_matrix) & (self.a_matrix<self.C)).astype(np.float32)/(self.n_svm*self.m)

基于tensorflow的多类别支持向量机-SVM

作者: chaihahaha
时间: 2020-01-02
分类: 人工智能笔记
评论

import tensorflow as tf import numpy as np import operator as op from functools import reduce class svm_model: def __init__(self,n_class, dimension, learning_rate=1e-2, regularization=1): ...

展开阅读

import tensorflow as tf
import numpy as np
import operator as op
from functools import reduce

class svm_model:
    def __init__(self,n_class, dimension, learning_rate=1e-2, regularization=1):
        self.learning_rate=learning_rate
        self.n_class=n_class
        self.dimension=dimension
        self.w=[]
        self.b=[]
        self.logit=[]
        self.logit_tmp=[]
        self.loss=[]
        #prediction=[]
        self.correct_logit=[]
        self.lookup_class=dict()
        self.w_model=np.random.uniform(0,1,(self.ncr(n_class,2), dimension)).astype(np.float32)
        self.b_model=np.random.uniform(0,1,(self.ncr(n_class,2),1)).astype(np.float32)
        self.lookup_matrix=np.zeros((n_class, self.ncr(n_class,2)),dtype=np.float32)
        self.batch_x=tf.placeholder(tf.float32,shape=(None,dimension),name="batch_x")
        self.batch_y=tf.placeholder(tf.float32,shape=(None,1),name="batch_y")
        self.w = tf.Variable(tf.random_uniform([self.ncr(n_class,2), self.dimension]))
        self.b = tf.Variable(tf.random_uniform([self.ncr(n_class,2),1]))
        self.batch_class_size=[]
        k=0
        for i in range(n_class-1):
            for j in range(i+1,n_class):
                self.lookup_class[k]=[i,j]
                k += 1

        for i in range(n_class):
            for j in range(self.ncr(n_class,2)):
                if i in self.lookup_class[j]:
                    if self.lookup_class[j][0]==i:
                        self.lookup_matrix[i,j]=1.0
                    else:
                        self.lookup_matrix[i,j]=-1.0

        
        for i in range(self.ncr(n_class,2)):
            idx=tf.where(tf.keras.backend.any(tf.equal(self.batch_y,self.lookup_class[i]),1)) # tf.where and tf.gather_nd is equivalent to a[condition is true]
            self.logit.append(tf.matmul(tf.reshape(self.w[i,:],(1,dimension)), tf.gather_nd(self.batch_x, idx), transpose_b=True) + self.b[i,:])
            self.logit_tmp.append(tf.tanh(self.logit[i]))
            self.correct_logit.append(self.zonp(tf.cast(tf.equal(tf.gather_nd(self.batch_y, idx),self.lookup_class[i][0]),tf.float32)))
            self.batch_class_size.append(tf.cast(tf.shape(self.correct_logit[i])[0],tf.float32))
            self.loss.append(tf.maximum(0.0,1-tf.matmul(self.logit_tmp[i],self.correct_logit[i])/self.batch_class_size[i]) + regularization * tf.norm(self.w[i,:], 2))

        self.prediction = tf.argmax(tf.matmul(self.lookup_matrix, tf.tanh(tf.matmul(self.w, self.batch_x, transpose_b=True) + self.b)),axis=0)
        self.total_loss = sum(self.loss)
        self.opt = tf.train.RMSPropOptimizer(learning_rate).minimize(self.total_loss)
        
    def ncr(self,n, r):
        r = min(r, n-r)
        numer = reduce(op.mul, range(n, n-r, -1), 1)
        denom = reduce(op.mul, range(1, r+1), 1)
        return numer // denom
    def zonp(self,zero_one):
        return 2*zero_one-1

    def fit(self,data_x,data_y,iter_time=1000):
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run([tf.assign(self.w,self.w_model),tf.assign(self.b,self.b_model)])
            for _ in range(iter_time):
                _,loss_val,w_model,b_model = sess.run([self.opt,self.total_loss,self.w,self.b], feed_dict={self.batch_x:data_x,self.batch_y:data_y})
                print(loss_val)
            #print(sess.run(self.batch_class_size, feed_dict={self.batch_y:data_y}))
            #print(sess.run(self.logit_tmp, feed_dict={self.batch_x:data_x,self.batch_y:data_y}))
        self.w_model = w_model
        self.b_model = b_model
        
        return w_model,b_model
    def predict(self,data_x):
        with tf.Session() as sess:  
            sess.run(tf.global_variables_initializer())
            sess.run([tf.assign(self.w,self.w_model),tf.assign(self.b,self.b_model)])
            result = sess.run(self.prediction,feed_dict={self.batch_x:data_x})
        return result.reshape(-1,1)
    
    
def increase_dims(data):
    _,dim0=data.shape
    k=0
    lookup_class=dict()
    for i in range(dim0-1):
        for j in range(i+1,dim0):
            lookup_class[k]=[i,j]
            k += 1
    #print([data]+[data[:,lookup_class[i][0]]*data[:,lookup_class[i][1]] for i in range(dim0*(dim0-1)//2)])
    result=np.hstack([data]+[(data[:,lookup_class[i][0]]*data[:,lookup_class[i][1]]).reshape(-1,1) for i in range(dim0*(dim0-1)//2)]+[data[:,i].reshape(-1,1)**2 for i in range(dim0)])
    return result/np.max(result)

拿到数据后首先将训练数据和标签转置成 $n\times\text{dim}$ 的矩阵形状（n是样本数，dim是特征维数），经过increase_dims()函数处理，将特征升高到2次非线性的 $\begin{pmatrix}n\\2\end{pmatrix}$ 维空间中，接下来使用如下命令进行拟合和识别：

svm = svm_model(number_of_classes,train_x.shape[1],learning_rate,regularization)
svm.fit(train_x,train_y,iterations)
svm.predict(test_x)

在ubuntu上安装tensorflow并解决CUDA与驱动适配问题

作者: chaihahaha
时间: 2020-01-02
分类: 人工智能笔记
评论

conda install tensorflow-gpu以上命令安装好tensorflow-gpu后，我遇到了这样的问题：CUDA driver version is insufficient for CUDA runtime version经过分析，我发现使用anaconda自动分析依赖安装的cudatoolkit的版本是CUDA 10.0 (10.0.130)，而ubuntu装机自带的N...

展开阅读

conda install tensorflow-gpu

以上命令安装好tensorflow-gpu后，我遇到了这样的问题：

CUDA driver version is insufficient for CUDA runtime version

经过分析，我发现使用anaconda自动分析依赖安装的cudatoolkit的版本是CUDA 10.0 (10.0.130)，而ubuntu装机自带的NVIDIA驱动版本是nvidia-38

使用如下命令查询Nvidia驱动版本：

nvidia-smi

使用如下命令查询cudatoolkit, cuda, cudnn版本：

conda list cudatoolkit
cat /usr/local/cuda/version.txt
cat /usr/local/cuda/include/cudnn.h | grep CUDNN_MAJOR -A 2

英伟达官网给出的CUDA和驱动版本对应关系为：

CUDA Toolkit	Linux x86_64 Driver Version
CUDA 10.2 (10.2.89)	>= 440.33
CUDA 10.1 (10.1.105)	>= 418.39
CUDA 10.0 (10.0.130)	>= 410.48
CUDA 9.2 (9.2.88)	>= 396.26
CUDA 9.1 (9.1.85)	>= 390.46
CUDA 9.0 (9.0.76)	>= 384.81
CUDA 8.0 (8.0.61 GA2)	>= 375.26
CUDA 8.0 (8.0.44)	>= 367.48
CUDA 7.5 (7.5.16)	>= 352.31
CUDA 7.0 (7.0.28)	>= 346.46

因此，我找到了nvidia-384对应的CUDA版本：CUDA 9.0 (9.0.76)

使用如下命令对CUDA进行降级即可解决问题：

conda install cudatoolkit=9.0 tensorflow-gpu

有时，你需要的cudatoolkit在默认channel里可能没有，使用如下命令安装其他channel里的对应版本：

conda install -c numba cudatoolkit=9.1 tensorflow-gpu

使用nvidia dali tensorflow plugin为Tensorflow训练加速

作者: chaihahaha
时间: 2020-01-02
分类: 人工智能笔记
评论

使用Tensorflow进行计算机视觉研究时，常常会遇到磁盘读写瓶颈，具体表现为CPU和磁盘使用率极高，而GPU使用率很低。对于这种IO瓶颈，可以使用NVIDIA开源的DALI库进行数据读写加速，以下是安装和使用教程。安装（需要TensorFlow 版本1.7或更高，CUDA9.0或更高）CUDA9.0：pip install --extra-index-url https://develo...

展开阅读

使用Tensorflow进行计算机视觉研究时，常常会遇到磁盘读写瓶颈，具体表现为CPU和磁盘使用率极高，而GPU使用率很低。对于这种IO瓶颈，可以使用NVIDIA开源的DALI库进行数据读写加速，以下是安装和使用教程。

安装（需要TensorFlow 版本1.7或更高，CUDA9.0或更高）

CUDA9.0：

pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/cuda/9.0 nvidia-dali-tf-plugin

CUDA10.0：

pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/cuda/10.0 nvidia-dali-tf-plugin

例子：

class SimplePipeline(Pipeline):
    def __init__(self, batch_size, num_threads, device_id, img_dir):
        super(SimplePipeline, self).__init__(batch_size, num_threads, device_id, seed = 12)
        self.input = ops.FileReader(file_root = img_dir)
        self.decode = ops.ImageDecoder(device = 'mixed', output_type = types.RGB)
    def define_graph(self):
        pngs, labels = self.input()
        images = self.decode(pngs)
        return images

dark_pipe = SimplePipeline(batch_size, 1, 0, dark_img_dir)
gt_pipe = SimplePipeline(batch_size, 1, 0, gt_img_dir)
daliop = dali_tf.DALIIterator()
in_image= daliop(pipeline = dark_pipe, shapes=[[batch_size,400,600,3]],dtypes=[tf.uint8])
in_image=tf.to_float(in_image[0])/255.0

gt_image= daliop(pipeline = gt_pipe, shapes=[[batch_size,400,600,3]],dtypes=[tf.uint8])
gt_image=tf.to_float(gt_image[0])/255.0

with tf.Session() as sess:
    in_img,gt_img = sess.run([in_image, gt_image])

以上对应文件路径如下

root
    -dark_img_dir
        -images
        -labels
    -gt_img_dir
        -images
        -labels

使用SMO算法训练SVM-pytorch

作者: chaihahaha
时间: 2020-01-02
分类: 人工智能笔记
评论

以下是SVM代码：import torch import numpy as np from random import shuffle from sklearn.utils import shuffle as shuffle_ds def rbf(sigma=1): def rbf_kernel(x1,x2,sigma): X12norm = torch.sum(x1...

展开阅读

以下是SVM代码：

import torch
import numpy as np
from random import shuffle
from sklearn.utils import shuffle as shuffle_ds
def rbf(sigma=1):
    def rbf_kernel(x1,x2,sigma):
        X12norm = torch.sum(x1**2,1,keepdims=True)-2*x1@x2.T+torch.sum(x2**2,1,keepdims=True).T
        return torch.exp(-X12norm/(2*sigma**2))
    return lambda x1,x2: rbf_kernel(x1,x2,sigma)

def poly(n=3):
    return lambda x1,x2: (x1 @ x2.T)**n

class svm_model_torch:
    def __init__(self, m, n_class, device="cpu"):
        self.device = device
        self.n_svm = n_class * (n_class - 1)//2
        self.m = m # number of samples
        self.n_class = n_class
        self.blacklist = [set() for i in range(self.n_svm)]

        # multiplier
        self.a = torch.zeros((self.n_svm,self.m), device=self.device) # SMO works only when a is initialized to 0
        # bias
        self.b = torch.zeros((self.n_svm,1), device=self.device)

        # kernel function  should input x [n,d] y [m,d] output [n,m]
        # Example of poly kernel: lambda x,y:  torch.matmul(x,y.T)**2
        self.kernel = lambda x,y:  torch.matmul(x,y.T)


        # Binary setting for every SVM,
        # Mij says the SVMj should give
        # Mij label to sample with class i
        self.lookup_matrix=torch.zeros((self.n_class, self.n_svm), device=self.device)

        # The two classes SVMi concerns,
        # lookup_class[i]=[pos, neg]
        self.lookup_class=torch.zeros((self.n_svm, 2), device=self.device)

        k=0
        for i in range(n_class-1):
            for j in range(i+1,n_class):
                self.lookup_class[k, 0]=i
                self.lookup_class[k, 1]=j
                k += 1

        for i in range(n_class):
            for j in range(self.n_svm):
                if i == self.lookup_class[j,0] or i == self.lookup_class[j,1]:
                    if self.lookup_class[j, 0]==i:
                        self.lookup_matrix[i,j]=1.0
                    else:
                        self.lookup_matrix[i,j]=-1.0

    def fit(self, x_np, y_multiclass_np, C, iterations=1, kernel=rbf(1)):
        x_np, y_multiclass_np = shuffle_ds(x_np,y_multiclass_np)
        self.C = C # box constraint
        # use SMO algorithm to fit
        x = torch.from_numpy(x_np).float() if not torch.is_tensor(x_np) else x_np
        x = x.to(self.device)
        self.x = x.to(self.device)

        y_multiclass = torch.from_numpy(y_multiclass_np).view(-1,1) if not torch.is_tensor(y_multiclass_np) else y_multiclass_np
        y_multiclass=y_multiclass.view(-1)
        self.y_matrix = torch.stack([self.cast(y_multiclass, k) for k in range(self.n_svm)],0).to(self.device)
        self.kernel = kernel
        a = self.a
        b = self.b
        for iteration in range(iterations):
            print("Iteration: ",iteration)
            for k in range(self.n_svm):
                y = self.y_matrix[k, :].view(-1).tolist()
                index = [i for i in range(len(y)) if y[i]!=0]
                shuffle(index)
                traverse = []
                if index is not None:
                    traverse = [i for i in range(0, len(index)-1, 2)]
                    if len(index)>2:
                         traverse += [len(index)-2]
                for i in traverse:
                    if str(index[i])+str(index[i+1]) not in self.blacklist[k]:
                        y1 = y[index[i]]
                        y2 = y[index[i+1]]
                        x1 = x[index[i],:].view(1,-1)
                        x2 = x[index[i+1],:].view(1,-1)
                        a1_old = a[k,index[i]].clone()
                        a2_old = a[k,index[i+1]].clone()

                        if y1 != y2:
                            H = max(min(self.C, (self.C + a2_old-a1_old).item()),0)
                            L = min(max(0, (a2_old-a1_old).item()),self.C)
                        else:
                            H = max(min(self.C, (a2_old + a1_old).item()),0)
                            L = min(max(0, (a2_old + a1_old - self.C).item()),self.C)
                        E1 =  self.g_k(k, x1) - y1
                        E2 =  self.g_k(k, x2) - y2
                        a2_new = torch.clamp(a2_old + y2 * (E1-E2)/self.kernel(x1 - x2,x1 - x2), min=L, max=H)
                        a[k,index[i+1]] = a2_new

                        a1_new = a1_old - y1 * y2 * (a2_new - a2_old)
                        a[k, index[i]] = a1_new

                        b_old = b[k,0]
                        K11 = self.kernel(x1,x1)
                        K12 = self.kernel(x1,x2)
                        K22 = self.kernel(x2,x2)
                        b1_new = b_old - E1 + (a1_old-a1_new)*y1*K11+(a2_old-a2_new)*y2*K12
                        b2_new = b_old - E2 + (a1_old-a1_new)*y1*K12+(a2_old-a2_new)*y2*K22
                        if (0<a1_new) and (a1_new<self.C):
                            b[k,0] = b1_new
                        if (0<a2_new) and (a2_new<self.C):
                            b[k,0] = b2_new
                        if ((a1_new == 0) or (a1_new ==self.C)) and ((a2_new == 0) or (a2_new==self.C)) and (L!=H):
                            b[k,0] = (b1_new + b2_new)/2
                        if b_old == b[k,0] and a[k,index[i]] == a1_old and a[k,index[i+1]] == a2_old:
                            self.blacklist[k].add(str(index[i]) + str(index[i+1]))

    def predict(self,x_np):
        xp = torch.from_numpy(x_np) if not torch.is_tensor(x_np) else x_np
        xp = xp.float().to(self.device)
        k_predicts = (self.y_matrix.to(self.device) * self.a) @ self.kernel(xp,self.x).T  + self.b
        result = torch.argmax(self.lookup_matrix @ k_predicts,axis=0)
        return result.to("cpu").numpy()

    def cast(self, y, k):
        # cast the multiclass label of dataset to
        # the pos/neg (with 0) where pos/neg are what SVMk concerns
        return (y==self.lookup_class[k, 0]).float() - (y==self.lookup_class[k, 1]).float()


    def wTx(self,k,xi):
        # The prediction of SVMk without bias, w^T @ xi
        y = self.y_matrix[k, :].reshape((-1,1))
        a = self.a[k,:].view(-1,1)
        wTx0 =  self.kernel(xi, self.x) @ (y * a)
        return wTx0


    def g_k(self,k,xi):
        # The prediction of SVMk, xi[1,d]
        return self.wTx(k,xi) + self.b[k,0].view(1,1)


    def get_w(self, k):
        y = self.cast(self.y_multiclass, k)
        a = self.a[k,:].view(-1,1)
        return torch.sum(a*y*self.x,0).view(-1,1)

    def get_svms(self):
        for k in range(self.n_svm):
            sk = 'g' + str(self.lookup_class[k, 0].item()) + str(self.lookup_class[k, 1].item()) + '(x)='
            w = self.get_w(k)
            for i in range(w.shape[0]):
                sk += "{:.3f}".format(w[i,0].item()) + ' x' + "{:d}".format(i) +' + '
            sk += "{:.3f}".format(self.b[k,0].item())
            print(sk)

    def get_avg_pct_spt_vec(self):
        # the average percentage of support vectors,
        # test error shouldn't be greater than it if traing converge
        return torch.sum((0.0<self.a) & (self.a<self.C)).float().item()/(self.n_svm*self.m)

以下示例代码使用了此SVM模型：

import numpy as np
#from svm_torch import *
data_x = np.array([[-2,1],[-2,2],[-1,1],[-1,2],[1,1],[1,2],[2,1],[2,2],[1,-1],[1,-2],[2,-1],[2,-2],[-2,-1],[-2,-2],[-1,-1],[-1,-2]])
data_y = np.array([[0],[0],[0],[0],[1],[1],[1],[1],[2],[2],[2],[2],[3],[3],[3],[3]])
m = len(data_x)
c = len(np.unique(data_y))
svm = svm_model_torch(m,1,c)
svm.fit(data_x,data_y,10)

print(svm.predict(data_x)) # 预测结果
svm.get_svms() # Cn2 个SVM分类界面的表达式
print(svm.a)  # 拉格朗日乘子

最后使用mlxtend绘制分类界面：

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import numpy as np
colors = ['red','green','blue','purple']
data_x = np.array([[-2,1],[-2,2],[-1,1],[-1,2],
                  [1,1],[1,2],[2,1],[2,2],
                  [1,-1],[1,-2],[2,-1],[2,-2],
                  [-2,-1],[-2,-2],[-1,-1],[-1,-2]])
data_y = np.array([[0],[0],[0],[0],
                  [1],[1],[1],[1],
                  [2],[2],[2],[2],
                  [3],[3],[3],[3]]).reshape(-1)
fig = plt.figure()
fig = plt.scatter(data_x[:,0],data_x[:,1],c=data_y, cmap=ListedColormap(colors))

from mlxtend.plotting import plot_decision_regions
x=np.linspace(-3,3,1000)
test_x = np.array(np.meshgrid(x,x)).T.reshape(-1,2)
test_y = svm.predict(test_x).reshape(-1).numpy()
scatter_kwargs = {'alpha': 0.0}
fig =plot_decision_regions(test_x, test_y, clf=svm,scatter_kwargs=scatter_kwargs)
plt.show()

绘制结果：

Tensorflow迁移学习

作者: chaihahaha
时间: 2020-01-02
分类: 人工智能笔记
评论

首先，为了方便划分不同的网络模块（子网络）、分别导入权重、指定是否需要训练、指定是否需要复用，需要使用如下语句为网络权重设定scope；使用collection将隐含层输出保存为字典；在with xxx:语句内部定义的网络层也要定义scopeimport tensorflow as tf import tensorflow.contrib.slim as slim # 需要Reuse时，设...

展开阅读

首先，为了方便划分不同的网络模块（子网络）、分别导入权重、指定是否需要训练、指定是否需要复用，需要使用如下语句为网络权重设定scope；使用collection将隐含层输出保存为字典；在with xxx:语句内部定义的网络层也要定义scope

import tensorflow as tf
import tensorflow.contrib.slim as slim

# 需要Reuse时，设定reuse=tf.AUTO_REUSE
with tf.variable_scope("model_name", "model_name", reuse=tf.AUTO_REUSE) as sc:

    # 之后就可以在下面定义网络了
    conv1 = slim.conv2d(input, 32, [3, 3], rate=1, activation_fn=lrelu, scope='layer_name')
    
    # 还可以使用end_points字典保存隐含层输出
    end_points = slim.utils.convert_collection_to_dict("collection_name")   
    end_points[sc.name + "/layer_name'] = conv1

然后，在优化器优化代码处定义需要训练的scope

loss = #定义好你的loss

# 从scope获得需要训练的变量表
train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "model_name")

# 将var_list设定为刚才获得的变量表
opt = tf.train.AdamOptimizer(learning_rate=1e-5).minimize(loss, var_list=train_vars)

最后，导入预训练的权重

# 导入权重前要进行变量初始化
sess.run(tf.global_variables_initializer())

# 错误？导入预训练的权重（导入后会被初始化覆盖？）
# tf.train.init_from_checkpoint("model_name.ckpt", {"model_name/":"model_name/"})

# 更好的导入权重的方法
saver = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, "model_name"))
saver_sid.restore(sess, "model_name.ckpt")

Tensorflow Debug指南

作者: chaihahaha
时间: 2020-01-02
分类: 人工智能笔记
评论

通用的Debug方案：使用Tensorflow/Keras中在不同的网络层之间传递的tensor对象的eval()方法进行调试，在一个Tensorflow Session中，使用tf.global_variables_initializer()以初始化之前定义的tensor对象。之后就可以对之前定义的全局变量进行更改，以对网络的输入进行自定义，并使用eval()方法观察层之间的输出值了。in...

展开阅读

通用的Debug方案：

使用Tensorflow/Keras中在不同的网络层之间传递的tensor对象的eval()方法进行调试，在一个Tensorflow Session中，使用tf.global_variables_initializer()以初始化之前定义的tensor对象。之后就可以对之前定义的全局变量进行更改，以对网络的输入进行自定义，并使用eval()方法观察层之间的输出值了。

input_test = X[0],Y[0]  #从输入数据中截取的测试片段
output_test = model(
        [tf.convert_to_tensor(input_test[0][0],float), 
         tf.convert_to_tensor(input_test[0][1],float)]
    ) #输入网络，得到最终输出
loss = my_loss(tf.convert_to_tensor(input_test[1],float), output_test)
test_x1=tf.convert_to_tensor([[1,2],[2,3]],float) #对my_layer层的测试输入，第一个参数
test_x2=tf.convert_to_tensor([[2,1],[4,3]],float) #对my_layer层的测试输入，第二个参数
test_simulation = my_layer([test_x1,test_x2])     #计算测试结果
print(test_simulation.shape)                             #不用启动session也可以输入中间层的形状
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(test_simulation.eval())
    print(input_test[1],output_test.eval())
    print(loss.eval())

TypeError

TypeError: Input 'b' of 'MatMul' Op has type float32 that does not match
 type float64 of argument 'a'.

这种错误是可能是由于输入给模型（或函数）的tensor数据类型不是正常的float类型，可以将错误的tensor输入给函数tf.convert_to_tensor()以得到正常的tensor，其中函数convert_to_tensor的dtype参数应设置为np.float32或float（不怕麻烦的话可以把所有的tensor都设置一遍，肯定能解决这个问题）。如下例所示：

input = tf.convert_to_tensor(input,np.float32)

ValueError

ValueError: An operation has `None` for gradient. Please make sure that all of your
 ops have a gradient defined (i.e. are differentiable). Common ops without gradient:
 K.argmax, K.round, K.eval.

出现这种情况时一般是自己定义的Loss函数出了问题

ValueError: Error when checking target: expected dense to have 3 dimensions, 
but got array with shape (1, 1)

这种错误一般是由于输入张量数据和类别标签的张量维数（形状/shape）与网络结构的输入层或输出层不匹配导致的。比如在使用model.fit_generator()对训练数据进行拟合时，如果输入给这个函数的generator参数定义为如下形式：

X=[1,2,3]
Y=[1,2,3]
def generator(X,Y):
    while True:
        for i in range(len(Y)):
            yield [[[[X[i]]]],[[[[Y[i]]]]
model.fit_generator(generator(X,Y), steps_per_epoch=1, epochs=10, verbose=1)

那么generator输出的训练数据X在传递给fit_generator后会被自动转化成形状为(1,1,1)的张量，则模型的输入层就必须是(None, 1, 1)，其中模型输入层的形状约定为(batch的大小, input_shape[0], input_shape[1])，即第一层中的input_shape参数应被设为(1, 1)。generator输出的类别标签Y也是形状为(1,1,1)的张量，因此Batch的大小应为1，输出层的形状应被设计为(None, 1, 1)。注意：输入输出层形状的第一个参数是Batch的大小，与真实数据维数和形状无关，输入层的input_shape参数只是设定了输入数据形状，构成了输入层形状的后几个参数。

为了避免上述麻烦，建议在数据输入到网络之前进行reshape以适合网络的输入输出形状。对上例：

X=[1,2,3]
Y=[1,2,3]
def generator(X,Y):
    while True:
        for i in range(len(Y)):
            yield np.array(X[i]).reshape(input_layer_shape),
                  np.array([Y[i]).reshape(output_layer_shape)
model.fit_generator(generator(X,Y), steps_per_epoch=1, epochs=10, verbose=1)

AttributeError

AttributeError: 'Model' object has no attribute 'total_loss'

出现这种情况一般是模型定义和编译的代码运行时出了问题，模型没有正确生成，而你忘记处理了，导致生成了一半的模型出现缺什么东西的问题。

MemoryError

你的内存满了，试着减小模型的大小，或者换个大点的内存吧。

ResourceExhaustedError: OOM when allocating tensor with shape[1,64,1080,1920] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
     [[{{node concat_3}} = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](conv2d_transpose_3, g_conv1_2/Maximum, concat-2-LayoutOptimizer)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

     [[{{node DepthToSpace/_3}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", 
send_device_incarnation=1, tensor_name="edge_277_DepthToSpace", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

换个显存更大的机器就解决了

如果有两个或以上python进程都在使用gpu，有时会报出复制错误等运行时错误，这时关掉其余的进程或kernel即可。

Loss函数由于形式复杂，常常会出现NaN的现象，对于这种bug，可以使用以下语句逐个tensor进行分析：

tf.reduce_any(tf.math.is_nan(value)).eval(session=tf.Session())

常常引起NaN的函数有：tf.math.divide，tf.math.log，tf.math.sqrt，对于除以0的NaN可以使用如下函数保证分母不为0：

def safe_divisor(x):
    return tf.where(tf.logical_and(tf.less(x,1e-6),tf.greater_equal(x,0)), 1e-6 * tf.ones_like(x), x)

对于log和sqrt，可以使用以下函数保证大于0：

def safe_positive(x):
    return tf.math.abs(x)+0.001
def normalize(image):
    i_max = tf.math.reduce_max(image)
    i_min = tf.math.reduce_min(image)
    if i_max == i_min:
        return image
    return tf.math.divide(image - i_min, i_max - i_min)

此外，学习率过大也会引起loss变为NaN，但是通常随机初始化权重后，还没有反向传播时loss并不是NaN

ValueError: Cannot convert a partially known TensorShape to a Tensor: xxx

此类语句会引起这种错误：x,y,z = tensor.shape，因为tensor.shape还是tensor类型，不能转成元组

TypeError: Cannot create initializer for non-floating point type.

Placeholder的数据类型和网络输入类型不匹配会导致此错误

在所有Placeholder，变量，optimizer之后添加sess.run(tf.global_variables_initializer())即可解决

ValueError: Cannot convert an unknown Dimension to a Tensor: ?

将tensor.shape[0]改成tf.shape(tensor)[0]

tensorflow工具函数

作者: chaihahaha
时间: 2020-01-02
分类: 人工智能笔记
评论

import os import tensorflow as tf import numpy as np def mkdir(result_dir): if not os.path.isdir(result_dir): os.makedirs(result_dir) return def load_ckpt_initialize(checkpoint_d...

展开阅读

import os
import tensorflow as tf
import numpy as np

def mkdir(result_dir):
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    return


def load_ckpt_initialize(checkpoint_dir, sess):
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt:
        print('loaded ' + ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)  
    return saver
 

def sobel_loss(img1,img2):
# sobel边缘检测后计算L1 loss，本来是为了防止模糊化，但似乎有一些问题，网络会去学到不存在的边缘
    edge_1 = tf.math.abs(tf.image.sobel_edges(img1))
    edge_2 = tf.math.abs(tf.image.sobel_edges(img2))
    m_1 = tf.reduce_mean(edge_1)
    m_2 = tf.reduce_mean(edge_2)
    edge_bin_1 = tf.cast(edge_1>m_1, tf.float32)
    edge_bin_2 = tf.cast(edge_2>m_2, tf.float32)
    return tf.reduce_mean(tf.math.abs(edge_bin_1-edge_bin_2))

def load_image_to_memory(image_dir):
# 将image_dir下的所有图片加载到内存，存到一个list里
    if image_dir[-1]!="/" and image_dir[-2:]!="\\":
        print("invalid dir")
        return -1
    image_pack=[]
    for i in os.listdir(image_dir):
        image_tmp = plt.imread(image_dir + i)
        image_pack.append(image_tmp)
    return image_pack

def generate_batch(train_pics,batch_size,dark_pack,gt_pack):
""" 
   :argument 
             train_pics: 图片个数 
             batch_size: int 
             dark_pack: list, appended [h,w,c] 图片(nparray), train_pics 个元素
             gt_pack: list, appended [h,w,c] 图片(nparray), train_pics个元素
   :returns
            nparray [batch_size, h,w,c] 随机截取的一个batch
"""
    input_patches = []
    gt_patches = []
    for ind in np.random.permutation(train_pics)[:batch_size]:
        imgrgb=dark_pack[ind]
        imggt = gt_pack[ind]
        
        W = imgrgb.shape[0]
        H = imgrgb.shape[1]
        ps = max(min(W//4,H//4),8)

        xx = np.random.randint(0, W - ps)
        yy = np.random.randint(0, H - ps)

        img_feed_in = imgrgb[np.newaxis,:,:,:] 
        img_feed_gt = imggt[np.newaxis,:,:,:] 
        # random crop flip to generate patches
        input_patch = img_feed_in[:, xx:xx + ps, yy:yy + ps, :]
        gt_patch = img_feed_gt[:, xx * 2:xx * 2 + ps * 2, yy * 2:yy * 2 + ps * 2, :]
        if np.random.randint(2, size=1)[0] == 1:  # random flip
            input_patch = np.flip(input_patch, axis=1)
            gt_patch = np.flip(gt_patch, axis=1)
        if np.random.randint(2, size=1)[0] == 1:
            input_patch = np.flip(input_patch, axis=2)
            gt_patch = np.flip(gt_patch, axis=2)
        if np.random.randint(2, size=1)[0] == 1:  # random transpose
            input_patch = np.transpose(input_patch, (0, 2, 1, 3))
            gt_patch = np.transpose(gt_patch, (0, 2, 1, 3))
        input_patch = np.minimum(input_patch, 1.0)
        input_patches.append(input_patch)
        gt_patches.append(gt_patch)
    return np.concatenate(input_patches,0),np.concatenate(gt_patches,0)

def cov(x,y):
# NHWC格式图像的协方差covariance
    mshape = x.shape
    #n,h,w,c
    x_bar = tf.reduce_mean(x, axis=[1,2,3])
    y_bar = tf.reduce_mean(y, axis=[1,2,3])
    x_bar = tf.einsum("i,jkl->ijkl",x_bar,tf.ones_like(x[0,:,:,:]))
    y_bar = tf.einsum("i,jkl->ijkl",y_bar,tf.ones_like(x[0,:,:,:]))
    return tf.reduce_mean((x-x_bar)*(y-y_bar), [1,2,3])

def cumsum(xs):
# tensorflow version "np.cumsum"
    values = tf.unstack(xs)
    out = []
    prev = tf.zeros_like(values[0])
    for val in values:
        s = prev + val
        out.append(s)
        prev = s
    result = tf.stack(out)
    return result

def piecewise_linear_fn(x, x1,x2,y1,y2):
# 分段线性插值
    return tf.where(tf.logical_or(tf.less(x,x1), tf.greater(x,x2)),
                    tf.constant(0.0,shape=[1]), 
                    y1 + (y2-y1)/(x2-x1)*(x-x1))

def count(matrix, minval, maxval):
# 计数 count(minval< matrix < maxval)
    return tf.reduce_sum(tf.where( tf.logical_and(tf.greater(matrix, minval), tf.less(matrix, maxval)), 
                         tf.ones_like(matrix, dtype=tf.float32), 
                         tf.zeros_like(matrix, dtype=tf.float32)))

def generate_opt(loss):
    lr = tf.placeholder(tf.float32)
    opt = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)
    return opt, lr

def generate_weights(shape):
    weights = tf.Variable(tf.random_uniform(shape=shape,minval=0.0,maxval=1.0))
    return weights

pytorch debug指南

作者: chaihahaha
时间: 2020-01-02
分类: 人工智能笔记
评论

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same在初始化后的model后添加代码model.cuda()即可RuntimeError: cuda runtime error (59) : device-side assert trig...

展开阅读

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

在初始化后的model后添加代码model.cuda()即可

RuntimeError: cuda runtime error (59) : device-side assert triggered at xxx

使用如下flag重新运行python脚本：

CUDA_LAUNCH_BLOCKING=1 python your_script.py

并根据assertion fail信息进行debug

a if condition else b + c if condition else d == a if condition else (b + c if condition else d)

因此一定要在if else表达式外面加括号

Key error 是因为字典的key找不到引起的

pytorch默认复制索引，因此用一个tensor记录历史值不能直接用等号，要用clone()

a=torch.zeros((2,3))
a_old = a[1,1] # 0
a[1,1] = 1
print(a_old) # 1