感知器(perception)是二分类线性分类模型,其输入为实例的特征向量,输出为实例的类别,取+1和-1的二值,感知机对应于输入空间(特征空间)中将实例划分为正负两类的分离超平面,属于判别模型,感知机学习旨在求出将训练数据进行线性划分的分离超平面,为此导入基于误分类的损失函数,利用梯度下降法对于损失函数进行极小化,求得感知器模型。——–摘自统计学习方法
感知器包含输入层和输出层,主要对线性的数据能有较好的区分效果。
感知器算法总的来说当实例点被误分类,即位于分离的超平面的一侧时,调整w,b的值,使分离超平面向该误分类的一侧移动,以减少该分类点与超平面的距离,直至超平面超过该误分类点使其被正确分类;
感知器分为原始形式和对偶形式,两种形式对感知器求解方法不同。具体对偶形式的感知器可以参考知乎:
https://www.zhihu.com/question/26526858
感知器学习算法原始形式:
输入:训练数据集T={(x1,y1),(x2,y2),⋯,(xn,xn)},其中xi∈χ=Rn,yi∈γ={−1,+1},i=1,2,⋯,N;学习率η(0<η<1);
输出: w,b;感知器模型f(x)=sign(w⋅x+b)
(1) 选取初值w0,b0
(2)在训练集中选取数据(xi,yi)
(3)如果yi(w⋅xi+b)≤0
w←w+ηyixi
b←b+ηyi
(4)转至(2),直到训练集中没有误分类的点
算法实现:
以例2.1为例:
实现代码:
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<iostream>
using namespace std;
int w[2] = {0};
int b = 0;
int step = 1;
class point
{
public:
point();
void givepoint(int inx,int iny,int intrue);
~point();
int x;
int y;
int mytrue;
private:
};
point::point()
{
}
void point::givepoint(int inx,int iny,int intrue)
{
x = inx;
y = iny;
mytrue = intrue;
}
point::~point()
{
}
void minL(point *inpoint,int pointnum)
{
int tempW[2];
int tempB=0;
int tempY;
int truecount = 0;
printf("初始w:(%d,%d),b:%d\n", w[0], w[1], b);
while (1)
{
tempW[0] = w[0];
tempW[1] = w[1];
tempB = b;
for (int i = 0; i < pointnum; i++)
{
tempY = tempW[0] * inpoint[i].x + tempW[1] * inpoint[i].y + tempB;
if ((tempY>0 && inpoint[i].mytrue == 1) || (tempY <=0 && inpoint[i].mytrue == -1))
{
truecount++;
continue;
}
else
{
if (tempY > 0)
{
w[0] = w[0] - step*inpoint[i].x;
w[1] = w[1] - step*inpoint[i].y;
b = b - step * 1;
}
else if (tempY <= 0)
{
w[0] = w[0] + step*inpoint[i].x;
w[1] = w[1] + step*inpoint[i].y;
b = b + step * 1;
}
printf("更新w:(%d,%d),b:%d\n", w[0], w[1], b);
break;
}
}
if (truecount == 3)
{
break;
}
else
{
truecount = 0;
}
}
}
int main()
{
int pointnum;
printf("请输入点的个数:\n");
scanf("%d", &pointnum);
point *inpoint=new point[pointnum];
printf("请输入样本点的坐标及样本种类,用正负1表示\n");
for (int i=0; i < pointnum; i++)
{
int tempx, tempy, temptrue;
scanf("%d%d%d", &tempx, &tempy, &temptrue);
inpoint[i].givepoint(tempx, tempy, temptrue);
}
getchar();
printf("输入完毕开始分类\n");
minL(inpoint, pointnum);
printf("最终w:(%d,%d),b:%d\n", w[0], w[1], b);
getchar();
}
实现效果:
感知器学习算法对偶形式:
输入:训练数据集T={(x1,y1),(x2,y2),⋯,(xn,xn)},其中xi∈χ=Rn,yi∈γ={−1,+1},i=1,2,⋯,N;学习率η(0<η<1);
输出:a,b;感知机模型f(x)=sign(∑Nj=1ajyjxj⋅x+b)
其中α=(α1,α2,⋯,αN)T
(1)α←0,b←0
(2)在训练集中选取数据(xi,yi)
(3)如果yi{∑Nj=1αjyjxj⋅xi+b}≤0
αi←αi+η
b←b+ηyi
(4)转至(2)直到没有误分类数据
对偶形式中训练实例仅以内积的形式出现,为了方便可以先将训练实例间的内积计算出来并以矩阵的形式储存,用Gram矩阵(还记得现控课上被Gram矩阵笼罩的阴影吗?没错这边又回来了ˋ_ˊ* )
G=[xi⋅xj]N×N
同样用上面的例子实现代码:
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<iostream>
using namespace std;
int w[2] = { 0 };
int b = 0;
int step = 1;
int Y[3] = { 0 };
class point
{
public:
point();
void givepoint(int inx, int iny, int intrue);
~point();
int x;
int y;
int mytrue;
private:
};
point::point()
{
}
void point::givepoint(int inx, int iny, int intrue)
{
x = inx;
y = iny;
mytrue = intrue;
}
point::~point()
{
}
void Duality_minL(point *inpoint, int pointnum,int *alpha,int **gram)
{
int tempW[2];
int tempB=0;
int tempY=0;
int truecount=0;
printf("初始a:(%d,%d,%d),b:%d\n", alpha[0], alpha[1], alpha[2], b);
while (1)
{
// tempW[0] = w[0];
// tempW[1] = w[1];
tempB = b;
for (int i = 0; i < pointnum; i++)
{
tempY = 0;
for (int j = 0; j <pointnum; j++)
{
tempY = tempY + alpha[j] * Y[j] * gram[j][i];
}
tempY = tempY + b;
//Y[i] = tempY;
if ((tempY>0 && inpoint[i].mytrue == 1) || (tempY <= 0 && inpoint[i].mytrue == -1))
{
truecount++;
continue;
}
else
{
if (tempY > 0)
{
Y[i] = -1;
alpha[i] = alpha[i] + 1;
b = b +Y[i]*step;
}
else if (tempY <= 0)
{
Y[i] = 1;
alpha[i] = alpha[i] + 1;
b = b + Y[i] * step;
}
printf("更新a:(%d,%d,%d),b:%d\n", alpha[0], alpha[1],alpha[2], b);
break;
}
}
if (truecount == 3)
{
break;
}
else
{
truecount = 0;
}
}
for (int i = 0; i < pointnum; i++)
{
w[0] = w[0] + alpha[i] * inpoint[i].x*inpoint[i].mytrue;
w[1] = w[1] + alpha[i] * inpoint[i].y*inpoint[i].mytrue;
}
}
int main()
{
int pointnum;
printf("请输入点的个数:\n");
scanf("%d", &pointnum);
point *inpoint = new point[pointnum];
int *alpha = new int[pointnum];
for (int i = 0; i < pointnum; i++)
{
alpha[i] = 0;
}
printf("请输入样本点的坐标及样本种类,用正负1表示\n");
for (int i = 0; i < pointnum; i++)
{
int tempx, tempy, temptrue;
scanf("%d%d%d", &tempx, &tempy, &temptrue);
inpoint[i].givepoint(tempx, tempy, temptrue);
}
int **gram = new int *[pointnum];
for (int i = 0; i < pointnum; i++)
{
gram[i] = new int[pointnum];
}
for (int i = 0; i < pointnum; i++)
{
for (int j = 0; j < pointnum; j++)
{
gram[i][j] = inpoint[i].x*inpoint[j].x + inpoint[i].y*inpoint[j].y;
}
}
getchar();
printf("输入完毕开始分类\n");
//minL(inpoint, pointnum);
Duality_minL(inpoint, pointnum, alpha, gram);
printf("最终w:(%d,%d),b:%d\n", w[0], w[1], b);
getchar();
}
这边感知器只能用于对线性的数据集进行二分类,而且因为样本量较少 所以能很快并且无误地将样本的正负集区分开来,当样本量多时可能无法全部无误地区分,这是应该要用到循环次数和损失函数;
暂且先这样,之后再改一下在mnist数据集上进行尝试