【数据挖掘与商务智能决策】第十章支持向量机

文章列表

1. 线性可分SVM

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

1.1 生成模拟数据

# 导入sklearn模拟二分类数据生成模块
from sklearn.datasets import make_blobs
# 生成模拟二分类数据集
X, y =  make_blobs(n_samples=150, n_features=2, centers=2, cluster_std=1.2, random_state=40)
# 设置颜色参数
colors = {0:'r', 1:'g'}
# 绘制二分类数据集的散点图
plt.scatter(X[:,0], X[:,1], marker='o', c=pd.Series(y).map(colors))
plt.show();

【数据挖掘与商务智能决策】第十章支持向量机

# 将标签转换为1/-1
y_ = y.copy()
y_[y_==0] = -1
y_ = y_.astype(float)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y_, test_size=0.3, random_state=43)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(105, 2) (105,) (45, 2) (45,)

1.2 线性可分支持向量机

# 导入sklearn线性SVM分类模块
from sklearn.svm import LinearSVC
# 创建模型实例
clf = LinearSVC(random_state=0, tol=1e-5)
# 训练
clf.fit(X_train, y_train)
# 预测
y_pred = clf.predict(X_test)

from sklearn.metrics import accuracy_score
# 计算测试集准确率
print(accuracy_score(y_test, y_pred))

1.0

from matplotlib.colors import ListedColormap 绘制线性可分支持向量机决策边界图
def plot_classifer(model, X, y):# 超参数边界x_min = -7x_max = 12y_min = -12y_max = -1step = 0.05# meshgridxx, yy = np.meshgrid(np.arange(x_min, x_max, step),np.arange(y_min, y_max, step))# 模型预测z = model.predict(np.c_[xx.ravel(), yy.ravel()])# 定义color mapcmap_light = ListedColormap(['#FFAAAA', '#AAFFAA'])cmap_bold = ListedColormap(['#FF0000', '#003300'])z = z.reshape(xx.shape)plt.figure(figsize=(8, 5), dpi=96)plt.pcolormesh(xx, yy, z, cmap=cmap_light)plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)plt.show()

plot_classifer(clf, X_train, y_train)

【数据挖掘与商务智能决策】第十章支持向量机

plot_classifer(clf, X_test, y_test)

【数据挖掘与商务智能决策】第十章支持向量机

2. 广义线性可分SVM

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

2.1 生成模拟数据

mean1, mean2 = np.array([0, 2]), np.array([2, 0])
covar = np.array([[1.5, 1.0], [1.0, 1.5]])
X1 = np.random.multivariate_normal(mean1, covar, 100)
y1 = np.ones(X1.shape[0])
X2 = np.random.multivariate_normal(mean2, covar, 100)
y2 = -1 * np.ones(X2.shape[0])
X_train = np.vstack((X1[:80], X2[:80]))
y_train = np.hstack((y1[:80], y2[:80]))
X_test = np.vstack((X1[80:], X2[80:]))
y_test = np.hstack((y1[80:], y2[80:]))
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
print(X_train.shape)

(160, 2) (160,) (40, 2) (40,)
(160, 2)

# 设置颜色参数
colors = {1:'r', -1:'g'}
# 绘制二分类数据集的散点图
plt.scatter(X_train[:,0], X_train[:,1], marker='o', c=pd.Series(y_train).map(colors))
plt.show();

【数据挖掘与商务智能决策】第十章支持向量机

2.2 广义线性可分支持向量机

from sklearn import svm
# 创建svm模型实例
clf = svm.SVC(kernel='linear')
# 模型拟合
clf.fit(X_train, y_train)
# 模型预测
y_pred = clf.predict(X_test)

from sklearn.metrics import accuracy_score
# 计算测试集准确率
print(accuracy_score(y_test, y_pred))
# 计算测试集准确率
print('Accuracy of soft margin svm based on sklearn: ', accuracy_score(y_test, y_pred))

1.0
Accuracy of soft margin svm based on sklearn:  1.0

2.3 结果可视化

def plot_classifier(X1_train, X2_train, clf):plt.plot(X1_train[:,0], X1_train[:,1], "ro")plt.plot(X2_train[:,0], X2_train[:,1], "go")plt.scatter(clf.support_vectors_[:,0], clf.support_vectors_[:,1], s=100, c="r", edgecolors="b", label="support vector")X1, X2 = np.meshgrid(np.linspace(-4,4,50), np.linspace(-4,4,50))X = np.array([[x1, x2] for x1, x2 in zip(np.ravel(X1), np.ravel(X2))])Z = clf.decision_function(X).reshape(X1.shape)plt.contour(X1, X2, Z, [0.0], colors='k', linewidths=1, origin='lower')plt.contour(X1, X2, Z + 1, [0.0], colors='grey', linewidths=1, origin='lower')plt.contour(X1, X2, Z - 1, [0.0], colors='grey', linewidths=1, origin='lower')plt.legend()plt.show()plot_classifier(X_train[y_train==1], X_train[y_train==-1], clf)

【数据挖掘与商务智能决策】第十章支持向量机

3. 非线性SVM

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

3.1 生成模拟数据

mean1, mean2 = np.array([-1, 2]), np.array([1, -1])
mean3, mean4 = np.array([4, -4]), np.array([-4, 4])
covar = np.array([[1.0, 0.8], [0.8, 1.0]])
X1 = np.random.multivariate_normal(mean1, covar, 50)
X1 = np.vstack((X1, np.random.multivariate_normal(mean3, covar, 50)))
y1 = np.ones(X1.shape[0])
X2 = np.random.multivariate_normal(mean2, covar, 50)
X2 = np.vstack((X2, np.random.multivariate_normal(mean4, covar, 50)))
y2 = -1 * np.ones(X2.shape[0])
X_train = np.vstack((X1[:80], X2[:80]))
y_train = np.hstack((y1[:80], y2[:80]))
X_test = np.vstack((X1[80:], X2[80:]))
y_test = np.hstack((y1[80:], y2[80:]))
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(160, 2) (160,) (40, 2) (40,)

# 设置颜色参数
colors = {1:'r', -1:'g'}
# 绘制二分类数据集的散点图
plt.scatter(X_train[:,0], X_train[:,1], marker='o', c=pd.Series(y_train).map(colors))
plt.show();

【数据挖掘与商务智能决策】第十章支持向量机

3.2 非线性SVM

from sklearn import svm
# 创建svm模型实例
clf = svm.SVC(kernel='rbf')
# 模型拟合
clf.fit(X_train, y_train)

SVC()

In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

SVC

SVC()

# 导入sklearn准确率评估函数
from sklearn.metrics import accuracy_score
# 模型预测
y_pred = clf.predict(X_test)
# 计算测试集准确率
print('Accuracy of soft margin svm based on cvxopt: ', accuracy_score(y_test, y_pred))

Accuracy of soft margin svm based on cvxopt:  1.0

3.3 结果可视化

 绘制非线性可分支持向量机
def plot_classifier(X1_train, X2_train, clf):plt.plot(X1_train[:,0], X1_train[:,1], "ro")plt.plot(X2_train[:,0], X2_train[:,1], "go")plt.scatter(clf.support_vectors_[:,0], clf.support_vectors_[:,1], s=100, c="r", edgecolors="b", label="support vector")X1, X2 = np.meshgrid(np.linspace(-4,4,50), np.linspace(-4,4,50))X = np.array([[x1, x2] for x1, x2 in zip(np.ravel(X1), np.ravel(X2))])Z = clf.decision_function(X).reshape(X1.shape)plt.contour(X1, X2, Z, [0.0], colors='k', linewidths=1, origin='lower')plt.contour(X1, X2, Z + 1, [0.0], colors='grey', linewidths=1, origin='lower')plt.contour(X1, X2, Z - 1, [0.0], colors='grey', linewidths=1, origin='lower')plt.legend()plt.show()plot_classifier(X_train[y_train==1], X_train[y_train==-1], clf)

【数据挖掘与商务智能决策】第十章支持向量机

4. SVR

import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

4.1 生成模拟数据

np.random.seed(0)
X = np.sort(np.random.uniform(0,6,50),axis=0)
y = 2*np.sin(X)+0.1*np.random.randn(50)
X = X.reshape(-1,1)

4.2 SVR建模

from sklearn import svm

svr_rbf = svm.SVR(kernel='rbf',gamma=0.4,C=100)
svr_rbf.fit(X,y)

SVR(C=100, gamma=0.4)

SVR

SVR(C=100, gamma=0.4)

svr_linear = svm.SVR(kernel='linear',C=100)
svr_linear.fit(X,y)

SVR(C=100, kernel='linear')

SVR

SVR(C=100, kernel='linear')

svr_poly = svm.SVR(kernel='poly',degree=3,C=100)
svr_poly.fit(X,y)

SVR(C=100, kernel='poly')

SVR

SVR(C=100, kernel='poly')

4.3 生成测试数据集

X_test = np.linspace(X.min(), 1.5*X.max(), 50)
np.random.seed(0)
y_test = 2*np.sin(X_test) + 0.1*np.random.randn(50)
X_test=X_test.reshape(-1,1)

4.4 预测并可视化

y_rbf = svr_rbf.predict(X_test)
y_linear = svr_linear.predict(X_test)
y_poly = svr_poly.predict(X_test)
sp = svr_rbf.support_
plt.figure(figsize=(9, 8),facecolor='w')
plt.scatter(X[sp], y[sp], s=200, c='r', marker='*', label='RBF Support Vectors')
plt.plot(X_test, y_rbf, 'r-', linewidth=2, label='RBF Kernel')
plt.plot(X_test, y_linear, 'g-', linewidth=2, label='Linear Kernel')
plt.plot(X_test, y_poly, 'b-', linewidth=2, label='Polynomial Kernel')
plt.plot(X, y, 'ks', markersize=5, label='train data')
plt.plot(X_test, y_test, 'mo', markersize=5, label='test data')
plt.legend(loc='lower left')
plt.title('SVR', fontsize=16)
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)
plt.show()

【数据挖掘与商务智能决策】第十章支持向量机

【数据挖掘与商务智能决策】第十章支持向量机

1. 线性可分SVM

1.1 生成模拟数据

1.2 线性可分支持向量机

2. 广义线性可分SVM

2.1 生成模拟数据

2.2 广义线性可分支持向量机

2.3 结果可视化

3. 非线性SVM

3.1 生成模拟数据

3.2 非线性SVM

3.3 结果可视化

4. SVR

4.1 生成模拟数据

4.2 SVR建模

4.3 生成测试数据集

4.4 预测并可视化

公告

DeepSeek全套部署资料免费下载

免费可商用字体批量下载

标签

【数据挖掘与商务智能决策】第十章 支持向量机

1. 线性可分SVM

1.1 生成模拟数据

1.2 线性可分支持向量机

2. 广义线性可分SVM

2.1 生成模拟数据

2.2 广义线性可分支持向量机

2.3 结果可视化

3. 非线性SVM

3.1 生成模拟数据

3.2 非线性SVM

3.3 结果可视化

4. SVR

4.1 生成模拟数据

4.2 SVR建模

4.3 生成测试数据集

4.4 预测并可视化

相关问题

公告

DeepSeek全套部署资料免费下载

免费可商用字体批量下载

标签

【数据挖掘与商务智能决策】第十章支持向量机