## 卷积神经网络基础

import torch

# Input tensor; requires_grad=True so gradients flow back to `a`.
# (The original snippet was missing this definition, causing a NameError;
# the value [[2., 4.]] is reconstructed from the recorded output [[16., 36.]].)
a = torch.tensor([[2., 4.]], requires_grad=True)

# Build a 1x2 intermediate tensor from element-wise polynomials of `a`.
b = torch.zeros(1, 2)
b[0, 0] = a[0, 0] ** 2 + a[0, 1]  # b0 = a0^2 + a1
b[0, 1] = a[0, 1] ** 2 + a[0, 0]  # b1 = a1^2 + a0
out = 2 * b

# For a non-scalar output, backward() requires a gradient tensor with the
# same shape as `out` (the vector in the vector-Jacobian product).
out.backward(torch.FloatTensor([[1., 2.]]))
print('input:{}'.format(a.data))
print('output:{}'.format(out.data))


input:tensor([[2., 4.]])
output:tensor([[16., 36.]])

## 二维卷积层

### 二维互相关运算

import torch
import torch.nn as nn

def corr2d(X, K):
    """2D cross-correlation (no flip): slide kernel K over X and sum the
    element-wise products at each position.

    X: 2-D input tensor of shape (H, W).
    K: 2-D kernel tensor of shape (h, w).
    Returns a float tensor of shape (H - h + 1, W - w + 1).
    """
    H, W = X.shape
    h, w = K.shape
    Y = torch.zeros(H - h + 1, W - w + 1)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            # Window of X aligned at (i, j), multiplied element-wise by K.
            Y[i, j] = (X[i: i + h, j: j + w] * K).sum()
    return Y


# Sanity-check corr2d on a small worked example.
X = torch.arange(9).view(3, 3)   # [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
K = torch.arange(4).view(2, 2)   # [[0, 1], [2, 3]]
Y = corr2d(X, K)
print(Y)


tensor([[19., 25.],
        [37., 43.]])

### 二维卷积层

class Conv2D(nn.Module):
    """Minimal 2D convolution layer built on corr2d.

    Holds a learnable kernel of shape `kernel_size` and a scalar bias,
    both initialized from a standard normal distribution.
    """

    def __init__(self, kernel_size):
        super(Conv2D, self).__init__()
        self.weight = nn.Parameter(torch.randn(kernel_size))
        self.bias = nn.Parameter(torch.randn(1))

    def forward(self, x):
        # Cross-correlate the input with the kernel, then add the bias
        # (broadcast over the whole output map).
        return corr2d(x, self.weight) + self.bias


# Build a 6x8 image with two vertical edges (1 -> 0 at column 2, 0 -> 1 at
# column 6) and the 6x7 target marking those edges with +1 / -1.
X = torch.ones(6, 8)
X[:, 2:6] = 0
Y = torch.zeros(6, 7)
Y[:, 1] = 1
Y[:, 5] = -1
print(X)
print(Y)


tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
[1., 1., 0., 0., 0., 0., 1., 1.],
[1., 1., 0., 0., 0., 0., 1., 1.],
[1., 1., 0., 0., 0., 0., 1., 1.],
[1., 1., 0., 0., 0., 0., 1., 1.]])
tensor([[ 0., 1., 0., 0., 0., -1., 0.],
[ 0., 1., 0., 0., 0., -1., 0.],
[ 0., 1., 0., 0., 0., -1., 0.],
[ 0., 1., 0., 0., 0., -1., 0.],
[ 0., 1., 0., 0., 0., -1., 0.],
[ 0., 1., 0., 0., 0., -1., 0.]])

# Learn the 1x2 edge-detection kernel (approximately [1, -1]) by manual
# gradient descent on the (X, Y) pair defined above.
conv2d = Conv2D(kernel_size=(1, 2))  # fixed typo: was `onv2d = ...`
step = 30
lr = 0.01
for i in range(step):
    Y_hat = conv2d(X)
    l = ((Y_hat - Y) ** 2).sum()  # squared-error loss
    l.backward()
    # 梯度下降 — manual SGD update on the raw parameter data
    # (reconstructed: the update code was missing, only this comment remained).
    conv2d.weight.data -= lr * conv2d.weight.grad
    conv2d.bias.data -= lr * conv2d.bias.grad

    # 梯度清零 — zero gradients so they don't accumulate across iterations.
    conv2d.weight.grad.fill_(0)
    conv2d.bias.grad.fill_(0)

    if (i + 1) % 5 == 0:
        print('Step %d, loss %.3f' % (i + 1, l.item()))

print(conv2d.weight.data)
print(conv2d.bias.data)


Step 10, loss 0.448
Step 15, loss 0.052
Step 20, loss 0.006
Step 25, loss 0.001
Step 30, loss 0.000
tensor([[ 0.9969, -0.9993]])
tensor([0.0013])

## 填充和步幅

### 填充

$$(n_h+p_h-k_h+1)\times(n_w+p_w-k_w+1)$$

### 步幅

$$\lfloor(n_h+p_h-k_h+s_h)/s_h\rfloor \times \lfloor(n_w+p_w-k_w+s_w)/s_w\rfloor$$

当 $p_h = p_w = p$ 时，我们称填充为 $p$；当 $s_h = s_w = s$ 时，我们称步幅为 $s$。

## 多输入通道和多输出通道

### 1x1卷积层

$1 \times 1$ 卷积核可在不改变高宽的情况下，调整通道数。$1 \times 1$ 卷积核不识别高和宽维度上相邻元素构成的模式，其主要计算发生在通道维上。假设我们将通道维当作特征维，将高和宽维度上的元素当成数据样本，那么 $1 \times 1$ 卷积层的作用与全连接层等价。

## 卷积层的简洁实现

• in_channels (python:int) – Number of channels in the input image
• out_channels (python:int) – Number of channels produced by the convolution
• kernel_size (python:int or tuple) – Size of the convolving kernel
• stride (python:int or tuple, optional) – Stride of the convolution. Default: 1
• padding (python:int or tuple, optional) – Zero-padding added to both sides of the input. Default: 0
• bias (bool, optional) – If True, adds a learnable bias to the output. Default: True

forward函数的参数为一个四维张量，形状为 $(N, C_{in}, H_{in}, W_{in})$，返回值也是一个四维张量，形状为 $(N, C_{out}, H_{out}, W_{out})$，其中 $N$ 是批量大小，$C, H, W$ 分别表示通道数、高度、宽度。

# Demonstrate nn.Conv2d: with this kernel/padding combination the spatial
# size (3, 5) is preserved ("same"-style padding), only channels change 2 -> 3.
X = torch.rand(4, 2, 3, 5)
print(X.shape)

conv2d = nn.Conv2d(
    in_channels=2,
    out_channels=3,
    kernel_size=(3, 5),
    stride=1,
    padding=(1, 2),
)
Y = conv2d(X)
print('Y.shape: ', Y.shape)
print('weight.shape: ', conv2d.weight.shape)
print('bias.shape: ', conv2d.bias.shape)
print(conv2d.weight.data)


torch.Size([4, 2, 3, 5])
Y.shape: torch.Size([4, 3, 3, 5])
weight.shape: torch.Size([3, 2, 3, 5])
bias.shape: torch.Size([3])

tensor([[[[-0.1092, 0.1168, 0.1400, -0.0465, -0.0568],
[-0.1320, -0.0556, 0.0207, -0.1416, -0.0540],
[ 0.0343, -0.0288, -0.0365, -0.0165, -0.1696]],

[[-0.0617, -0.1310, -0.1756, -0.1772, 0.0684],
[ 0.0529, -0.1666, 0.0058, -0.0135, -0.1763],
[ 0.1499, -0.0324, -0.1448, -0.0840, 0.1811]]],

[[[-0.0828, 0.1220, -0.1168, -0.0692, 0.1630],
[-0.0491, -0.0292, -0.1773, -0.1622, 0.0116],
[ 0.0757, 0.1569, -0.0121, -0.0682, -0.1187]],

[[ 0.0505, 0.0952, -0.1747, -0.1405, 0.0541],
[ 0.1089, 0.0578, -0.1252, 0.1132, -0.0673],
[-0.1200, 0.1759, 0.1563, -0.1153, 0.1390]]],

[[[ 0.0374, -0.1512, 0.1735, 0.1097, -0.0539],
[-0.0002, 0.0969, 0.1322, 0.0050, 0.1296],
[ 0.0055, 0.1743, -0.1126, -0.0422, 0.1633]],

[[-0.1269, -0.1741, -0.0255, -0.1518, -0.0297],
[-0.1162, 0.0448, 0.0955, 0.0330, 0.1388],
[ 0.1559, 0.1346, 0.0025, -0.1658, 0.0995]]]])

## 池化

### 池化层的简洁实现

• kernel_size – the size of the window to take a max over
• stride – the stride of the window. Default value is kernel_size
• padding – implicit zero padding to be added on both sides

forward函数的参数为一个四维张量，形状为 $(N, C, H_{in}, W_{in})$，返回值也是一个四维张量，形状为 $(N, C, H_{out}, W_{out})$，其中 $N$ 是批量大小，$C, H, W$ 分别表示通道数、高度、宽度。

# Max-pooling with an asymmetric stride: 3x3 window, implicit padding 1,
# stride 2 along height and 1 along width.
X = torch.arange(0, 32, dtype=torch.float32).reshape(1, 2, 4, 4)
pool2d = nn.MaxPool2d(kernel_size=3, padding=1, stride=(2, 1))
Y = pool2d(X)
print(X)
print(Y)


tensor([[[[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.],
[12., 13., 14., 15.]],
[[16., 17., 18., 19.],
[20., 21., 22., 23.],
[24., 25., 26., 27.],
[28., 29., 30., 31.]]]])
tensor([[[[ 5., 6., 7., 7.],
[13., 14., 15., 15.]],
[[21., 22., 23., 23.],
[29., 30., 31., 31.]]]])