SVHN Dataset Recognition (PyTorch)

Introduction to the SVHN Dataset

The SVHN dataset consists of house numbers cropped from Google Street View images, similar in style to MNIST. It contains 10 classes: the digits 1–9 carry labels 1–9, while "0" is labeled 10. The training set has 73,257 images and the test set has 26,032 images.
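One caveat: torchvision's datasets.SVHN remaps the original label 10 for digit "0" back to 0, so the labels you actually see in PyTorch run from 0 to 9. A quick check (using the same data_svhn directory as the code below):

import numpy as np
from torchvision import datasets

# Load the raw training split (no transform needed just to inspect labels)
svhn = datasets.SVHN("data_svhn", split="train", download=True)
print(len(svhn))               # 73257 images
print(np.unique(svhn.labels))  # [0 1 2 3 4 5 6 7 8 9] -- '0' is label 0, not 10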


Recognizing SVHN with PyTorch

Importing Modules
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets,transforms
from torch.utils.data import DataLoader
import torch.optim as optim
Parameter Definitions

Here Resize rescales the image to a fixed size; ColorJitter randomly alters image properties such as brightness, contrast, saturation, and hue; and RandomRotation rotates the image by a random angle within the given range.
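To get a feel for what these augmentations do, you can apply the geometric/color part of the pipeline (without ToTensor/Normalize) to a single image and look at the result. A minimal sketch, assuming the dataset has already been downloaded:

from torchvision import datasets, transforms

aug = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ColorJitter(0.3, 0.3, 0.2),  # brightness, contrast, saturation
    transforms.RandomRotation(5),           # rotate within +/-5 degrees
])

raw = datasets.SVHN("data_svhn", split="train", download=True)
img, label = raw[0]   # img is a PIL Image when no transform is set
aug(img).show()       # each call produces a different random augmentation

With that in mind, the actual parameters and transform pipeline: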

batch_size = 16
epochs = 5
transform = transforms.Compose([   # renamed to avoid shadowing the transforms module
    transforms.Resize((128, 128)),
    transforms.ColorJitter(0.3, 0.3, 0.2),
    transforms.RandomRotation(5),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # ImageNet channel statistics
])
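The mean/std passed to Normalize are the usual ImageNet statistics; SVHN's own channel statistics differ, and computing them is cheap. A sketch of one way to estimate them over the training set (my own variable names):

import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

plain = datasets.SVHN("data_svhn", split="train", download=True,
                      transform=transforms.ToTensor())
loader = DataLoader(plain, batch_size=1024)

n, mean, sq = 0, torch.zeros(3), torch.zeros(3)
for x, _ in loader:
    n += x.size(0)
    mean += x.mean(dim=(2, 3)).sum(0)      # per-channel mean of each image
    sq += (x ** 2).mean(dim=(2, 3)).sum(0)
mean /= n
std = (sq / n - mean ** 2).sqrt()
print(mean, std)  # feed these into Normalize instead of the ImageNet values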
Loading the Dataset
train_set = datasets.SVHN("data_svhn", split="train", download=True, transform=transform)
test_set = datasets.SVHN("data_svhn", split="test", download=True, transform=transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)
Network Definition

Just convolve and be done with it! Here I use three convolutional layers with ReLU activations, plus max pooling, followed by a single fully connected layer. Before the nn.Linear operation, view is used to flatten the 4-D feature map into a 2-D tensor, since a fully connected layer expects 2-D input and output.

class SVHN_Net(nn.Module):
    def __init__(self):
        super(SVHN_Net, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 2),  # 64 * 63 * 63
            nn.ReLU(),
            nn.MaxPool2d(2),         # 64 * 31 * 31
            nn.Conv2d(64, 64, 3),    # 64 * 29 * 29
            nn.ReLU(),
            nn.MaxPool2d(2),         # 64 * 14 * 14
            nn.Conv2d(64, 32, 2),    # 32 * 13 * 13
            nn.ReLU(),
            nn.MaxPool2d(2)          # 32 * 6 * 6
        )

        # 11 outputs to cover labels 1-10 as described above; with
        # torchvision's 0-9 labels, 10 outputs would also suffice
        self.fc1 = nn.Linear(32 * 6 * 6, 11)

    def forward(self, x):
        cnn_res = self.cnn(x)
        cnn_res = cnn_res.view(cnn_res.size(0), -1)  # flatten to (batch, features)
        f1 = self.fc1(cnn_res)
        return f1
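When writing a network like this it's easy to get the flattened size wrong; a dummy forward pass run right after the class definition confirms the 32*6*6 figure (input size matching the Resize((128,128)) above):

net = SVHN_Net()
dummy = torch.randn(1, 3, 128, 128)  # one fake RGB image
print(net.cnn(dummy).shape)          # torch.Size([1, 32, 6, 6])
print(net(dummy).shape)              # torch.Size([1, 11])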
Instantiating the Model and Optimizer
model = SVHN_Net()
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
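As a quick sanity check on model size, counting the trainable parameters takes one line:

num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"trainable parameters: {num_params}")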
Training the Model
def train_model(model, train_loader, optimizer, epoch):
    # put the model in training mode
    model.train()
    for batch_index, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_index % 200 == 0:
            print("Train Epoch : {} \t Loss : {:.6f}".format(epoch, loss.item()))
Testing the Model
def test_model(model, test_loader):
    model.eval()
    correct = 0.0
    test_loss = 0.0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            # sum per-sample losses so dividing by the dataset size below is correct
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            # index of the largest logit = predicted class
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print("Test ---- Average loss : {:.4f}, Accuracy : {:.3f}\n".format(
        test_loss, 100.0 * correct / len(test_loader.dataset)))
Running Training and Testing
for epoch in range(1, epochs + 1):  # count from 1 so exactly `epochs` epochs run
    train_model(model, train_loader, optimizer, epoch)
    test_model(model, test_loader)
Results

…Good grief, that accuracy is way too low.


Model Optimization

class SVHN_Net(nn.Module):
    def __init__(self):
        super(SVHN_Net, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 16, 3, 2),   # 16 * 63 * 63
            nn.ReLU(),
            nn.MaxPool2d(2),          # 16 * 31 * 31
            nn.Conv2d(16, 32, 3),     # 32 * 29 * 29
            nn.ReLU(),
            nn.MaxPool2d(2),          # 32 * 14 * 14
            nn.Conv2d(32, 64, 2),     # 64 * 13 * 13
            nn.ReLU(),
            nn.Conv2d(64, 128, 2),    # 128 * 12 * 12
            nn.ReLU(),
            nn.MaxPool2d(2),          # 128 * 6 * 6
            nn.Conv2d(128, 128, 3),   # 128 * 4 * 4
            nn.ReLU(),
            nn.MaxPool2d(2),          # 128 * 2 * 2
        )

        self.fc1 = nn.Linear(128 * 2 * 2, 128)
        self.fc2 = nn.Linear(128, 11)

    def forward(self, x):
        cnn_res = self.cnn(x)
        cnn_res = cnn_res.view(cnn_res.size(0), -1)
        f1 = self.fc1(cnn_res)
        f1 = self.fc2(f1)
        return f1
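Again, a dummy pass is the quickest way to confirm the 128*2*2 flatten size (still assuming the 128x128 inputs produced by the Resize above):

net = SVHN_Net()  # the optimized version above
dummy = torch.randn(1, 3, 128, 128)
print(net.cnn(dummy).shape)  # torch.Size([1, 128, 2, 2])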


Further Optimization

# accuracy reaches 93.4%

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim

batch_size = 32
epochs = 100

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# no Resize this time: the network below works on the native 32x32 images
transform = transforms.Compose([
    transforms.ColorJitter(0.3, 0.3, 0.2),
    transforms.RandomRotation(5),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

train_set = datasets.SVHN("data_svhn", split="train", download=True, transform=transform)
test_set = datasets.SVHN("data_svhn", split="test", download=True, transform=transform)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

print("Download complete")

class SVHN_Net(nn.Module):
    def __init__(self):
        super(SVHN_Net, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 32, 3),      # 32 * 30 * 30
            nn.ReLU(),
            nn.Conv2d(32, 32, 3),     # 32 * 28 * 28
            nn.ReLU(),
            nn.MaxPool2d(2),          # 32 * 14 * 14
            nn.Dropout(0.3),
            nn.Conv2d(32, 64, 3),     # 64 * 12 * 12
            nn.ReLU(),
            nn.Conv2d(64, 128, 3),    # 128 * 10 * 10
            nn.ReLU(),
            nn.MaxPool2d(2),          # 128 * 5 * 5
            nn.Dropout(0.3),
            nn.Conv2d(128, 128, 3),   # 128 * 3 * 3
            nn.ReLU(),
            nn.MaxPool2d(2),          # 128 * 1 * 1
            nn.Dropout(0.4),
            # nn.Conv2d(128, 64, 2),  # 64 * 1 * 1
            nn.ReLU()
        )

        # torchvision maps digit '0' to label 0, so 10 outputs are enough here
        self.fc1 = nn.Linear(128, 10)

    def forward(self, x):
        cnn_res = self.cnn(x)                        # (batch, 128, 1, 1)
        cnn_res = cnn_res.view(cnn_res.size(0), -1)  # (batch, 128)
        f1 = self.fc1(cnn_res)
        return f1

model = SVHN_Net()
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss()

def train_model(model, train_loader, optimizer, epoch):
    # train the model for one epoch
    model.train()
    for batch_index, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_index % 400 == 0:
            print("Train Epoch : {} \t Loss : {:.6f}".format(epoch, loss.item()))


def test_model(model, test_loader):
    model.eval()
    correct = 0.0
    test_loss = 0.0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print("Test ---- Average loss : {:.4f}, Accuracy : {:.3f}\n".format(
        test_loss, 100.0 * correct / len(test_loader.dataset)))

for epoch in range(1, epochs + 1):
    train_model(model, train_loader, optimizer, epoch)
    test_model(model, test_loader)
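Once an accuracy you're happy with shows up, it's worth persisting the weights. A standard state_dict pattern (the filename is my own choice):

# Save only the learned weights, not the whole module
torch.save(model.state_dict(), "svhn_net.pt")

# Later: rebuild the architecture, then load the weights back
model = SVHN_Net().to(device)
model.load_state_dict(torch.load("svhn_net.pt", map_location=device))
model.eval()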
