xiaopingguo/torch-train-study2

100 lines
3.7 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import torch as t
import torchvision as tv
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
#错误的还未学习
########## 超参数设置 ##########
epochs = 200
learning_rate = 0.001
batch_size = 256
gpu_ids = [0, 1, 2]
########## 一、数据加载与预处理 ##########
transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
trainset = tv.datasets.CIFAR10(root='./data/', train=True, download=True, transform=transform)
testset = tv.datasets.CIFAR10(root='./data/', train=False, download=True, transform=transform)
trainloader = t.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
testloader = t.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)
########## 二、定义神经网络 ##########
class LeNet(nn.Module):
def __init__(self):
super(LeNet, self).__init__()
# (in_channels, out_channels, kernel_size, stride, padding)
self.conv1 = nn.Conv2d(3, 6, 5, 1, 0)
self.conv2 = nn.Conv2d(6, 16, 5, 1, 0)
self.fc1 = nn.Linear(16 * 5 * 5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
def forward(self, x):
x = F.max_pool2d(F.relu(self.conv1(x)), 2, 2)
x = F.max_pool2d(F.relu(self.conv2(x)), 2, 2)
x = x.view(x.size()[0], -1)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
########## 三、训练神经网络 ##########
# 神经网络实例化
net = LeNet()
if (len(gpu_ids) == 0) or (not t.cuda.is_available()):
# gpu_ids列表为空, 或者没GPU可用则设置为GPU模式。
device = t.device("cpu")
print("Train Mode : CPU")
elif t.cuda.is_available() and len(gpu_ids) > 1:
# gpu_ids列表大于1表明想用多个GPU训练
device = t.device("cuda:0")
net = nn.DataParallel(net, device_ids=gpu_ids)
print("Train Mode : Multi GPU;", gpu_ids)
else:
# gpu_ids列表等于1表明想用一个GPU训练
device = t.device("cuda:" + str(gpu_ids[0]) if t.cuda.is_available() else "cpu")
print("Train Mode : One GPU;", device)
net = net.to(device)
print("\n", "##" * 10, " NetWork ", "##" * 10, "\n", net, "\n", "##" * 26, "\n")
# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = t.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
# 开始训练
for epoch in range(epochs):
running_loss = 0.0
for i, (inputs, labels) in enumerate(trainloader):
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
print("Epoch%03d: Training_loss = %.5f" % (epoch + 1, running_loss))
# 保存模型并进行验证
if (epoch + 1) % 10 == 0:
if len(gpu_ids) > 1:
t.save(net.module.state_dict(), "Cifar10_LeNet_Epoch" + str(epoch + 1) + ".pth")
else:
t.save(net.state_dict(), "Cifar10_LeNet_Epoch" + str(epoch + 1) + ".pth")
correct = 0
all = 0
with t.no_grad():
for (inputs, labels) in testloader:
inputs, labels = inputs.to(device), labels.to(device)
outputs = net(inputs)
_, predicted = t.max(outputs, 1)
all += inputs.size()[0]
correct += (predicted == labels).sum().item()
print("###" * 15)
print("Epoch%03d: TestSet_Accuracy = %.3f" % (epoch + 1, correct / all))
print("###" * 15)
print("Train Done!")