MNIST introduction: http://yann.lecun.com/exdb/mnist/
MNIST in torchvision: https://pytorch.org/vision/stable/generated/torchvision.datasets.MNIST.html#torchvision.datasets.MNIST
torchvision.datasets.MNIST(root, train, transform, target_transform, download)
Parameters: `root` is the directory under which MNIST/raw/train-images-idx3-ubyte
and MNIST/raw/t10k-images-idx3-ubyte live. With train=True the dataset is built
from train-images-idx3-ubyte, otherwise from t10k-images-idx3-ubyte.
# Load the MNIST dataset — sizes below: training 50000, validation 10000, test 10000
# MNIST: the 60k training images are split 50k/10k into train/val; the 10k
# official test split is kept as-is.
to_tensor = transforms.ToTensor()
train_dataset = datasets.MNIST(root='./data', train=True, transform=to_tensor, download=True)
train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [50000, 10000])
test_dataset = datasets.MNIST(root='./data', train=False, transform=to_tensor, download=True)

# Data loaders: shuffle only the training stream.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
# Define the neural network.
class Model(nn.Module):
    """Small CNN for 10-class MNIST classification.

    Two conv+pool stages followed by two fully-connected layers;
    forward() returns per-class log-probabilities (log_softmax, dim=1).
    """

    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.pool = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        # 28x28 -> conv5 -> 24x24 -> pool -> 12x12 -> conv5 -> 8x8 -> pool -> 4x4
        h = self.pool(nn.functional.relu(self.conv1(x)))
        h = self.pool(nn.functional.relu(self.conv2(h)))
        flat = h.view(-1, 320)  # flatten: 20 channels * 4 * 4 = 320
        hidden = nn.functional.relu(self.fc1(flat))
        logits = self.fc2(hidden)
        return nn.functional.log_softmax(logits, dim=1)


# Instantiate the network.
model = Model()
Additions for running on the GPU:
# Select the compute device once: CUDA when available, otherwise CPU.
# (The original assigned `device` twice — the first ternary assignment was
# dead code, immediately overwritten by the if/else below.)
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using GPU:", torch.cuda.get_device_name())
else:
    device = torch.device("cpu")
    print("Using CPU")
# Move the model's parameters and buffers to the selected device.
model.to(device)
# training + testing
# NOTE(review): illustrative fragment — `inputs`/`labels` are only bound inside
# the training/testing loops below; this line is not runnable at this point.
inputs, labels = inputs.to(device), labels.to(device) # move the batch to the GPU
# Loss function and optimizer.
# The model's forward() already ends in log_softmax, so the matching loss is
# NLLLoss (negative log-likelihood over log-probabilities). The original used
# CrossEntropyLoss, which expects raw logits and therefore applied log_softmax
# a second time.
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
# Train the model.
def train(model, train_loader, val_loader, optimizer, criterion, num_epochs):
    """Train `model` for `num_epochs` epochs, validating after each one.

    Prints train loss, validation loss and validation accuracy per epoch and
    returns the list of per-epoch average training losses.
    """
    # Run batches on whichever device the model's parameters live on, instead
    # of relying on a module-level `device` global.
    device = next(model.parameters()).device
    train_losses, val_losses, val_accs = [], [], []
    for epoch in range(num_epochs):
        # --- training pass ---
        model.train()
        train_loss = 0.0
        num_batches = 0
        for inputs, labels in train_loader:
            # BUG FIX: the original never moved the training batch to the
            # model's device, which crashes once the model is on the GPU.
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            num_batches += 1
        train_losses.append(train_loss / num_batches)

        # --- validation pass ---
        model.eval()
        val_loss = 0.0
        correct = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                val_loss += criterion(outputs, labels).item()
                _, preds = torch.max(outputs, 1)
                correct += torch.sum(preds == labels).item()
        val_losses.append(val_loss / len(val_loader))
        # Use the loader's own dataset size instead of a `val_dataset` global.
        val_accs.append(correct / len(val_loader.dataset))
        print('Epoch: {}, Train Loss: {:.4f}, Val Loss: {:.4f}, Val Acc: {:.4f}'.format(
            epoch + 1, train_losses[-1], val_losses[-1], val_accs[-1]))
    return train_losses
# Train.
train_losses = train(model, train_loader, val_loader, optimizer, criterion, num_epochs=10)

# Evaluate on the held-out test set.
model.eval()
test_loss = 0.0
test_acc = 0.0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)  # move the batch to the device
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        test_acc += torch.sum(predicted == labels.data)
test_loss /= len(test_loader)
test_acc /= len(test_dataset)
print('Test Loss: {:.4f}, Test Acc: {:.4f}'.format(test_loss, test_acc))
# Visualize the training loss over epochs.
plt.plot(train_losses)
plt.xlabel('Epoch')
plt.ylabel('Train Loss')
# The loss falls steadily and train/val losses converge — the model is learning.
plt.show()
# Save the trained weights.
torch.save(model.state_dict(), './cnn_mnist.pth')

# Reload them into a fresh instance.
model = Model()
# map_location='cpu' lets a checkpoint saved on the GPU load on a CPU-only
# machine (without it, torch.load tries to restore tensors to the saving device).
model.load_state_dict(torch.load('./cnn_mnist.pth', map_location='cpu'))

# Print the architecture.
print(model)

# Print the parameters (left disabled — very verbose).
# for name, param in model.named_parameters():
#     if param.requires_grad:
#         print(name, param.data)
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader# 加载MNIST数据集 len(train_dataset): 训练集5000 验证集1000 测试集1000
# MNIST: 60,000 training images (randomly split 50,000 train / 10,000 val)
# plus the official 10,000-image test set.
train_dataset = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [50000, 10000])
test_dataset = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)

# Batch the three splits; only training batches are shuffled.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
class Model(nn.Module):def __init__(self):super(Model, self).__init__()self.conv1 = nn.Conv2d(1, 10, kernel_size=5)self.conv2 = nn.Conv2d(10, 20, kernel_size=5)self.pool = nn.MaxPool2d(2)self.fc1 = nn.Linear(320, 50)self.fc2 = nn.Linear(50, 10)def forward(self, x):x = self.pool(nn.functional.relu(self.conv1(x)))x = self.pool(nn.functional.relu(self.conv2(x)))x = x.view(-1, 320)x = nn.functional.relu(self.fc1(x))x = self.fc2(x)return nn.functional.log_softmax(x, dim=1)# 调用网络
# Instantiate the network and select the compute device.
# (The original assigned `device` twice — the initial ternary assignment was
# dead code, immediately overwritten by the if/else below.)
model = Model()
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using GPU:", torch.cuda.get_device_name())
else:
    device = torch.device("cpu")
    print("Using CPU")
model.to(device)
# Loss function and optimizer.
# forward() already applies log_softmax, so NLLLoss is the matching criterion;
# CrossEntropyLoss (used originally) expects raw logits and would apply
# log_softmax a second time.
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
def train(model, train_loader, val_loader, optimizer, criterion, num_epochs):train_losses = []val_losses = []val_accs = []for epoch in range(num_epochs):train_loss = 0.0model.train()for i, (inputs, labels) in enumerate(train_loader):inputs, labels = inputs.to(device), labels.to(device) # 加载到GPUoptimizer.zero_grad()outputs = model(inputs)loss = criterion(outputs, labels)loss.backward()optimizer.step()train_loss += loss.item()train_losses.append(train_loss / (i + 1))val_loss = 0.0val_acc = 0.0model.eval()with torch.no_grad():for i, (inputs, labels) in enumerate(val_loader):inputs, labels = inputs.to(device), labels.to(device) # 加载到GPUoutputs = model(inputs)loss = criterion(outputs, labels)val_loss += loss.item()_, preds = torch.max(outputs, 1)val_acc += torch.sum(preds == labels.data)val_losses.append(val_loss / (i + 1))val_accs.append(val_acc / len(val_dataset))print('Epoch: {}, Train Loss: {:.4f}, Val Loss: {:.4f}, Val Acc: {:.4f}'.format(epoch + 1, train_losses[-1],val_losses[-1], val_accs[-1]))return train_losses# 训练
# Run training, then measure loss and accuracy on the test split.
train_losses = train(model, train_loader, val_loader, optimizer, criterion, num_epochs=10)

model.eval()
test_loss = 0.0
test_acc = 0.0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)  # move the batch to the device
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        test_acc += torch.sum(predicted == labels.data)
test_loss /= len(test_loader)
test_acc /= len(test_dataset)
print('Test Loss: {:.4f}, Test Acc: {:.4f}'.format(test_loss, test_acc))
# Visualize the per-epoch training loss.
plt.plot(train_losses)
plt.xlabel('Epoch')
plt.ylabel('Train Loss')
# Steadily decreasing loss with converging train/val curves indicates learning.
plt.show()
# Save the trained weights to disk.
torch.save(model.state_dict(), './cnn_mnist.pth')

# Reload them into a fresh instance.
model = Model()
# map_location='cpu' makes a GPU-saved checkpoint loadable on a CPU-only
# machine; without it torch.load restores tensors onto the saving device.
model.load_state_dict(torch.load('./cnn_mnist.pth', map_location='cpu'))

# Print the model architecture.
print(model)

# Print the parameters (disabled — very verbose).
# for name, param in model.named_parameters():
#     if param.requires_grad:
#         print(name, param.data)
Using GPU: NVIDIA GeForce GTX 1050 Ti
Epoch: 1, Train Loss: 1.0488, Val Loss: 0.3327, Val Acc: 0.8924
Epoch: 2, Train Loss: 0.2292, Val Loss: 0.2650, Val Acc: 0.9176
Epoch: 3, Train Loss: 0.1465, Val Loss: 0.1260, Val Acc: 0.9621
Epoch: 4, Train Loss: 0.1132, Val Loss: 0.1200, Val Acc: 0.9623
Epoch: 5, Train Loss: 0.0953, Val Loss: 0.0961, Val Acc: 0.9698
Epoch: 6, Train Loss: 0.0837, Val Loss: 0.1250, Val Acc: 0.9613
Epoch: 7, Train Loss: 0.0746, Val Loss: 0.0734, Val Acc: 0.9769
Epoch: 8, Train Loss: 0.0671, Val Loss: 0.0719, Val Acc: 0.9765
Epoch: 9, Train Loss: 0.0621, Val Loss: 0.0730, Val Acc: 0.9772
Epoch: 10, Train Loss: 0.0570, Val Loss: 0.0606, Val Acc: 0.9809
Test Loss: 0.0527, Test Acc: 0.9835
Model((conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))(conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))(pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)(fc1): Linear(in_features=320, out_features=50, bias=True)(fc2): Linear(in_features=50, out_features=10, bias=True)
)