# Basic PyTorch Usage

## Datasets and DataLoaders

### Dataset

- Provides a way to access the data and its labels

Take the dataset from the official documentation as an example:

```
dataset
├── train        // training set
│   ├── ants     // label directory containing the corresponding images
│   └── bees
└── val          // test set
    ├── ants
    └── bees
```
- Provides the data to the network in different forms, for example in batches
- Wrap your own dataset by subclassing the `Dataset` class

```python
# Import the module; `Dataset??` prints its help in IPython/Jupyter
from torch.utils.data import Dataset
Dataset??

import os
import cv2

class Mydata(Dataset):
    def __init__(self, root_dir, label_dir):
        self.root_dir = root_dir
        self.label_dir = label_dir
        self.path = os.path.join(self.root_dir, self.label_dir)
        self.img_path = os.listdir(self.path)

    def __getitem__(self, idx):
        img_name = self.img_path[idx]
        img_item_path = os.path.join(self.root_dir, self.label_dir, img_name)
        img = cv2.imread(img_item_path, cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        label = self.label_dir
        return img, label

    def __len__(self):
        return len(self.img_path)

ants_dataset = Mydata("dataset/train/", "ants")
bees_dataset = Mydata("dataset/train/", "bees")
# Adding two Datasets concatenates them into one
train_data = ants_dataset + bees_dataset
```
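A quick, hypothetical usage check of the objects defined above:

```python
img, label = ants_dataset[0]   # __getitem__ returns (image, label)
print(label)                   # "ants"
print(len(train_data))         # len(ants_dataset) + len(bees_dataset)
```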
### Using the datasets provided by torchvision

```python
import torchvision

# Transform applied to every sample of the dataset
dataset_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])
# Downloads automatically if the data is not already present
train_set = torchvision.datasets.CIFAR10(root="./CIFAR10", train=True, transform=dataset_transform, download=True)
test_set = torchvision.datasets.CIFAR10(root="./CIFAR10", train=False, transform=dataset_transform, download=True)
# Each sample is a tuple of an image and a class index
print(test_set[0])
# Print the class names that the indices map to
print(test_set.classes)
```
### Using the DataLoader

```python
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])
test_set = torchvision.datasets.CIFAR10(root="./CIFAR10", train=False, transform=dataset_transform, download=False)

# batch_size: how many samples go into each batch
# shuffle: whether to shuffle the data; True makes the batches differ between epochs
# num_workers: number of worker processes used for loading
# drop_last: when the dataset size is not divisible by batch_size, whether to drop the last, smaller batch
test_loader = DataLoader(dataset=test_set, batch_size=4, shuffle=True, num_workers=0, drop_last=False)

writer = SummaryWriter("logs")
# Simulate two epochs of iteration
for epoch in range(2):
    step = 0
    for data in test_loader:
        imgs, targets = data
        writer.add_images(f"Epoch {epoch}", imgs, step)
        step += 1
writer.close()
```

## Using TensorBoard
```python
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import cv2

# Set the log output directory
writer = SummaryWriter("logs")

img = cv2.imread(r"moddataset/train/bees_image/16838648_415acd9e3f.jpg")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
print(type(img))
print(img.shape)
# Draw the image; dataformats tells TensorBoard the array layout is (H, W, C)
writer.add_image("test", img, 2, dataformats="HWC")

# Draw the curve y = x
for i in range(100):
    writer.add_scalar("y=x", i, i)
writer.close()
```

Once the logs have been generated, start the TensorBoard web server:

```bash
tensorboard --logdir=logs --port=6006
```

Each time you rerun the program afterwards, refresh the browser and the results will update.

## transforms in torchvision

### Converting a PIL.Image or numpy.ndarray to a tensor
```python
from torchvision import transforms
from PIL import Image

img_pil = Image.open("moddataset/train/ants_image/0013035.jpg")
totensor = transforms.ToTensor()
img_tensor = totensor(img_pil)
print(type(img_tensor))
print(img_tensor.shape)
```
### Normalizing an image

Normalization formula: $x' = \frac{x - \mu}{\sigma}$, where $\mu$ is the mean and $\sigma$ is the standard deviation.

Normalization filters out information in the image that the network does not need, such as brightness, and therefore speeds up training.

```python
# Set the per-channel mean and standard deviation
normalize = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
img_normalize = normalize(img_tensor)
```
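As a quick sanity check (a hypothetical snippet reusing `img_tensor` and `img_normalize` from above): with mean 0.5 and std 0.5 for every channel, the [0, 1] range produced by `ToTensor` is mapped to [-1, 1].

```python
print(img_tensor.min(), img_tensor.max())        # roughly 0 and 1
print(img_normalize.min(), img_normalize.max())  # roughly -1 and 1
```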
### Resizing with a transform

```python
resize = transforms.Resize((500, 500))
# PIL -> PIL
# (newer torchvision versions also accept a tensor directly; kept here as a lead-in to the Compose example)
img_resize = resize(img_pil)
# PIL -> tensor
img_resize = totensor(img_resize)
```
### Merging the two steps with transforms.Compose

```python
resize_totensor = transforms.Compose([
    transforms.Resize((500, 500)),
    transforms.ToTensor()
])
# Compose applies Resize first and ToTensor second, so it takes the PIL image as input
img_resize_tensor = resize_totensor(img_pil)
```

The transforms source code contains fairly detailed docstrings, so no further notes are taken here.
## Basic skeleton of a neural network

```python
import torch
from torch import nn

# nn.Module is the base class that every model inherits from
class Model(nn.Module):
    def __init__(self):
        super().__init__()

    # The forward method must be overridden
    def forward(self, input):
        output = input + 1
        return output

m = Model()
x = torch.tensor(1.0)
output = m(x)
print(output)   # tensor(2.)
```
## The convolution operation

```python
import torch
import torch.nn.functional as F

input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]])
# Reshape the inputs to (minibatch, channels, H, W)
input = torch.reshape(input, (1, 1, 5, 5))
kernel = torch.reshape(kernel, (1, 1, 3, 3))
print(input.shape)
print(kernel.shape)

# Convolve with the given stride
output = F.conv2d(input, kernel, stride=1)
print(output)   # expected result: [[10, 12, 12], [18, 16, 16], [13, 9, 3]]

# Zero-pad the border with one pixel
output1 = F.conv2d(input, kernel, stride=1, padding=1)
print(output1)
```
## Convolutional layers

```python
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("./CIFAR10", train=False, transform=torchvision.transforms.ToTensor(), download=False)
dataloader = DataLoader(dataset, batch_size=64)

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        # Add a convolutional layer to the model
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        x = self.conv1(x)
        return x

m = Model()
writer = SummaryWriter("./logs")
step = 0
for data in dataloader:
    imgs, targets = data
    output = m(imgs)
    print(output.shape)
    writer.add_images("input", imgs, step)
    # add_images only accepts 1-channel grayscale or 3-channel RGB images,
    # and the convolution outputs 6 channels, so reshape before writing
    # (32 - 3 + 1 = 30, hence the 30x30 spatial size)
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("output", output, step)
    step += 1
writer.close()
```
## Pooling layers

A pooling layer preserves the main features of the data while downsampling the tensor, which reduces the amount of data and speeds up training. Adding a pooling layer:

```python
# The default stride of a pooling layer equals its kernel size.
# ceil_mode=True rounds the output size up, i.e. the partial windows
# smaller than kernel_size at the border are kept instead of dropped.
# Max pooling requires a floating-point tensor; specify dtype=torch.float32 when creating it.
# Pooling does not change the number of channels.
self.maxpool1 = nn.MaxPool2d(kernel_size=3, ceil_mode=True)
```
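A minimal runnable sketch of the pooling layer (assuming the same 5x5 matrix used in the convolution example above):

```python
import torch
from torch import nn

# Max pooling needs a floating-point dtype
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
input = torch.reshape(input, (1, 1, 5, 5))

maxpool = nn.MaxPool2d(kernel_size=3, ceil_mode=True)
output = maxpool(input)
# With ceil_mode=True the partial windows at the border are kept,
# so the 5x5 input gives a 2x2 output:
print(output)
# tensor([[[[2., 3.],
#           [5., 1.]]]])
```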
## Non-linear activation functions

### ReLU

Other activation functions are used in much the same way as ReLU; their formulas and effects are still to be added here.

```python
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        # inplace controls whether the input tensor is modified in place; the default is False
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        output = self.relu(x)
        return output
```
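A minimal usage sketch with a hand-made 2x2 input (values chosen only for illustration):

```python
import torch
from torch import nn

input = torch.tensor([[1.0, -0.5],
                      [-1.0, 3.0]])
relu = nn.ReLU(inplace=False)
print(relu(input))   # negative entries become 0
# tensor([[1., 0.],
#         [0., 3.]])
```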
## Linear layers

```python
# Specify the number of input features and output features
# (this line sits inside the Model class used in the snippets above)
self.linear1 = nn.Linear(196608, 10)

m = Model()
for data in dataloader:
    imgs, targets = data
    print(imgs.shape)
    # output = torch.reshape(imgs, (1, 1, 1, -1))
    # Flatten the whole batch of images into a single vector
    output = torch.flatten(imgs)
    print(output.shape)     # torch.Size([196608]) = 64 * 3 * 32 * 32
    output = m(output)
    print(output.shape)     # torch.Size([10])
```
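The fragment above relies on the CIFAR10 `dataloader` and a `Model` class defined in earlier snippets; a self-contained sketch could look like this (note `drop_last=True`, since the last, smaller batch would not flatten to 196608 values):

```python
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("./CIFAR10", train=False,
                                       transform=torchvision.transforms.ToTensor(),
                                       download=False)
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        # 196608 = 64 * 3 * 32 * 32: one whole batch of CIFAR10 images flattened
        self.linear1 = nn.Linear(196608, 10)

    def forward(self, x):
        return self.linear1(x)

m = Model()
for imgs, targets in dataloader:
    output = m(torch.flatten(imgs))   # torch.Size([196608]) -> torch.Size([10])
    print(output.shape)
    break
```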
## Sequential

`nn.Sequential` is similar to `transforms.Compose`: it chains several layers into a single module.

```python
import torch
from torch import nn
from torch.utils.tensorboard import SummaryWriter

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            # 1024 = 64 channels * 4 * 4 after three 2x2 poolings (32 -> 16 -> 8 -> 4)
            nn.Linear(1024, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        out = self.model1(x)
        return out

m = Model()
print(m)
input1 = torch.ones((64, 3, 32, 32))
output = m(input1)
print(output.shape)   # torch.Size([64, 10])

writer = SummaryWriter("./logs/")
# add_graph draws the model structure in TensorBoard
writer.add_graph(m, input1)
writer.close()
```
## Common loss functions

```python
import torch
from torch import nn

inputs = torch.tensor([1, 2, 3], dtype=torch.float32)
targets = torch.tensor([1, 2, 5], dtype=torch.float32)
inputs = torch.reshape(inputs, (1, 1, 1, 3))
targets = torch.reshape(targets, (1, 1, 1, 3))

# L1 loss, summed instead of the default mean
loss = nn.L1Loss(reduction='sum')
result = loss(inputs, targets)
print(result)            # tensor(2.)

# Mean squared error
loss_mse = nn.MSELoss()
result_mse = loss_mse(inputs, targets)
print(result_mse)        # tensor(1.3333)

# Cross-entropy loss for classification: x holds the scores for 3 classes, y the target class index
x = torch.tensor([0.1, 0.2, 0.3])
y = torch.tensor([1])
x = torch.reshape(x, (1, 3))
loss_cross = nn.CrossEntropyLoss()
result_cross = loss_cross(x, y)
print(result_cross)      # tensor(1.1019)
```
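For reference, `nn.CrossEntropyLoss` combines log-softmax with the negative log-likelihood, so the value above can be reproduced by hand:

```python
# loss = -x[target] + log(sum_j exp(x[j]))
x = torch.tensor([0.1, 0.2, 0.3])
print(-x[1] + torch.log(torch.exp(x).sum()))   # tensor(1.1019), matches result_cross
```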
## Backpropagation and optimizers

```python
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("./CIFAR10", train=False, transform=torchvision.transforms.ToTensor(), download=False)
dataloader = DataLoader(dataset, batch_size=64)

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        out = self.model1(x)
        return out

m = Model()
# Create the loss function
loss = nn.CrossEntropyLoss()
# Create the optimizer
optim = torch.optim.SGD(m.parameters(), lr=0.01)

for epoch in range(20):
    running_loss = 0.0
    for imgs, targets in dataloader:
        output = m(imgs)
        result_loss = loss(output, targets)
        # Clear the gradients computed for the previous batch
        optim.zero_grad()
        # Backpropagate to compute the gradients
        result_loss.backward()
        # Let the optimizer update the parameters
        optim.step()
        running_loss = running_loss + result_loss
    print(f"Epoch: {epoch+1}, Loss: {running_loss}")
```
## Using and modifying existing network models

```python
import torchvision
from torch import nn

vgg16_true = torchvision.models.vgg16()
# Append a module at the end of the model
vgg16_true.add_module("add linear", nn.Linear(1000, 10))
# Append a module inside a specific submodule (here the classifier)
vgg16_true.classifier.add_module("add linear in classifier", nn.Linear(1000, 10))
# Replace an existing module
vgg16_true.classifier[6] = nn.Linear(4096, 10)
print(vgg16_true)
```
## Saving and loading models

```python
import torch
import torchvision

vgg16_true = torchvision.models.vgg16()

# Method 1: save both the model structure and its parameters
torch.save(vgg16_true, "vgg16_method1.pth")
# Loading returns the model object itself.
# Note: when saving and loading your own model this way, the original model class
# must be available in the script that loads it (copy the source or import it).
model = torch.load("vgg16_method1.pth")

# Method 2: save only the parameters, not the structure
torch.save(vgg16_true.state_dict(), "vgg16_method2.pth")
# Loading returns a dict (the state_dict)
model = torch.load("vgg16_method2.pth")
# Rebuild the model and load the parameters into it
vgg16 = torchvision.models.vgg16()
vgg16.load_state_dict(torch.load("vgg16_method2.pth"))
```
## Complete model training workflow

model.py:

```python
import torch
from torch import nn

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        out = self.model1(x)
        return out

# Run a basic sanity check of the model here
if __name__ == '__main__':
    m = Model()
    inputs = torch.ones((64, 3, 32, 32))
    output = m(inputs)
    print(output.shape)
```
train_test.py:

```python
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# Import the model definition
from model import Model

# Prepare the datasets and dataloaders, with batch_size set to 64
train_dataset = torchvision.datasets.CIFAR10("./CIFAR10", train=True, transform=torchvision.transforms.ToTensor(), download=False)
test_dataset = torchvision.datasets.CIFAR10("./CIFAR10", train=False, transform=torchvision.transforms.ToTensor(), download=False)
train_dataloader = DataLoader(train_dataset, batch_size=64)
test_dataloader = DataLoader(test_dataset, batch_size=64)

# Instantiate the model
m = Model()
# Cross-entropy loss for the 10-class classification task
loss_fn = nn.CrossEntropyLoss()
# Learning rate, written out as a variable by convention
learning_rate = 1e-2
# Stochastic gradient descent optimizer
optimizer = torch.optim.SGD(m.parameters(), lr=learning_rate)

# Counters for the total number of training and test steps
total_train_step = 0
total_test_step = 0
# Train for 10 epochs
epoch = 10

writer = SummaryWriter("./logs")
for i in range(epoch):
    # Required when the model contains Dropout or BatchNorm layers;
    # optional for this model, but good practice to keep it
    m.train()
    for imgs, targets in train_dataloader:
        output = m(imgs)
        loss = loss_fn(output, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_step += 1
        if total_train_step % 100 == 0:
            print(f"Step {total_train_step}, Loss: {loss.item()}")
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    total_test_loss = 0
    total_accuracy = 0
    # Required when the model contains Dropout or BatchNorm layers;
    # optional for this model, but good practice to keep it
    m.eval()
    # Gradients are not needed during testing
    with torch.no_grad():
        for imgs, targets in test_dataloader:
            output = m(imgs)
            loss = loss_fn(output, targets)
            total_test_loss += loss
            # argmax along dim 1 picks the predicted class; count the correct predictions
            accuracy = (output.argmax(1) == targets).sum()
            total_accuracy += accuracy

    print(f"Total test loss: {total_test_loss}")
    # Accuracy = correct predictions / number of test samples
    print(f"Total test acc: {total_accuracy/len(test_dataset)}")
    writer.add_scalar("test_accuracy", total_accuracy/len(test_dataset), total_test_step)
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    total_test_step += 1
    torch.save(m, f"torch_{i}.pth")
writer.close()
```
## Training on the GPU

Method 1:

```python
# Call .cuda() on the model, the data, and the loss function
m = Model()
m = m.cuda()

loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.cuda()

imgs = imgs.cuda()
targets = targets.cuda()
```
Method 2:

```python
# Call .to(device) on the model, the data, and the loss function
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
loss_fn = loss_fn.to(device)
imgs = imgs.to(device)
targets = targets.to(device)
```
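A sketch of where these calls sit in the training loop from train_test.py above (assuming `Model`, `train_dataloader`, and the SGD optimizer from that script). For tensors, `.to(device)` returns a new tensor and must be reassigned; for modules the parameters are moved in place, although reassigning does no harm:

```python
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

m = Model().to(device)
loss_fn = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(m.parameters(), lr=1e-2)

for imgs, targets in train_dataloader:
    imgs, targets = imgs.to(device), targets.to(device)
    output = m(imgs)
    loss = loss_fn(output, targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
```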
## Validating the model

```python
import torch
from PIL import Image
from torchvision import transforms
# The Model class must be importable so that torch.load can reconstruct the model
from model import Model

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Preprocess the image
img = Image.open("img.png")
img = img.convert("RGB")
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor()
])
img = transform(img)
# The model was trained on the GPU, so move the image to the same device
img = img.to(device)
# Reshape to add a batch dimension
img = torch.reshape(img, (1, 3, 32, 32))

# When loading a GPU-trained model on a CPU-only machine, pass map_location:
# m = torch.load("torch_9.pth", map_location=torch.device('cpu'))
m = torch.load("torch_9.pth")
# Move the model to the same device as well
m = m.to(device)

m.eval()
output = m(img)
print(output.argmax(1))
```

Unless otherwise stated, all articles on this blog are licensed under CC BY-NC-SA 4.0. Please credit Pengunix as the source when reposting.
