
Hung-yi Lee ML2020Spring HW3

Hung-yi Lee 2020 Spring Machine Learning Course HW3: Food Classification

Characteristics of the data: there is only a small amount of labeled data, together with a large amount of unlabeled data.

Tasks:

  1. Apply data augmentation to the labeled data
  2. Apply semi-supervised learning (pseudo-labeling) to the unlabeled data
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder

from tqdm import tqdm

train_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])
train_tfm1 = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.RandomErasing(p=0.8),        # Note: RandomErasing cannot handle PIL images, so convert to a Tensor first
    transforms.RandomRotation(degrees=30),  # random rotation within +-30 degrees
    transforms.RandomHorizontalFlip(p=0.9), # horizontal flip
])
# No augmentation is needed for the test data
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])
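
As a quick sanity check of the augmentation pipeline, here is a minimal sketch (not part of the original notebook; the sample path is hypothetical) that runs train_tfm1 on a single PIL image and prints the resulting tensor shape:

# Hypothetical sample path, only for illustration; any JPEG from the labeled folder would do
sample_path = "../input/ml2021springhw3/food-11/training/labeled/00/0_0.jpg"
img = Image.open(sample_path)
augmented = train_tfm1(img)  # Resize -> ToTensor -> RandomErasing -> RandomRotation -> RandomHorizontalFlip
print(augmented.shape)       # expected for an RGB image: torch.Size([3, 128, 128])
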
batch_size = 128

# When the data is already organized into per-class folders, DatasetFolder handles it conveniently
# loader is the operation applied when reading a file: it takes a path and returns an image
# transform is applied inside the dataset's __getitem__()

train_set0 = DatasetFolder("../input/ml2021springhw3/food-11/training/labeled", loader=lambda x: Image.open(x), extensions="jpg", transform=train_tfm)
train_set1 = DatasetFolder("../input/ml2021springhw3/food-11/training/labeled", loader=lambda x: Image.open(x), extensions="jpg", transform=train_tfm1)
train_set = train_set0 + train_set1
# Dataset overloads +, implemented with ConcatDataset, so + is equivalent to ConcatDataset
# The original images and the augmented images are concatenated and used together as training data

valid_set = DatasetFolder("../input/ml2021springhw3/food-11/validation", loader=lambda x: Image.open(x), extensions="jpg", transform=test_tfm)
unlabeled_set = DatasetFolder("../input/ml2021springhw3/food-11/training/unlabeled", loader=lambda x: Image.open(x), extensions="jpg", transform=train_tfm)
test_set = DatasetFolder("../input/ml2021springhw3/food-11/testing", loader=lambda x: Image.open(x), extensions="jpg", transform=test_tfm)

# Construct data loaders.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
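
A small check (a sketch, not part of the original code) confirming that the + used above really builds a ConcatDataset whose length is the sum of its two parts:

# Dataset.__add__ is implemented with ConcatDataset
print(isinstance(train_set, ConcatDataset))                  # True
print(len(train_set) == len(train_set0) + len(train_set1))   # True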

Network architecture:

class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # The arguments for commonly used modules:
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)

        # input image size: [3, 128, 128]
        self.cnn_layers = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),

            nn.Conv2d(64, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),

            nn.Conv2d(128, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),

            nn.Conv2d(256, 512, 3, 1, 1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),

            nn.Conv2d(512, 1024, 3, 1, 1),
            nn.BatchNorm2d(1024),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),
        )
        self.fc_layers = nn.Sequential(
            nn.Linear(1024 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 11)
        )

    def forward(self, x):
        x = self.cnn_layers(x)
        x = x.flatten(1)
        x = self.fc_layers(x)
        return x
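
The 1024 * 4 * 4 input size of the first linear layer comes from the five stride-2 max-poolings: 128 -> 64 -> 32 -> 16 -> 8 -> 4. A minimal sketch (my addition, not in the original code) to verify the shapes with a dummy batch:

tmp_model = Classifier()
dummy = torch.zeros(1, 3, 128, 128)   # one fake RGB image
feat = tmp_model.cnn_layers(dummy)
print(feat.shape)                     # expected: torch.Size([1, 1024, 4, 4])
print(feat.flatten(1).shape)          # expected: torch.Size([1, 16384]) = 1024 * 4 * 4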

Building the pseudo-labeled dataset

class pseudo_dataset(Dataset):
    def __init__(self, features, labels):
        self.x = features
        self.y = labels
    def __getitem__(self, index):
        return self.x[index], self.y[index].item()
    def __len__(self):
        return len(self.x)

def get_pseudo_labels(dataset, model, threshold=0.65):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # First build a dataloader for the unlabeled data
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)
    model.eval()
    # Define the softmax function
    softmax = nn.Softmax(dim=-1)

    features, labels = [], []

    for batch in tqdm(dataloader):
        img, _ = batch
        # Use torch.no_grad() to speed up the forward pass
        with torch.no_grad():
            logits = model(img.to(device))

        # Turn the logits into a probability distribution with softmax
        probs = softmax(logits)

        maxp, pos = torch.max(probs, dim=-1)  # the highest probability and its class for each image in the batch
        for i, Img in enumerate(img):
            if maxp[i] >= threshold:  # if the highest probability exceeds the threshold, assign that class as the label
                features.append(Img.cpu())
                labels.append(pos[i].cpu())

    pseudodataset = pseudo_dataset(features, labels)  # build the pseudo-labeled dataset

    model.train()
    return pseudodataset
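
To make the thresholding step concrete, a toy sketch (made-up numbers, not output from the real model) of what softmax followed by torch.max produces for one batch of logits:

toy_logits = torch.tensor([[2.0, 0.1, 0.1],
                           [0.5, 0.4, 0.6]])   # 2 images, 3 classes (toy values)
toy_probs = nn.Softmax(dim=-1)(toy_logits)
maxp, pos = torch.max(toy_probs, dim=-1)
print(maxp)  # highest probability per image; only entries >= threshold would receive a pseudo label
print(pos)   # the corresponding predicted class indices
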
def try_gpu(i=0):
    if torch.cuda.device_count() >= i + 1:
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')
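
try_gpu simply returns cuda:i when that many GPUs are available and falls back to the CPU otherwise, for example:

print(try_gpu())   # cuda:0 on a machine with at least one GPU, otherwise cpu
print(try_gpu(3))  # cpu unless at least four GPUs are available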

Training

model = Classifier().to(device=try_gpu())

# Load a previously saved checkpoint (this overrides the freshly initialized model above)
model = torch.load('./verssion1.model').to(device=try_gpu())

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-4)

n_epochs = 80

do_semi = False

lst_valid_acc = 0

for epoch in range(n_epochs):
    # Only do semi-supervised learning when the previous epoch's validation accuracy exceeds 0.6
    if lst_valid_acc > 0.6:
        do_semi = True
    else:
        do_semi = False

    if do_semi:
        pseudo_set = get_pseudo_labels(unlabeled_set, model, threshold=0.65)
        concat_dataset = ConcatDataset([train_set, pseudo_set])
        # Note: the Dataset passed to a DataLoader must never be moved onto the GPU, otherwise multi-worker loading will fail!
        train_loader = DataLoader(concat_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
    else:
        train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)

    # ---------- Training ----------
    model.train()

    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):
        imgs, labels = batch
        logits = model(imgs.to(device=try_gpu()))
        loss = criterion(logits, labels.to(device=try_gpu()))
        optimizer.zero_grad()
        loss.backward()

        # Apply gradient clipping
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        optimizer.step()

        # Compute this batch's accuracy
        acc = (logits.argmax(dim=-1) == labels.to(device=try_gpu())).float().mean()

        # Record the loss and accuracy
        train_loss.append(loss.item())
        train_accs.append(acc.item())

    # Average the loss and accuracy
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    model.eval()

    valid_loss = []
    valid_accs = []

    for batch in tqdm(valid_loader):
        imgs, labels = batch

        with torch.no_grad():
            logits = model(imgs.to(device=try_gpu()))

        loss = criterion(logits, labels.to(device=try_gpu()))

        acc = (logits.argmax(dim=-1) == labels.to(device=try_gpu())).float().mean()

        valid_loss.append(loss.item())
        valid_accs.append(acc.item())

    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)
    lst_valid_acc = valid_acc

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")
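
The loop above starts from a checkpoint loaded from './verssion1.model' but never writes one back; a one-line sketch (my addition, reusing the filename from above) of how such a checkpoint could be saved after training:

# Save the whole model object so it can later be restored with torch.load
torch.save(model, './verssion1.model')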

After the model is trained, run predictions on the test data

model.eval()

predictions = []

for batch in tqdm(test_loader):
    imgs, labels = batch
    with torch.no_grad():
        logits = model(imgs.to(device=try_gpu()))
    predictions.extend(logits.argmax(dim=-1).cpu().numpy().tolist())

with open("./predict.csv", "w") as f:
    f.write("Id,Category\n")
    for i, pred in enumerate(predictions):
        f.write(f"{i},{pred}\n")

After the final submission, the private leaderboard accuracy only reached 0.70412, still a long way from the strong baseline, but I honestly don't have the heart to keep training it.

It's so frustrating when the model just won't train well, sob.

------------- End of post, thanks for reading -------------