

Day 30. PyTorch - Multiclass Classification


 

Day 30 Retrospective.

 

Tomorrow is the ADsP exam, and once it is over I have to start the unit project. I still don't know data analysis very well, so I will probably need to keep studying as the project moves along.

1. PyTorch

 

 

1-1. Multiclass Classification Model

 

Fixing the Random Seed

import os
import random
import numpy as np
import torch

def reset_seeds(seed=42):
    random.seed(seed)                            # Python built-in RNG
    os.environ['PYTHONHASHSEED'] = str(seed)     # note: only affects subprocesses started after this point
    np.random.seed(seed)                         # NumPy RNG
    torch.manual_seed(seed)                      # PyTorch CPU RNG
    torch.cuda.manual_seed(seed)                 # PyTorch GPU RNG
    torch.backends.cudnn.deterministic = True    # force deterministic cuDNN kernels
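This helper is called again right before each model is built below, so every run starts from the same state. If multiple GPUs were in use, torch.cuda.manual_seed_all would seed all of them at once; a minimal sketch (the wrapper name is hypothetical):

def reset_seeds_all(seed=42):
    reset_seeds(seed)
    torch.cuda.manual_seed_all(seed)  # seed every visible GPU, not just the current device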

 

Getting a dataset

from torchvision import datasets
from torchvision.transforms import ToTensor

train_dataset = datasets.FashionMNIST(
    root='download',
    train=True,
    download=True,
    transform=ToTensor()
)

test_dataset = datasets.FashionMNIST(
    root='download',
    train=False,
    download=True,
    transform=ToTensor()
)

len(train_dataset), len(test_dataset)
# (60000, 10000)
import matplotlib.pyplot as plt

feature, target = train_dataset[0]

feature.shape, target
# (torch.Size([1, 28, 28]), 9)

plt.title(train_dataset.classes[target])
plt.imshow(feature.squeeze(), cmap='gray')
plt.axis(False)

plt.show()
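For reference, the classes attribute used for the title above holds FashionMNIST's ten label names:

train_dataset.classes
# ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
#  'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']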

 

DataLoader

from torch.utils.data import DataLoader

reset_seeds()
batch_size = 64

train_dataloader = DataLoader(
    train_dataset,
    batch_size,
    shuffle=True
)

test_dataloader = DataLoader(
    test_dataset,
    batch_size,
    shuffle=True
)

len(train_dataloader), len(test_dataloader)
# (938, 157)
features, targets = next(iter(train_dataloader))

features.shape, targets.shape
# (torch.Size([64, 1, 28, 28]), torch.Size([64]))

plt.imshow(features[0].squeeze(), cmap='gray')
plt.title(train_dataset.classes[targets[0]])
plt.axis(False)

plt.show()
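To eyeball a whole batch rather than a single sample, torchvision's make_grid can tile the images; a small sketch using the batch fetched above (16 images and nrow=4 are arbitrary choices):

from torchvision.utils import make_grid

grid = make_grid(features[:16], nrow=4)   # tile the first 16 images into a 4x4 grid
plt.imshow(grid.permute(1, 2, 0))         # CHW -> HWC for matplotlib
plt.axis(False)
plt.show()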

 

Model

import torch
from torch import nn

class MultiModel(nn.Module):
    def __init__(self, input_size, output_size, hidden_size=32) -> None:
        super().__init__()
        
        self.linear_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size*2),
            nn.ReLU(),
            nn.Linear(hidden_size*2, output_size)
        )
    
    def forward(self, x):
        return self.linear_stack(x)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = MultiModel(28*28, len(train_dataset.classes)).to(device)
!pip install torchinfo

import torchinfo

torchinfo.summary(model, (10, 1, 28, 28))
# ==========================================================================================
# Layer (type:depth-idx)                   Output Shape              Param #
# ==========================================================================================
# MultiModel                               [10, 10]                  --
# ├─Sequential: 1-1                        [10, 10]                  --
# │    └─Flatten: 2-1                      [10, 784]                 --
# │    └─Linear: 2-2                       [10, 32]                  25,120
# │    └─ReLU: 2-3                         [10, 32]                  --
# │    └─Linear: 2-4                       [10, 64]                  2,112
# │    └─ReLU: 2-5                         [10, 64]                  --
# │    └─Linear: 2-6                       [10, 10]                  650
# ==========================================================================================
# Total params: 27,882
# Trainable params: 27,882
# Non-trainable params: 0
# Total mult-adds (Units.MEGABYTES): 0.28
# ==========================================================================================
# Input size (MB): 0.03
# Forward/backward pass size (MB): 0.01
# Params size (MB): 0.11
# Estimated Total Size (MB): 0.15
# ==========================================================================================
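The Param # column can be checked by hand: a Linear layer holds in_features × out_features weights plus out_features biases.

784*32 + 32   # Linear(784, 32) -> 25,120
32*64 + 64    # Linear(32, 64)  ->  2,112
64*10 + 10    # Linear(64, 10)  ->    650
# 25,120 + 2,112 + 650 = 27,882 total parameters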

 

Training

from tqdm.auto import tqdm

def model_train(model, dataloader, device, loss_fn, optimizer):
    model.train()
    
    train_loss = 0
    
    for feature, target in tqdm(dataloader, desc='Train Loop', leave=False):
        feature = feature.to(device)
        target = target.to(device)
        
        pred = model(feature)
        
        loss = loss_fn(pred, target)
        train_loss += loss.cpu().item()
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    return train_loss / len(dataloader)
def model_test(model, dataloader, device, loss_fn, accuracy_fn):
    model.eval()
    
    test_loss, test_accuracy = 0, 0
    
    with torch.inference_mode():
        for feature, target in tqdm(dataloader, desc='Test Loop', leave=False):
            feature = feature.to(device)
            target = target.to(device)
            
            pred = model(feature)
            
            loss = loss_fn(pred, target)
            test_loss += loss.cpu().item()
            
            pred_prob = nn.Softmax(dim=1)(pred).argmax(dim=1)
            test_accuracy += accuracy_fn(pred_prob.cpu(), target.cpu())
        
    return test_loss / len(dataloader), test_accuracy / len(dataloader)
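A small note on the softmax above: softmax is monotonic, so taking argmax over the raw logits gives the same predicted classes; the explicit nn.Softmax(dim=1) only matters when actual probabilities are wanted. A quick check:

logits = torch.tensor([[2.0, 0.5, -1.0]])
logits.argmax(dim=1) == nn.Softmax(dim=1)(logits).argmax(dim=1)
# tensor([True])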
def loss_plot(train_loss, test_loss):
    plt.plot(train_loss, label='Train Loss')
    plt.plot(test_loss, label='Test Loss')
    
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    
    plt.show()
from torch.optim import SGD

# lr=0.1 is an assumed default for SGD here
def training(model, train_dataloader, test_dataloader, loss_fn, accuracy_fn, device='cpu', optimizer=None, lr=0.1, epochs=100):
    model = model.to(device)
    
    if optimizer is None:
        optimizer = SGD(params=model.parameters(), lr=lr)
    
    train_losses, test_losses = [], []
    
    for epoch in tqdm(range(epochs), desc='Epoch Loop', leave=True):
        train_loss = model_train(model, train_dataloader, device, loss_fn, optimizer)
        train_losses.append(train_loss)
        
        test_loss, test_accuracy = model_test(model, test_dataloader, device, loss_fn, accuracy_fn)
        test_losses.append(test_loss)
    
    loss_plot(train_losses, test_losses)
from helper_functions import accuracy_fn

reset_seeds()
model = MultiModel(1*28*28, len(train_dataset.classes))
loss_fn = nn.CrossEntropyLoss()

training(model, train_dataloader, test_dataloader, loss_fn, accuracy_fn, device)

# Save Model
from pathlib import Path

model_path = Path('models')
model_path.mkdir(parents=True, exist_ok=True)

save_name = 'Multiclass_Model.pth'
save_path = model_path / save_name

torch.save(obj=model.state_dict(), f=save_path)

 

Predictions

# Load Model
model = MultiModel(1*28*28, 10).to(device)
model.load_state_dict(torch.load(save_path))
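One caveat, assuming the checkpoint may move between machines: a state_dict saved from a GPU run fails to load on a CPU-only machine unless map_location is given. A defensive variant of the load above:

model.load_state_dict(torch.load(save_path, map_location=device))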
import random

def select_random_samples(dataset, sample_size=9):
    random_imgs, random_labels = [], []

    for img, label in random.sample(list(dataset), sample_size):
        random_imgs.append(img)
        random_labels.append(label)
    
    return random_imgs, random_labels

def predict_plot(dataset, model, nrows=3, ncols=3):
    random_imgs, random_labels = select_random_samples(dataset, sample_size=nrows*ncols)
    class_names = dataset.classes
    
    plt.figure(figsize=(12, 9))
    
    for i, img in enumerate(random_imgs):
        plt.subplot(nrows, ncols, i+1)
        plt.imshow(img.permute(1, 2, 0), cmap='gray')
        plt.axis(False)
        
        with torch.inference_mode():  # no gradients needed at prediction time
            pred = model(img.unsqueeze(dim=0).to(device))
        pred_idx = nn.Softmax(dim=1)(pred).argmax(dim=1)
        pred_label = class_names[pred_idx]
        truth_label = class_names[random_labels[i]]
        
        title = f"Prediction: {pred_label} | Truth: {truth_label}"
        if pred_label == truth_label:
            plt.title(title, c='b')
        else:
            plt.title(title, c='r')
    
    plt.show()

predict_plot(test_dataset, model)

 

Training - Early Stop

import numpy as np

class EarlyStopper(object):
    def __init__(self, trial_num, best_model_path) -> None:
        self.trial_num = trial_num              # patience: how many non-improving epochs to tolerate
        self.now_trial = 0
        self.best_loss = np.inf
        self.best_model_path = best_model_path
    
    def get_best_model(self, device):
        # the whole model object is saved below, so the class definition must be importable here
        return torch.load(self.best_model_path).to(device)
    
    def is_continuable(self, epoch, loss, accuracy, model):
        if loss < self.best_loss:
            # new best loss: save the model and reset the patience counter
            self.best_loss = loss
            self.now_trial = 0
            torch.save(model, self.best_model_path)
            print(f"Epoch: {epoch} | Test Loss: {loss:.4f} | Accuracy: {accuracy}")
            return True
        elif self.now_trial < self.trial_num:
            # no improvement yet, but patience remains
            self.now_trial += 1
            return True
        else:
            # patience exhausted: stop training
            return False
from torch.optim import SGD

def training(model, early_stopper: EarlyStopper, train_dataloader, test_dataloader, loss_fn, accuracy_fn, device='cpu', optimizer=None, lr=0.1, epochs=100):
    model = model.to(device)
    
    if optimizer is None:
        optimizer = SGD(params=model.parameters(), lr=lr)
    
    train_losses, test_losses = [], []
    
    for epoch in tqdm(range(epochs), desc='Epoch Loop', leave=True):
        train_loss = model_train(model, train_dataloader, device, loss_fn, optimizer)
        train_losses.append(train_loss)
        
        test_loss, test_accuracy = model_test(model, test_dataloader, device, loss_fn, accuracy_fn)
        test_losses.append(test_loss)
        
        if not early_stopper.is_continuable(epoch, test_loss, test_accuracy, model):
            break
    
    loss_plot(train_losses, test_losses)
from helper_functions import accuracy_fn

reset_seeds()

model = MultiModel(1*28*28, len(train_dataset.classes))
early_stopper = EarlyStopper(trial_num=5, best_model_path="best_model.pth")
loss_fn = nn.CrossEntropyLoss()

training(model, early_stopper, train_dataloader, test_dataloader, loss_fn, accuracy_fn, device)

# Epoch: 0 | Test Loss: 0.8538 | Accuracy: 67.58558917197452
# Epoch: 1 | Test Loss: 0.6844 | Accuracy: 75.55732484076434
# Epoch: 2 | Test Loss: 0.6181 | Accuracy: 78.04538216560509
# ...
# Epoch: 21 | Test Loss: 0.4218 | Accuracy: 85.22093949044586
# Epoch: 22 | Test Loss: 0.4156 | Accuracy: 85.25079617834395
# Epoch: 25 | Test Loss: 0.3953 | Accuracy: 86.01711783439491

 

Predictions - Early Stop

# Load Model
best_model = early_stopper.get_best_model(device)
best_model.eval()

best_targets, best_preds = [], []

with torch.inference_mode():
    for feature, target in tqdm(test_dataloader, desc='Test Loop'):
        best_targets.extend(target.numpy())
        
        pred = best_model(feature.to(device))
        np_pred = pred.argmax(dim=1).cpu().numpy()
        best_preds.extend(np_pred)
from sklearn.metrics import confusion_matrix

norm_conf_mx = confusion_matrix(
    best_targets,
    best_preds,
    normalize='true'    # normalize over each actual class, so rows sum to 1
)
import seaborn as sns

plt.figure(figsize=(10, 10))
sns.heatmap(norm_conf_mx, annot=True, cmap='coolwarm', linewidths=0.5)
plt.xlabel('Prediction')
plt.ylabel('Actual')

plt.show()
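Because normalize='true' makes each row (actual class) sum to 1, the diagonal of the matrix is per-class recall; a small sketch for spotting the weakest classes:

per_class_acc = norm_conf_mx.diagonal()   # recall for each actual class
for name, acc in zip(test_dataset.classes, per_class_acc):
    print(f"{name}: {acc:.2%}")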

predict_plot(test_dataset, best_model)