
Day 32. Deep Learning - Vision (CNN & Fine Tuning)


 

Day 32 Retrospective.

 

I still don't know how to approach data preprocessing. Every preprocessing step I add lowers the score further, so I can't tell which part is the problem.

 

 

 

 

1. Deep Learning - Vision

 

 

CNN - Data Augmentation

 

Create Transform

import numpy as np
import torch
from torchvision import datasets, transforms

sample_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])

sample_dataset = datasets.ImageFolder(train_dir, transform=sample_transform)

"""
features, target = sample_dataset[0]
features.shape
# torch.Size([3, 64, 64])

features_numpy = features.numpy()
features_numpy.shape
# (3, 64, 64) -> (color, height, width)

np.mean(features_numpy, axis=2).shape		# mean over the width axis
# (3, 64)

np.mean(features_numpy, axis=(1, 2)).shape	# mean over (height, width)
# (3,)
"""
def calculate_norm(dataset):			# compute mean/std from the train dataset only
    # per-image mean over (height, width)
    mean_ = np.array([np.mean(x.numpy(), axis=(1, 2)) for x, _ in dataset])
    # mean_ -> (dataset_size, color)
    mean_r = mean_[:, 0].mean()
    mean_g = mean_[:, 1].mean()
    mean_b = mean_[:, 2].mean()
    
    # per-image std over (height, width)
    std_ = np.array([np.std(x.numpy(), axis=(1, 2)) for x, _ in dataset])
    # std_ -> (dataset_size, color)
    std_r = std_[:, 0].mean()
    std_g = std_[:, 1].mean()
    std_b = std_[:, 2].mean()
    
    return (mean_r, mean_g, mean_b), (std_r, std_g, std_b)

mean_, std_ = calculate_norm(sample_dataset)
mean_, std_
# ((0.5354544, 0.41756177, 0.3301338), (0.22768107, 0.22967243, 0.21931246))
train_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean_, std_),
    # Data Augmentation -> applied to train data only
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5)
])

test_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean_, std_)
])

 

Dataset

train_data = datasets.ImageFolder(train_dir, transform=train_transform)
test_data = datasets.ImageFolder(test_dir, transform=test_transform)

 

DataLoader

import os

from torch.utils.data import DataLoader

BATCH_SIZE = 32
NUM_WORKERS = os.cpu_count()			# number of CPU cores

torch.manual_seed(42)

train_dataloader = DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS
)

test_dataloader = DataLoader(
    test_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS
)

 

CNN Model

class CNNModel(nn.Module):
    def __init__(self, input_shape:int, hidden_units:int, output_shape:int):
        super().__init__()
        self.block_1 = nn.Sequential(
            nn.Conv2d(
                in_channels=input_shape,
                out_channels=hidden_units,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=hidden_units,
                out_channels=hidden_units,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.MaxPool2d(
                kernel_size=2,
                stride=2
            )
        )
        self.block_2 = nn.Sequential(
            nn.Conv2d(hidden_units, hidden_units*2, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_units*2, hidden_units*2, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(
                in_features=hidden_units*2*16*16,
                out_features=hidden_units*2*16*16
            ),
            nn.ReLU(),
            nn.Linear(
                in_features=hidden_units*2*16*16,
                out_features=output_shape
            )
        )
    
    def forward(self, x:torch.Tensor):
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.classifier(x)
        return x

cnn_model = CNNModel(
    input_shape=3,
    hidden_units=10,
    output_shape=len(train_data.classes)
).to(device)

 

Training

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    params=cnn_model.parameters(),
    lr=0.1
)

reset_seeds()

epochs = 10
epoch_count = []
train_loss_values = []
test_loss_values = []
for epoch in tqdm(range(epochs), desc='Epoch', position=0):
    epoch_count.append(epoch)
    train_loss, train_acc = train_step(
        data_loader=train_dataloader,
        model=cnn_model,
        loss_fn=loss_fn,
        optimizer=optimizer,
        accuracy_fn=accuracy_fn,
        device=device
    )
    train_loss_values.append(train_loss.detach().cpu().numpy())
    
    test_loss, test_acc = test_step(
        data_loader=test_dataloader,
        model=cnn_model,
        loss_fn=loss_fn,
        accuracy_fn=accuracy_fn,
        device=device
    )
    test_loss_values.append(test_loss.detach().cpu().numpy())
plt.plot(epoch_count, train_loss_values, label='Train Loss')
plt.plot(epoch_count, test_loss_values, label='Test Loss')

plt.title('Training and Test Loss Curves')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

 

Evaluate

reset_seeds()

cnn_model_results = eval_model(
    model=cnn_model,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn
)

cnn_model_results
#{'model_name': 'CNNModel',
# 'model_loss': 1.1106144189834595,
# 'model_acc': 26.041666666666668}

 

 

 

CNN - VGG & ResNet

 

TinyVGG

class TinyVGG(nn.Module):
    def __init__(self, input_shape:int, hidden_units:int, output_shape:int) -> None:
        super().__init__()
        
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(
                in_channels=input_shape,
                out_channels=hidden_units,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=hidden_units,
                out_channels=hidden_units,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.MaxPool2d(
                kernel_size=2,
                stride=2
            )
        )
        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(hidden_units, hidden_units*2, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_units*2, hidden_units*2, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(
                in_features=hidden_units*2*16*16,
                out_features=hidden_units*2*16*16
            ),
            nn.ReLU(),
            nn.Linear(
                in_features=hidden_units*2*16*16,
                out_features=output_shape
            )
        )
    
    def forward(self, x:torch.Tensor):
        x = self.conv_block_1(x)
        x = self.conv_block_2(x)
        x = self.classifier(x)
        return x

torch.manual_seed(42)

vgg_model = TinyVGG(
    input_shape=3,
    hidden_units=10,
    output_shape=len(train_data.classes)
).to(device)

torchinfo.summary(vgg_model, (32, 3, 64, 64), col_names=['kernel_size', 'input_size', 'output_size', 'num_params'])
# ============================================================================================================================================
# Layer (type:depth-idx)                   Kernel Shape              Input Shape               Output Shape              Param #
# ============================================================================================================================================
# TinyVGG                                  --                        [32, 3, 64, 64]           [32, 3]                   --
# ├─Sequential: 1-1                        --                        [32, 3, 64, 64]           [32, 10, 32, 32]          --
# │    └─Conv2d: 2-1                       [3, 3]                    [32, 3, 64, 64]           [32, 10, 64, 64]          280
# │    └─ReLU: 2-2                         --                        [32, 10, 64, 64]          [32, 10, 64, 64]          --
# │    └─Conv2d: 2-3                       [3, 3]                    [32, 10, 64, 64]          [32, 10, 64, 64]          910
# │    └─ReLU: 2-4                         --                        [32, 10, 64, 64]          [32, 10, 64, 64]          --
# │    └─MaxPool2d: 2-5                    2                         [32, 10, 64, 64]          [32, 10, 32, 32]          --
# ├─Sequential: 1-2                        --                        [32, 10, 32, 32]          [32, 20, 16, 16]          --
# │    └─Conv2d: 2-6                       [3, 3]                    [32, 10, 32, 32]          [32, 20, 32, 32]          1,820
# │    └─ReLU: 2-7                         --                        [32, 20, 32, 32]          [32, 20, 32, 32]          --
# │    └─Conv2d: 2-8                       [3, 3]                    [32, 20, 32, 32]          [32, 20, 32, 32]          3,620
# │    └─ReLU: 2-9                         --                        [32, 20, 32, 32]          [32, 20, 32, 32]          --
# │    └─MaxPool2d: 2-10                   2                         [32, 20, 32, 32]          [32, 20, 16, 16]          --
# ├─Sequential: 1-3                        --                        [32, 20, 16, 16]          [32, 3]                   --
# │    └─Flatten: 2-11                     --                        [32, 20, 16, 16]          [32, 5120]                --
# │    └─Linear: 2-12                      --                        [32, 5120]                [32, 5120]                26,219,520
# │    └─ReLU: 2-13                        --                        [32, 5120]                [32, 5120]                --
# │    └─Linear: 2-14                      --                        [32, 5120]                [32, 3]                   15,363
# ============================================================================================================================================
# Total params: 26,241,513
# Trainable params: 26,241,513
# Non-trainable params: 0
# Total mult-adds (Units.GIGABYTES): 1.17
# ============================================================================================================================================
# Input size (MB): 1.57
# Forward/backward pass size (MB): 32.77
# Params size (MB): 104.97
# Estimated Total Size (MB): 139.31
# ============================================================================================================================================

 

ResNet

# Basic Block
import torch.nn.functional as F

class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        
        self.conv1 = nn.Conv2d(
            in_planes,
            planes,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False
        )
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes,
            planes,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(
                    in_planes,
                    self.expansion*planes,
                    kernel_size=1,
                    stride=stride,
                    bias=False
                ),
                nn.BatchNorm2d(self.expansion*planes)
            )
    
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out
# ResNet
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        
        self.in_planes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(2048*block.expansion, num_classes)	# 512 channels * 2*2 spatial left after avg_pool2d(4) on 64x64 inputs
    
    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        
        return nn.Sequential(*layers)
    
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

def ResNet18(num_classes):
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes)

torch.manual_seed(42)

resnet_model = ResNet18(len(train_data.classes)).to(device)
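As with TinyVGG above, torchinfo can sanity-check the layer shapes; a minimal sketch assuming the same 64×64 input batch:

import torchinfo

torchinfo.summary(resnet_model, (32, 3, 64, 64), col_names=['input_size', 'output_size', 'num_params'])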

 

 

Fine Tuning

 

Fine Tuning

  • Pre-Training
    • A model that has already been fully trained on a (usually large) dataset
    • Sources
      • PyTorch pre-trained models
      • HuggingFace Hub
      • timm
      • Paperswithcode
  • Transfer Learning
    • Reusing a pre-trained model and training it again on a new dataset (features, target)
  • Fine Tuning
    • Modifying some layers of a pre-trained model and training it again on a new dataset (features, target)
    • Strategies (see the sketch after this list)
      • Large dataset, low similarity
        • Train the entire model
      • Large dataset, high similarity
        • Train part of the pre-trained model plus the classifier
      • Small dataset, low similarity
        • Train part of the pre-trained model plus the classifier
      • Small dataset, high similarity
        • Train only the classifier
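The strategies above boil down to choosing which parameters to freeze. A minimal sketch of the two extremes, assuming a torchvision-style model with a features backbone and a classifier head (the same structure as the EfficientNet used below):

import torchvision

model = torchvision.models.efficientnet_b0(weights='DEFAULT')

# Small dataset, high similarity: freeze the backbone, train only the classifier
for param in model.features.parameters():
    param.requires_grad = False

# Large dataset, low similarity: unfreeze everything and train the whole model
for param in model.parameters():
    param.requires_grad = True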

 

Setup

# Global Variables
from google.colab import drive
drive.mount('/content/data')
import easydict
args = easydict.EasyDict()

args.default_path = ""
args.train_dir = args.default_path+ "data/pizza_steak_sushi/train"
args.test_dir = args.default_path+ "data/pizza_steak_sushi/test"
# install
try:
    import torch
    import torchvision
except:
    !pip3 install -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
    import torch
    import torchvision

from torch import nn
from torchvision import transforms

try:
    from torchinfo import summary
except:
    !pip install -q torchinfo

    from torchinfo import summary

try:
    from torchmetrics import ConfusionMatrix
    from mlxtend.plotting import plot_confusion_matrix
except:
    !pip install -q torchmetrics -U mlxtend

    from torchmetrics import ConfusionMatrix
    from mlxtend.plotting import plot_confusion_matrix
# Import
from tqdm.auto import tqdm
import sys

sys.path.append(args.default_path)		# add default_path to sys.path
from service import data_setup, engine		# the service folder lives under default_path

import matplotlib.pyplot as plt

import random
from PIL import Image
from pathlib import Path
# Function
def plot_loss_curves(results):
    loss = results['train_loss']
    test_loss = results['test_loss']
    
    accuracy = results['train_acc']
    test_accuracy = results['test_acc']
    
    epochs = range(len(results['train_loss']))
    
    plt.figure(figsize=(15, 7))
    
    plt.subplot(1, 2, 1)
    plt.plot(epochs, loss, label='train_loss')
    plt.plot(epochs, test_loss, label='test_loss')
    plt.title('Loss')
    plt.xlabel('Epochs')
    plt.legend()
    
    plt.subplot(1, 2, 2)
    plt.plot(epochs, accuracy, label='train_accuracy')
    plt.plot(epochs, test_accuracy, label='test_accuracy')
    plt.title('Accuracy')
    plt.xlabel('Epochs')
    plt.legend()
from typing import List, Tuple
from PIL import Image

def pred_and_plot_image(model:torch.nn.Module, image_path:str, class_names:List[str],
                        transform:torchvision.transforms, image_size:Tuple[int,int]=(224,224),
                        device:torch.device='cpu'):
    img = Image.open(image_path)					# PIL image; becomes (color, height, width) after ToTensor
    
    if transform is not None:
        image_transform = transform
    else:
        image_transform = transforms.Compose([
            transforms.Resize(image_size),
            transforms.ToTensor()
        ])
    
    model.to(device)
    model.eval()
    
    with torch.inference_mode():
        transformed_image = image_transform(img).unsqueeze(dim=0)	# (batch, color, row, column)
        target_image_pred = model(transformed_image.to(device))		# (batch, target_size)
    
    target_image_pred_probs = torch.softmax(target_image_pred, dim=1)
    target_image_pred_label = torch.argmax(target_image_pred_probs, dim=1)
    
    plt.figure()
    plt.imshow(img)
    plt.title(f'Pred: {class_names[target_image_pred_label]} | Prob: {target_image_pred_probs.max():.3f}')
    plt.axis(False)
def plot_heatmap_of_confusion_matrix(model, dataloader):
    y_preds = []
    test_targets = []
    
    model.to(device)
    model.eval()
    
    with torch.inference_mode():
        for X, y in tqdm(dataloader, desc='Making Predictions', leave=False):
            test_targets.append(y)
            X, y = X.to(device), y.to(device)
            
            y_logit = model(X)
            y_pred = torch.softmax(y_logit, dim=1).argmax(dim=1)
            y_preds.append(y_pred.cpu())
        
    y_preds = torch.cat(y_preds)
    test_targets = torch.cat(test_targets)
    
    confmat = ConfusionMatrix(num_classes=len(class_names), task='multiclass')
    confmat_tensor = confmat(preds=y_preds, target=test_targets)	# test_targets is already a tensor from torch.cat
    
    fig, ax = plot_confusion_matrix(
        conf_mat=confmat_tensor.numpy(),
        class_names=class_names,
        figsize=(10, 7)
    )
# device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

 

EfficientNet

  • Adjusts model scaling (depth, width, resolution) to get maximum efficiency out of limited resources; the compound-scaling note below summarizes the idea.
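For reference, the compound-scaling rule from the EfficientNet paper (Tan & Le, 2019) ties all three dimensions to a single coefficient $\phi$: depth $d = \alpha^\phi$, width $w = \beta^\phi$, resolution $r = \gamma^\phi$, subject to $\alpha \cdot \beta^2 \cdot \gamma^2 \approx 2$, where $\alpha, \beta, \gamma$ are found by grid search on the B0 baseline; B1-B7 are produced by increasing $\phi$.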
# Pre-training - weights
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT

weights
# EfficientNet_B0_Weights.IMAGENET1K_V1
# Pre-training - transforms
auto_transforms = weights.transforms()

auto_transforms
# ImageClassification(
#     crop_size=[224]
#     resize_size=[256]
#     mean=[0.485, 0.456, 0.406]
#     std=[0.229, 0.224, 0.225]
#     interpolation=InterpolationMode.BICUBIC
# )
# Pre-training - Dataset & DataLoader
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=args.train_dir,
    test_dir=args.test_dir,
    transform=auto_transforms,
    batch_size=32
)

train_dataloader, test_dataloader, class_names
# (<torch.utils.data.dataloader.DataLoader at 0x7c0f0bee03d0>,
#  <torch.utils.data.dataloader.DataLoader at 0x7c0f0a376710>,
#  ['pizza', 'steak', 'sushi'])
# Pre-training - model
from torchvision.models._api import WeightsEnum
from torch.hub import load_state_dict_from_url

# workaround: patch WeightsEnum so a failing hash check doesn't block the weight download
def get_state_dict(self, *args, **kwargs):
    kwargs.pop('check_hash', None)
    return load_state_dict_from_url(self.url, *args, **kwargs)

WeightsEnum.get_state_dict = get_state_dict

efficientnet_model = torchvision.models.efficientnet_b0(weights=weights).to(device)
# EfficientNet(
#   (features): Sequential(
#     (0): Conv2dNormActivation(
#       (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
#       (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
#       (2): SiLU(inplace=True)
#     )
# ...
#   (classifier): Sequential(
#     (0): Dropout(p=0.2, inplace=True)
#     (1): Linear(in_features=1280, out_features=1000, bias=True)
#   )
# )
# Pre-training - summary
summary(
    model=efficientnet_model,
    input_size=(32, 3, 224, 224),
    col_names=['input_size', 'output_size', 'num_params', 'trainable'],
    col_width=20,
    row_settings=['var_names']
)
# ============================================================================================================================================
# Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
# ============================================================================================================================================
# EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 1000]           --                   True
# ├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   True
# │    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   True
# │    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   864                  True
# │    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   64                   True
# │    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
# ...
# ├─Sequential (classifier)                                    [32, 1280]           [32, 1000]           --                   True
# │    └─Dropout (0)                                           [32, 1280]           [32, 1280]           --                   --
# │    └─Linear (1)                                            [32, 1280]           [32, 1000]           1,281,000            True
# ============================================================================================================================================
# Total params: 5,288,548
# Trainable params: 5,288,548
# Non-trainable params: 0
# Total mult-adds (Units.GIGABYTES): 12.35
# ============================================================================================================================================
# Input size (MB): 19.27
# Forward/backward pass size (MB): 3452.35
# Params size (MB): 21.15
# Estimated Total Size (MB): 3492.77
# ============================================================================================================================================

 

Fine Tuning

# freeze the feature extractor (train only the classifier)
for param in efficientnet_model.features.parameters():
    param.requires_grad = False

summary(
    model=efficientnet_model,
    input_size=(32, 3, 224, 224),
    col_names=['input_size', 'output_size', 'num_params', 'trainable'],
    col_width=20,
    row_settings=['var_names']
)
# ============================================================================================================================================
# Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
# ============================================================================================================================================
# EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 1000]           --                   False
# ├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   False
# │    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
# │    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   864                  False
# │    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   64                   False
# │    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
# ...
# ├─Sequential (classifier)                                    [32, 1280]           [32, 1000]           --                   True
# │    └─Dropout (0)                                           [32, 1280]           [32, 1280]           --                   --
# │    └─Linear (1)                                            [32, 1280]           [32, 1000]           1,281,000            True
# ============================================================================================================================================
# Total params: 5,288,548
# Trainable params: 1,281,000
# Non-trainable params: 4,007,548
# Total mult-adds (Units.GIGABYTES): 12.35
# ============================================================================================================================================
# Input size (MB): 19.27
# Forward/backward pass size (MB): 3452.35
# Params size (MB): 21.15
# Estimated Total Size (MB): 3492.77
# ============================================================================================================================================
torch.manual_seed(42)
torch.cuda.manual_seed(42)

output_shape = len(class_names)

efficientnet_model.classifier = torch.nn.Sequential(
    torch.nn.Dropout(p=0.2, inplace=True),
    torch.nn.Linear(
        in_features=1280,
        out_features=output_shape,
        bias=True
    )
).to(device)

summary(
    model=efficientnet_model,
    input_size=(32, 3, 224, 224),
    col_names=['input_size', 'output_size', 'num_params', 'trainable'],
    col_width=20,
    row_settings=['var_names']
)
# ============================================================================================================================================
# Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
# ============================================================================================================================================
# EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 3]              --                   Partial
# ├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   False
# │    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
# │    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
# │    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
# │    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
# ...
# ├─Sequential (classifier)                                    [32, 1280]           [32, 3]              --                   True
# │    └─Dropout (0)                                           [32, 1280]           [32, 1280]           --                   --
# │    └─Linear (1)                                            [32, 1280]           [32, 3]              3,843                True
# ============================================================================================================================================
# Total params: 4,011,391
# Trainable params: 3,843
# Non-trainable params: 4,007,548
# Total mult-adds (G): 12.31
# ============================================================================================================================================
# Input size (MB): 19.27
# Forward/backward pass size (MB): 3452.09
# Params size (MB): 16.05
# Estimated Total Size (MB): 3487.41
# ============================================================================================================================================
# Training
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(efficientnet_model.parameters(), lr=0.001)

torch.manual_seed(42)
torch.cuda.manual_seed(42)

results = engine.train(
    model=efficientnet_model,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    epochs=10,
    device=device
)
# Plotting Loss Curves
plot_loss_curves(results)

Underfitting

# Confusion Matrix
plot_heatmap_of_confusion_matrix(efficientnet_model, test_dataloader)

 

MobileNet

  • An architecture with more layers than earlier models, but with far less total computation, so it can run in environments without high-end hardware
  • Depth-wise separable convolution cost
    • $D_K × D_K × M × D_F × D_F + M × N × D_F × D_F$

  • Algorithmic efficiency
    • Compared to a standard convolution's $D_K × D_K × M × N × D_F × D_F$, the cost ratio is $\frac{1}{N} + \frac{1}{D_K^2}$ (see the worked example below)
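A quick numeric check of that ratio; a minimal sketch where the values ($D_K=3$, $M=32$, $N=64$, $D_F=56$) are illustrative assumptions, not values from the post:

# cost of a standard convolution vs. a depth-wise separable convolution
D_K, M, N, D_F = 3, 32, 64, 56   # kernel size, input channels, output channels, feature-map size

standard = D_K * D_K * M * N * D_F * D_F                    # standard convolution
separable = D_K * D_K * M * D_F * D_F + M * N * D_F * D_F   # depth-wise + point-wise

print(separable / standard)      # ~0.127, matching 1/N + 1/D_K**2 = 1/64 + 1/9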