본문 바로가기

SK네트웍스 Family AI캠프 10기/Daily 회고

33일차. Deep Learning - Vision(Fine Tuning)

더보기

 

33일 차 회고

 

 오늘 수업도 따라가기가 조금 힘들었다. GPU 사용량 제한에 걸려서 코드를 제대로 돌려 보지 못한 탓도 있는 것 같다. 단위 프로젝트는 우선 다음 주 화요일까지 각자 모델을 완성하고 화면도 구현해 보기로 했다.

.

 

 

 

1. Deep Learning - Vision

 

 

Fine Tuning

 

Data Augmentation - GPU(Google Colab - T4)

# Import Module
!pip install torchinfo

import os
import random

import numpy as np
import torch
from torch import nn

from torch.utils.data import DataLoader

import torchvision
from torchvision import datasets, transforms

from torchinfo import summary
def reset_seeds(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and CUDA),
    exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed shared by all generators. Defaults to 42.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Each seeder takes the seed as its single positional argument.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    torch.backends.cudnn.deterministic = True
# Global Variables
import easydict

# Notebook-wide settings gathered in one attribute-accessible dictionary.
args = easydict.EasyDict({
    # Run on the GPU whenever one is visible to PyTorch.
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    'NUM_EPOCHS': 30,
    # Passed to EarlyStopper as num_trials.
    'NUM_TRIALS': 5,
    'BATCH_SIZE': 32,
    # Checkpoint file written by the early stopper.
    'best_fine_tuning_model': 'best_fine_tuning_model.pt',
})
# Load Data
import requests
import zipfile
from pathlib import Path

# Download and unpack the pizza/steak/sushi image subset unless it is already on disk.
data_path = Path('data/')
image_path = data_path / 'pizza_steak_sushi'

if image_path.is_dir():
    print(f"{image_path} directory exists.")
else:
    print(f"Did not find {image_path} directory, creating one...")
    # Only the parent is created up front: if the download fails, image_path
    # stays absent and the next run retries instead of skipping the download.
    data_path.mkdir(parents=True, exist_ok=True)

    zip_path = data_path / 'pizza_steak_sushi.zip'

    print('Downloading pizza, steak, sushi data...')
    request = requests.get(
        'https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip',
        timeout=60,
    )
    # Fail loudly on an HTTP error instead of saving an error page as the archive.
    request.raise_for_status()
    zip_path.write_bytes(request.content)

    image_path.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        print('Unzipping pizza, steak, sushi data...')
        zip_ref.extractall(image_path)

# Did not find data/pizza_steak_sushi directory, creating one...
# Downloading pizza, steak, sushi data...
# Unzipping pizza, steak, sushi data...

train_dir = image_path / 'train'
test_dir = image_path / 'test'
# EDA - PIL
# Pick one image at random from the dataset and print its class and size.
import random
from PIL import Image

# Re-seed so random.choice below makes the same pick on every run.
reset_seeds()

image_path_list = list(image_path.glob('*/*/*.jpg'))		# glob is only available on Path objects

random_image_path = random.choice(image_path_list)

# The parent directory name is the class label (<split>/<class>/<file>.jpg).
image_class = random_image_path.parent.stem

img = Image.open(random_image_path)

print(f"Random image path: {random_image_path}")
print(f"Image class: {image_class}")
print(f"Image height: {img.height}")
print(f"Image width: {img.width}")
# Random image path: data/pizza_steak_sushi/train/sushi/2980779.jpg
# Image class: sushi
# Image height: 512
# Image width: 512

# Bare expression: a notebook renders the image as the cell output.
img

# EDA - Matplotlib
import numpy as np
import matplotlib.pyplot as plt

# Convert the PIL image to an array so matplotlib can render it and its shape can be shown.
img_as_array = np.asarray(img)

plt.figure(figsize=(10, 7))
plt.imshow(img_as_array)
plt.title(f"Image class: {image_class} | Image shape: {img_as_array.shape} -> [height, width, color_channels]")
plt.axis(False)

# Data Augmentation
from PIL import ImageEnhance

# NOTE: every pass globs the training folder again, so files written by an
# earlier pass (or an earlier run of this cell) are augmented too. Run once
# on a fresh download to reproduce the counts shown below.
train_dir_list = list(train_dir.glob('*/*.jpg'))
len(train_dir_list)
# 225

# Pass 1: geometric variants (180-degree rotation and horizontal flip).
for im_path in train_dir_list:
    # Drop only the final suffix; splitting on the first '.' would truncate
    # any path that contains another dot.
    _im_name = str(im_path.with_suffix(''))

    # The context manager closes the source file once both variants are saved.
    with Image.open(im_path) as _im:
        _im.rotate(180).save(_im_name+'_rotate.jpg')
        _im.transpose(Image.FLIP_LEFT_RIGHT).save(_im_name+'_transpose.jpg')

train_dir_list = list(train_dir.glob('*/*.jpg'))
len(train_dir_list)
# 675

# Pass 2: photometric variants (sharpness and contrast) of every file now present.
for im_path in train_dir_list:
    _im_name = str(im_path.with_suffix(''))

    with Image.open(im_path) as _im:
        enhancer = ImageEnhance.Sharpness(_im)
        enhancer.enhance(10.0).save(_im_name+'_sharpness.jpg')
        enhancer = ImageEnhance.Contrast(_im)
        enhancer.enhance(2).save(_im_name+'_contrast.jpg')

train_dir_list = list(train_dir.glob('*/*.jpg'))
len(train_dir_list)
# 2025
# Dataset - Data Augmentation
# Pretrained weight set for MobileNetV3-Small; DEFAULT resolved to IMAGENET1K_V1 in the recorded output below.
weights = torchvision.models.MobileNet_V3_Small_Weights.DEFAULT
weights
# MobileNet_V3_Small_Weights.IMAGENET1K_V1

# Preprocessing pipeline bundled with the weights; its settings are printed below.
auto_transforms = weights.transforms()
auto_transforms
# ImageClassification(
#     crop_size=[224]
#     resize_size=[256]
#     mean=[0.485, 0.456, 0.406]
#     std=[0.229, 0.224, 0.225]
#     interpolation=InterpolationMode.BILINEAR
# )
# Dataset - Class
from typing import Tuple, Dict, List

def find_classes(directory):
    """Map each immediate sub-directory of ``directory`` to a class index.

    Args:
        directory: Folder whose sub-directory names are the class names.

    Returns:
        A ``(classes, class_to_idx)`` tuple: the sorted list of sub-directory
        names, and a dict from each name to its position in that list.

    Raises:
        FileNotFoundError: If ``directory`` contains no sub-directories.
    """
    with os.scandir(directory) as entries:
        classes = [entry.name for entry in entries if entry.is_dir()]
    classes.sort()

    if len(classes) == 0:
        raise FileNotFoundError(f"Couldn't find any classes in {directory}.")

    class_to_idx = dict(zip(classes, range(len(classes))))

    return classes, class_to_idx

classes, class_to_idx = find_classes(train_dir)
import pathlib
from torch.utils.data import Dataset

class ImageFolderCustom(Dataset):
    """Dataset over a ``<class_name>/<image>.jpg`` folder layout.

    Each sample is ``(image, class_index)``; the label comes from the name of
    the directory that contains the image file.
    """

    def __init__(self, targ_dir:str, transform=auto_transforms) -> None:
        """Collect the image paths and the class mapping.

        Args:
            targ_dir: Root folder holding one sub-directory per class.
            transform: Callable applied to every loaded image; a falsy value
                returns the PIL image untouched.
        """
        self.paths = list(pathlib.Path(targ_dir).glob('*/*.jpg'))
        self.transform = transform
        self.classes, self.class_to_idx = find_classes(targ_dir)

    def load_image(self, index:int) -> Image.Image:
        """Open and return the image stored at position ``index``."""
        # Must be a local name: the notebook also has a global `image_path`
        # (the dataset root), which a misspelt local would silently fall back to.
        image_path = self.paths[index]
        return Image.open(image_path)

    def __len__(self) -> int:
        """Return the number of image files found."""
        return len(self.paths)

    def __getitem__(self, index:int) -> Tuple[torch.Tensor, int]:
        """Return the (optionally transformed) image and its class index."""
        img = self.load_image(index)
        class_name = self.paths[index].parent.name
        class_idx = self.class_to_idx[class_name]

        if self.transform:
            return self.transform(img), class_idx
        else:
            return img, class_idx
reset_seeds()

# Both splits go through the preprocessing pipeline bundled with the pretrained weights.
train_dataset = ImageFolderCustom(
    targ_dir=train_dir,
    transform=auto_transforms
)

test_dataset = ImageFolderCustom(
    targ_dir=test_dir,
    transform=auto_transforms
)
# DataLoader
reset_seeds()

train_dataloader = DataLoader(
    train_dataset,
    batch_size=args.BATCH_SIZE,
    shuffle=True
)

# Evaluation gains nothing from shuffling. A fixed order keeps the
# per-batch-averaged accuracy and the collected predictions identical
# from one pass to the next.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=args.BATCH_SIZE,
    shuffle=False
)
# Model - Pretrained Weight
reset_seeds()

# MobileNetV3-Small initialised from the pretrained weights, moved to the configured device.
model = torchvision.models.mobilenet_v3_small(weights=weights).to(args.device)

# Per-layer shapes, parameter counts and trainability for a 32-image batch of 3x224x224 inputs.
summary(
    model=model,
    input_size=(32, 3, 224, 224),
    col_names=['input_size', 'output_size', 'num_params', 'trainable'],
    col_width=20,
    row_settings=['var_names']
)
# ============================================================================================================================================
# Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
# ============================================================================================================================================
# MobileNetV3 (MobileNetV3)                                    [32, 3, 224, 224]    [32, 1000]           --                   True
# ├─Sequential (features)                                      [32, 3, 224, 224]    [32, 576, 7, 7]      --                   True
# │    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 16, 112, 112]   --                   True
# │    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 16, 112, 112]   432                  True
# │    │    └─BatchNorm2d (1)                                  [32, 16, 112, 112]   [32, 16, 112, 112]   32                   True
# │    │    └─Hardswish (2)                                    [32, 16, 112, 112]   [32, 16, 112, 112]   --                   --
# ...
# ├─Sequential (classifier)                                    [32, 576]            [32, 1000]           --                   True
# │    └─Linear (0)                                            [32, 576]            [32, 1024]           590,848              True
# │    └─Hardswish (1)                                         [32, 1024]           [32, 1024]           --                   --
# │    └─Dropout (2)                                           [32, 1024]           [32, 1024]           --                   --
# │    └─Linear (3)                                            [32, 1024]           [32, 1000]           1,025,000            True
# ...
# Model - Fine Tuning
# Freeze the feature extractor so only the new classifier head is trained.
for param in model.features.parameters():
    param.requires_grad = False

reset_seeds()

# One output unit per class folder found in the training set.
output_shape = len(train_dataset.classes)

# Read the head's input width from the existing classifier instead of
# hard-coding it, so the cell keeps working if the backbone is swapped.
classifier_in_features = model.classifier[0].in_features

# NOTE(review): unlike the pretrained head (Linear -> Hardswish -> Dropout ->
# Linear in the summary above), this head has no activation between its two
# Linear layers. Kept as written; confirm it is intentional.
model.classifier = torch.nn.Sequential(
    torch.nn.Linear(
        in_features=classifier_in_features,
        out_features=1024,
        bias=True
    ),
    torch.nn.Dropout(p=0.2, inplace=True),
    torch.nn.Linear(
        in_features=1024,
        out_features=output_shape,
        bias=True
    )
).to(args.device)  # a single move covers every layer in the Sequential

summary(
    model=model,
    input_size=(32, 3, 224, 224),
    col_names=['input_size', 'output_size', 'num_params', 'trainable'],
    col_width=20,
    row_settings=['var_names']
)
# ============================================================================================================================================
# Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
# ============================================================================================================================================
# MobileNetV3 (MobileNetV3)                                    [32, 3, 224, 224]    [32, 3]              --                   Partial
# ├─Sequential (features)                                      [32, 3, 224, 224]    [32, 576, 7, 7]      --                   False
# │    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 16, 112, 112]   --                   False
# │    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 16, 112, 112]   (432)                False
# │    │    └─BatchNorm2d (1)                                  [32, 16, 112, 112]   [32, 16, 112, 112]   (32)                 False
# ...
# ├─Sequential (classifier)                                    [32, 576]            [32, 3]              --                   True
# │    └─Linear (0)                                            [32, 576]            [32, 1024]           590,848              True
# │    └─Dropout (1)                                           [32, 1024]           [32, 1024]           --                   --
# │    └─Linear (2)                                            [32, 1024]           [32, 3]              3,075                True
# ...
# Learning - Engine(Train Step)
from tqdm.auto import tqdm

def train_step(model:nn.Module, dataloader:torch.utils.data.DataLoader,
               loss_fn:nn.Module, optimizer:torch.optim.Optimizer):
    """Run one training epoch and return its mean loss and accuracy.

    Args:
        model: Network to optimise; switched to training mode here.
        dataloader: Yields ``(feature, target)`` batches.
        loss_fn: Criterion called as ``loss_fn(pred, target)``.
        optimizer: Optimizer stepped once per batch.

    Returns:
        ``(train_loss, train_accuracy)`` as Python floats, each averaged
        over the number of batches.
    """
    model.train()

    train_loss, train_accuracy = 0, 0

    for feature, target in tqdm(dataloader, desc='Train Step', leave=False):
        feature, target = feature.to(args.device), target.to(args.device)

        pred = model(feature)

        loss = loss_fn(pred, target)
        # .item() yields a plain float, so the running total does not keep
        # every batch's autograd graph alive (and matches test_step).
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        pred_class = torch.argmax(torch.softmax(pred, dim=1), dim=1)
        train_accuracy += (pred_class == target).sum().item() / len(pred)

    train_loss /= len(dataloader)
    train_accuracy /= len(dataloader)

    return train_loss, train_accuracy
# Learning - Engine(Test Step)
def test_step(model:nn.Module, dataloader:DataLoader, loss_fn:nn.Module):
    """Evaluate the model over one pass of ``dataloader``.

    Args:
        model: Network to evaluate; switched to eval mode here.
        dataloader: Yields ``(feature, target)`` batches.
        loss_fn: Criterion called as ``loss_fn(pred, target)``.

    Returns:
        ``(test_loss, test_accuracy)`` as Python floats, each averaged over
        the number of batches.
    """
    model.eval()

    test_loss, test_accuracy = 0, 0

    with torch.inference_mode():
        for feature, target in tqdm(dataloader, desc='Test Step', leave=False):
            # Same device source as train_step; no bare `device` exists at this point.
            feature, target = feature.to(args.device), target.to(args.device)

            pred = model(feature)

            loss = loss_fn(pred, target)
            test_loss += loss.item()

            pred_labels = pred.argmax(dim=1)
            test_accuracy += ((pred_labels == target).sum().item() / len(pred_labels))

    test_loss /= len(dataloader)
    test_accuracy /= len(dataloader)

    return test_loss, test_accuracy
# Learning - Engine(Early Stopping)
class EarlyStopper(object):
    """Track the best loss, checkpoint the best model and signal when to stop."""

    def __init__(self, num_trials, save_path):
        """Store the patience budget and the checkpoint location.

        Args:
            num_trials: Number of consecutive non-improving checks after
                which ``is_continuable`` returns ``False``.
            save_path: File the best model is saved to.
        """
        self.num_trials = num_trials
        self.trial_counter = 0
        self.best_loss = np.inf
        self.save_path = save_path

    def is_continuable(self, model, loss):
        """Record ``loss`` and report whether training should go on.

        A strictly lower loss saves ``model`` to ``save_path`` and resets the
        counter. Otherwise the counter advances until the budget is used up.

        Returns:
            ``True`` to keep training, ``False`` to stop.
        """
        if loss < self.best_loss:
            self.best_loss = loss
            self.trial_counter = 0
            torch.save(model, self.save_path)
            return True
        elif self.trial_counter + 1 < self.num_trials:
            self.trial_counter += 1
            return True
        else:
            return False

    def get_best_model(self, device=None):
        """Load the checkpointed model and place it on ``device``.

        Args:
            device: Target device. ``None`` falls back to the notebook-wide
                ``args.device``, looked up at call time.
        """
        if device is None:
            device = args.device
        # The checkpoint is a whole pickled module (see torch.save above), so
        # it needs weights_only=False on PyTorch versions that default to True.
        # SECURITY: unpickling runs arbitrary code; only load files this
        # notebook wrote itself.
        # map_location lets a checkpoint saved on GPU load on a CPU-only machine.
        return torch.load(self.save_path, map_location=device, weights_only=False).to(device)
# Learning - Training
def main(model:torch.nn.Module,
         train_dataloader:torch.utils.data.DataLoader,
         test_dataloader:torch.utils.data.DataLoader,
         optimizer:torch.optim.Optimizer,
         early_stopper,
         loss_fn:torch.nn.Module=nn.CrossEntropyLoss(),
         epochs:int=10):
    """Train for up to ``epochs`` epochs with early stopping.

    Every epoch runs ``train_step`` and ``test_step``, prints the four
    metrics, records them, and asks ``early_stopper`` (using the test loss)
    whether to continue.

    Returns:
        Dict with keys ``train_loss``, ``train_accuracy``, ``test_loss`` and
        ``test_accuracy``, each a list with one entry per completed epoch.
    """
    metric_names = ('train_loss', 'train_accuracy', 'test_loss', 'test_accuracy')
    results = {name: [] for name in metric_names}

    for epoch in tqdm(range(epochs), desc='Training'):
        train_loss, train_accuracy = train_step(
            model=model, dataloader=train_dataloader, loss_fn=loss_fn, optimizer=optimizer
        )
        test_loss, test_accuracy = test_step(
            model=model, dataloader=test_dataloader, loss_fn=loss_fn
        )

        print(
            f'Epoch: {epoch+1} | '
            f'train_loss: {train_loss:.4f} | '
            f'train_accuracy: {train_accuracy:.4f} | '
            f'test_loss: {test_loss:.4f} | '
            f'test_accuracy: {test_accuracy:.4f}'
        )

        # Values are listed in the same order as metric_names.
        epoch_values = (train_loss, train_accuracy, test_loss, test_accuracy)
        for name, value in zip(metric_names, epoch_values):
            results[name].append(value)

        if early_stopper.is_continuable(model, test_loss):
            continue

        print(f'validation: best loss: {early_stopper.best_loss}')
        break

    return results
reset_seeds()

loss_fn = nn.CrossEntropyLoss()
# Every model parameter is handed to Adam; the feature extractor was frozen
# with requires_grad=False in the fine-tuning cell.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

# The best model (lowest test loss) is checkpointed to args.best_fine_tuning_model.
early_stopper = EarlyStopper(num_trials=args.NUM_TRIALS, save_path=args.best_fine_tuning_model)

# Per-epoch metric history returned by main().
fine_tuning_result = main(model=model,
                          train_dataloader=train_dataloader,
                          test_dataloader=test_dataloader,
                          optimizer=optimizer,
                          early_stopper=early_stopper,
                          loss_fn=loss_fn,
                          epochs=args.NUM_EPOCHS)
# Learning - Training(Loss Graph)
def plot_loss_curves(results):
    """Plot train/test loss and accuracy curves side by side.

    Args:
        results: Dict as returned by ``main``, with the keys ``train_loss``,
            ``test_loss``, ``train_accuracy`` and ``test_accuracy``, each
            holding one value per epoch.
    """
    loss = results['train_loss']
    test_loss = results['test_loss']

    # Key names must match the ones main() writes into the results dict.
    accuracy = results['train_accuracy']
    test_accuracy = results['test_accuracy']

    epochs = range(len(results['train_loss']))

    plt.figure(figsize=(15, 7))

    plt.subplot(1, 2, 1)
    plt.plot(epochs, loss, label='train_loss')
    plt.plot(epochs, test_loss, label='test_loss')
    plt.title('Loss')
    plt.xlabel('Epochs')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(epochs, accuracy, label='train_accuracy')
    plt.plot(epochs, test_accuracy, label='test_accuracy')
    plt.title('Accuracy')
    plt.xlabel('Epochs')
    plt.legend()

plot_loss_curves(fine_tuning_result)

# Learning - Best Model
# Reload the checkpoint the early stopper saved at the lowest test loss.
best_model = early_stopper.get_best_model(device=args.device)
# Learning - Best Model(Test Score)
reset_seeds()

# Per-batch predictions and labels, kept on the CPU for the confusion matrix.
preds = []
targets = []
test_accuracy = 0

best_model.eval()

with torch.inference_mode():
    for feature, target in test_dataloader:
        feature, target = feature.to(args.device), target.to(args.device)
        
        test_pred = best_model(feature)
        
        pred_class = torch.argmax(torch.softmax(test_pred, dim=1), dim=1)
        # Accuracy of this batch; averaged over the number of batches below.
        test_accuracy += (pred_class == target).sum().item() / len(test_pred)
        
        preds.append(pred_class.cpu())
        targets.append(target.cpu())

test_accuracy /= len(test_dataloader)
# Flatten the per-batch lists into one tensor of predictions and one of labels.
pred_tensor = torch.cat(preds)
targets_tensor = torch.cat(targets)

print(f'Best Model에 대한 정확도: {test_accuracy}')
# Best Model에 대한 정확도: 0.8257575757575758
# Learning - Best Model(Confusion Matrix)
try:
    import torchmetrics, mlxtend
    print(f"mlxtend version: {mlxtend.__version__}")
    assert int(mlxtend.__version__.split(".")[1]) >= 19, "mlxtend verison should be 0.19.0 or higher"
except:
    !pip install -q torchmetrics -U mlxtend
    import torchmetrics, mlxtend
    print(f"mlxtend version: {mlxtend.__version__}")
from torchmetrics import ConfusionMatrix
from mlxtend.plotting import plot_confusion_matrix

class_names = train_dataset.classes

reset_seeds()

confmat = ConfusionMatrix(num_classes=len(class_names), task='multiclass')
confmat_tensor = confmat(preds=y_pred_tensor, target=targets_tensor)

fig, ax = plot_confusion_matrix(
    conf_mat=confmat_tensor.numpy(),
    class_names=class_names,
    figsize=(10, 7)
)

 

Experiments

# EfficientNet - Best Model
# Evaluate the checkpointed EfficientNet model on its test loader.
best_efficientnet_model = early_stopper.get_best_model(device=args.device)

reset_seeds()

test_accuracy = 0

best_efficientnet_model.eval()

# torch.inference_mode is the context manager; `torch.inference.mode` does not exist.
with torch.inference_mode():
    for feature, target in efficientnet_test_dataloader:
        feature, target = feature.to(args.device), target.to(args.device)

        test_pred = best_efficientnet_model(feature)

        pred_class = torch.argmax(torch.softmax(test_pred, dim=1), dim=1)
        test_accuracy += (pred_class == target).sum().item() / len(test_pred)

# Mean of the per-batch accuracies.
efficientnet_test_accuracy = test_accuracy / len(efficientnet_test_dataloader)
print(f'Best Efficientnet Model에 대한 정확도: {efficientnet_test_accuracy}')
# Best Efficientnet Model에 대한 정확도: 0.8967803030303031
# MobileNet - Best Model
# Evaluate the checkpointed MobileNet model on its test loader.
best_mobilenet_model = early_stopper.get_best_model(device=args.device)

reset_seeds()

test_accuracy = 0

best_mobilenet_model.eval()

# torch.inference_mode is the context manager; `torch.inference.mode` does not exist.
with torch.inference_mode():
    for feature, target in mobilenet_test_dataloader:
        feature, target = feature.to(args.device), target.to(args.device)

        test_pred = best_mobilenet_model(feature)

        pred_class = torch.argmax(torch.softmax(test_pred, dim=1), dim=1)
        test_accuracy += (pred_class == target).sum().item() / len(test_pred)

# Mean of the per-batch accuracies.
mobilenet_test_accuracy = test_accuracy / len(mobilenet_test_dataloader)
print(f'Best MobileNet Model에 대한 정확도: {mobilenet_test_accuracy}')
# Best MobileNet Model에 대한 정확도: 0.8570075757575758
# VGG - Best Model
# Evaluate the checkpointed VGG model on its test loader.
best_vgg_model = early_stopper.get_best_model(device=args.device)

reset_seeds()

test_accuracy = 0

best_vgg_model.eval()

# torch.inference_mode is the context manager; `torch.inference.mode` does not exist.
with torch.inference_mode():
    for feature, target in vgg_test_dataloader:
        feature, target = feature.to(args.device), target.to(args.device)

        test_pred = best_vgg_model(feature)

        pred_class = torch.argmax(torch.softmax(test_pred, dim=1), dim=1)
        test_accuracy += (pred_class == target).sum().item() / len(test_pred)

# Mean of the per-batch accuracies.
vgg_test_accuracy = test_accuracy / len(vgg_test_dataloader)
print(f'Best VGG Model에 대한 정확도: {vgg_test_accuracy}')
# Best VGG Model에 대한 정확도: 0.9071969696969697
# Best Model 비교
import pandas as pd

# One DataFrame per model: a row per epoch, a column per metric.
df_efficientnet = pd.DataFrame(efficientnet_result)
df_mobilenet = pd.DataFrame(mobilenet_result)
df_vgg = pd.DataFrame(vgg_result)

plt.figure(figsize=(15, 10))

# Early stopping can end each run at a different epoch, so every model gets its own x-axis.
vgg_epochs = range(len(df_vgg))
mobilenet_epochs = range(len(df_mobilenet))
efficientnet_epochs = range(len(df_efficientnet))

# (results key, panel title); the keys are the ones main() writes
# ('train_accuracy' / 'test_accuracy', not 'train_acc' / 'test_acc').
_panels = [
    ("train_loss", "Train Loss"),
    ("test_loss", "Test Loss"),
    ("train_accuracy", "Train Accuracy"),
    ("test_accuracy", "Test Accuracy"),
]
_curves = [
    (vgg_epochs, df_vgg, "VGG"),
    (mobilenet_epochs, df_mobilenet, "MobileNet"),
    (efficientnet_epochs, df_efficientnet, "EfficientNet"),
]

for _position, (_metric, _title) in enumerate(_panels, start=1):
    plt.subplot(2, 2, _position)
    for _epochs, _frame, _label in _curves:
        plt.plot(_epochs, _frame[_metric], label=_label)
    plt.title(_title)
    plt.xlabel("Epochs")
    plt.legend()

 

XAI

 

import argparse
import torch

# Settings for the XAI experiments, exposed as optional command-line flags.
parser = argparse.ArgumentParser()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# type= converts values given on the command line; defaults are used as written.
parser.add_argument('--device', type=torch.device, default=device)
parser.add_argument('--NUM_EPOCHS', type=int, default=30)
parser.add_argument('--NUM_TRIALS', type=int, default=5)
parser.add_argument('--BATCH_SIZE', type=int, default=32)
parser.add_argument('--best_efficientnet_model', default='best_efficientnet_model.pt')
parser.add_argument('--best_mobilenet_model', default='best_mobilenet_model.pt')
parser.add_argument('--best_vgg_model', default='best_vgg_model.pt')

# parse_known_args returns unrecognised arguments separately instead of
# exiting on them; they are discarded here.
args, _ = parser.parse_known_args()
print('args : ', args)
# args :  Namespace(device=device(type='cpu'), NUM_EPOCHS=30, NUM_TRIALS=5,
#   BATCH_SIZE=32, best_efficientnet_model='best_efficientnet_model.pt',
#   best_mobilenet_model='best_mobilenet_model.pt', best_vgg_model='best_vgg_model.pt')
  • SHAP(Shapley Additive exPlanations)
    • Shapley value와 feature 간 독립성을 기초로 인공지능의 예측을 설명하는 데 적용한 설명 모델
    • 모델의 각 입력 변수가 최종 예측 결과에 미친 기여도를 수치화하여, 모델이 어떻게 그리고 왜 특정 결정을 내렸는지를 이해할 수 있다.
import json

import numpy as np
import torch
import torchvision

import shap
# Explicit weights enum, as in the other model cells of this file; it replaces
# the deprecated `pretrained=True` and selects the same ImageNet weights.
model = torchvision.models.mobilenet_v2(
    weights=torchvision.models.MobileNet_V2_Weights.IMAGENET1K_V1,
    progress=False,
)
model.to(device)

model.eval()

# Sample images shipped with shap, plus the ImageNet index -> class-name table.
feature, target = shap.datasets.imagenet50()
url = 'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json'
with open(shap.datasets.cache(url)) as file:
    # Every JSON value is a pair; element 1 is used as the class name.
    class_names = [v[1] for v in json.load(file).values()]
# Channel-wise normalisation statistics used by the transforms defined below.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

def nhwc_to_nchw(x: torch.Tensor) -> torch.Tensor:
    """Move the channel axis from last to first for 3-D/4-D tensors.

    A tensor is treated as already channel-first when the candidate channel
    axis (dim 1 for 4-D, dim 0 for 3-D) has size 3, and is then returned
    unchanged. Tensors of any other rank are also returned unchanged.
    """
    rank = x.dim()
    if rank == 4 and x.shape[1] != 3:
        return x.permute(0, 3, 1, 2)
    if rank == 3 and x.shape[0] != 3:
        return x.permute(2, 0, 1)
    return x

def nchw_to_nhwc(x: torch.Tensor) -> torch.Tensor:
    """Move the channel axis from first to last for 3-D/4-D tensors.

    A tensor is treated as already channel-last when its final axis has
    size 3, and is then returned unchanged. Tensors of any other rank are
    also returned unchanged.
    """
    rank = x.dim()
    if rank == 4 and x.shape[3] != 3:
        return x.permute(0, 2, 3, 1)
    if rank == 3 and x.shape[2] != 3:
        return x.permute(1, 2, 0)
    return x


# Forward preprocessing: go channel-first, scale by 1/255, normalise with
# mean/std, then return to channel-last.
transform = [
    torchvision.transforms.Lambda(nhwc_to_nchw),
    torchvision.transforms.Lambda(lambda x: x * (1 / 255)),
    torchvision.transforms.Normalize(mean=mean, std=std),
    torchvision.transforms.Lambda(nchw_to_nhwc),
]

# Inverse of the Normalize step above: normalising with mean=-mean/std and
# std=1/std computes x * std + mean. The 1/255 scaling is not undone here.
inv_transform = [
    torchvision.transforms.Lambda(nhwc_to_nchw),
    torchvision.transforms.Normalize(
        mean=(-1 * np.array(mean) / np.array(std)).tolist(),
        std=(1 / np.array(std)).tolist(),
    ),
    torchvision.transforms.Lambda(nchw_to_nhwc),
]

# Wrap both step lists into single callables.
transform = torchvision.transforms.Compose(transform)
inv_transform = torchvision.transforms.Compose(inv_transform)
def predict(img:np.ndarray) -> torch.Tensor:
    """Run the global ``model`` on a batch of images and return its raw output.

    The input is converted to a tensor, passed through ``nhwc_to_nchw`` and
    moved to the global ``device`` before the forward pass.
    """
    batch = nhwc_to_nchw(torch.Tensor(img)).to(device)
    return model(batch)
# Preprocess the sample images and sanity-check the model on images 1 and 2.
feature_train = transform(torch.Tensor(feature))
out = predict(feature_train[1:3])
classes = torch.argmax(out, axis=1).cpu().numpy()
print(f"Classes: {classes}: {np.array(class_names)[classes]}")
# Classes: [132 814]: ['American_egret' 'speedboat']
topk = 4          # number of top-ranked outputs to explain
batch_size = 50
n_evals = 10000   # passed to the explainer as max_evals

# Image masker configured with a "blur(128,128)" mask and the shape of one sample.
masker_blur = shap.maskers.Image("blur(128,128)", feature_train[0].shape)

explainer = shap.Explainer(predict, masker_blur, output_names=class_names)

# Explain image 1 only, restricted to the first `topk` outputs of the flipped argsort.
shap_values = explainer(
    feature_train[1:2],
    max_evals=n_evals,
    batch_size=batch_size,
    outputs=shap.Explanation.argsort.flip[:topk]
)
# Undo the normalisation for display and take the single explained image.
shap_values.data = inv_transform(shap_values.data).cpu().numpy()[0]
# Move the last axis of the values to the front and split it into a list, one array per explained output.
shap_values.values = [val for val in np.moveaxis(shap_values.values[0], -1, 0)]

shap.image_plot(
    shap_values=shap_values.values,
    pixel_values=shap_values.data,
    labels=shap_values.output_names,
    # 132 is the class printed for image 1 above ('American_egret').
    true_labels=[class_names[132]],
)

  • Captum
    • 데이터 feature가 모델의 예측 또는 뉴런 활성화에 미치는 영향을 이해하고, 모델의 동작 방식을 알 수 있다.
!pip install captum

import requests
from PIL import Image
from io import BytesIO

# Fetch the sample image; the timeout keeps a stalled connection from hanging the cell.
response = requests.get(
    'https://image.freepik.com/free-photo/two-beautiful-puppies-cat-dog_58409-6024.jpg',
    timeout=30,
)
# Surface HTTP failures here rather than as an unreadable-image error from PIL below.
response.raise_for_status()
img = Image.open(BytesIO(response.content))
import torchvision
from torchvision import models, transforms

# Geometric preprocessing only; kept separate so the cropped image can be
# reused un-normalised when visualising attributions.
center_crop = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
])

# Tensor conversion followed by channel-wise normalisation.
normalize = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])
# unsqueeze(0) adds the leading batch dimension.
input_img = normalize(center_crop(img)).unsqueeze(0)
# ResNet-18 with the IMAGENET1K_V1 weights, in eval mode.
model = torchvision.models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1).eval()
from captum.attr import Occlusion

occlusion = Occlusion(model)

strides = (3, 9, 9)
sliding_window_shapes = (3, 45, 45)
baselines = 0

# Both attributions use the same occlusion settings; only the target class differs.
_occlusion_kwargs = dict(
    strides=strides,
    sliding_window_shapes=sliding_window_shapes,
    baselines=baselines,
)

# NOTE(review): 208 and 283 are presumably the class indices of a dog and a
# cat class, judging by the variable names. Confirm against the label table.
target = 208
attribution_dog = occlusion.attribute(input_img, target=target, **_occlusion_kwargs)

target = 283
attribution_cat = occlusion.attribute(input_img, target=target, **_occlusion_kwargs)
import numpy as np
from captum.attr import visualization as viz

# Each figure shows the attribution heat map next to the original image.
vis_types = ["heat_map", "original_image"]
vis_signs = ["all", "all"]

# Drop the batch axis and reorder to channel-last for the visualiser.
attribution_dog = attribution_dog.squeeze().cpu().detach().numpy().transpose(1, 2, 0)

_ = viz.visualize_image_attr_multiple(
    attribution_dog,
    np.array(center_crop(img)),
    vis_types,
    vis_signs,
    ["attribution for dog", "image"],
    show_colorbar=True,
)

attribution_cat = attribution_cat.squeeze().cpu().detach().numpy().transpose(1, 2, 0)

_ = viz.visualize_image_attr_multiple(
    attribution_cat,
    np.array(center_crop(img)),
    vis_types,
    vis_signs,
    ["attribution for cat", "image"],
    show_colorbar=True,
)