더보기
33일 차 회고
오늘 수업도 따라가기 조금 힘들었던 것 같다. GPU 사용 제한이 걸려서 코드를 제대로 돌려 보지 못한 탓도 있는 것 같다. 그리고 단위 프로젝트는 일단 다음 주 화요일까지 각자 모델을 완성하고 화면도 구현해 보기로 했다.
.
1. Deep Learning - Vision
Fine Tuning
Data Augmentation - GPU(Google Colab - T4)
# Import Module
!pip install torchinfo
import os
import random
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets, transforms
from torchinfo import summary
def reset_seeds(seed=42):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer seed applied to `random`, NumPy, PyTorch (CPU and all
            CUDA devices) and exported as PYTHONHASHSEED.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run.
    torch.backends.cudnn.benchmark = False
# Global Variables
import easydict
# Notebook-wide settings, stored on one attribute-style dictionary.
args = easydict.EasyDict()
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
args.device='cuda' if torch.cuda.is_available() else 'cpu'
args.NUM_EPOCHS = 30  # passed to main() as `epochs`
args.NUM_TRIALS = 5  # passed to EarlyStopper as `num_trials`
args.BATCH_SIZE = 32  # batch size for both DataLoaders
args.best_fine_tuning_model = 'best_fine_tuning_model.pt'  # checkpoint path given to EarlyStopper
# Load Data
import requests
import zipfile
from pathlib import Path
# Download and unpack the pizza/steak/sushi image set unless it is already on disk.
data_path = Path('data/')
image_path = data_path / 'pizza_steak_sushi'
if image_path.is_dir():
    print(f"{image_path} directory exists.")
else:
    print(f"Did not find {image_path} directory, creating one...")
    image_path.mkdir(parents=True, exist_ok=True)
    zip_path = data_path / 'pizza_steak_sushi.zip'
    print('Downloading pizza, steak, sushi data...')
    request = requests.get('https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip')
    # Fail loudly on an HTTP error instead of writing an error page to the zip file.
    request.raise_for_status()
    with open(zip_path, 'wb') as f:
        f.write(request.content)
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        print('Unzipping pizza, steak, sushi data...')
        zip_ref.extractall(image_path)
# Did not find data/pizza_steak_sushi directory, creating one...
# Downloading pizza, steak, sushi data...
# Unzipping pizza, steak, sushi data...
train_dir = image_path / 'train'
test_dir = image_path / 'test'
# EDA - PIL
import random
from PIL import Image
# Re-seed so that random.choice below picks the same image on every run.
reset_seeds()
image_path_list = list(image_path.glob('*/*/*.jpg')) # glob: only available on Path objects
random_image_path = random.choice(image_path_list)
# The directory that directly contains the image is used as its class label.
image_class = random_image_path.parent.stem
img = Image.open(random_image_path)
print(f"Random image path: {random_image_path}")
print(f"Image class: {image_class}")
print(f"Image height: {img.height}")
print(f"Image width: {img.width}")
# Random image path: data/pizza_steak_sushi/train/sushi/2980779.jpg
# Image class: sushi
# Image height: 512
# Image width: 512
img

# EDA - Matplotlib
import numpy as np
import matplotlib.pyplot as plt
# Convert the PIL image to an array so that its shape can be shown in the title.
img_as_array = np.asarray(img)
plt.figure(figsize=(10, 7))
plt.imshow(img_as_array)
plt.title(f"Image class: {image_class} | Image shape: {img_as_array.shape} -> [height, width, color_channels]")
plt.axis(False)

# Data Augmentation
from PIL import ImageEnhance
# Offline augmentation: new JPEG files are written next to the originals.
# NOTE: this is not idempotent - every run also augments the files produced
# by earlier runs, because the glob picks up all .jpg files.
train_dir_list = list(train_dir.glob('*/*.jpg'))
len(train_dir_list)
# 225
# Pass 1 - geometric variants (180 degree rotation, horizontal flip).
for im_path in train_dir_list:
    # Strip only the final extension so the suffixes below yield "<name>_rotate.jpg".
    _im_name = str(im_path.with_suffix(''))
    # The context manager closes the file handle once both variants are saved.
    with Image.open(im_path) as _im:
        _im.rotate(180).save(_im_name + '_rotate.jpg')
        _im.transpose(Image.FLIP_LEFT_RIGHT).save(_im_name + '_transpose.jpg')
train_dir_list = list(train_dir.glob('*/*.jpg'))
len(train_dir_list)
# 675
# Pass 2 - photometric variants of every image, including the pass-1 outputs.
for im_path in train_dir_list:
    _im_name = str(im_path.with_suffix(''))
    with Image.open(im_path) as _im:
        enhancer = ImageEnhance.Sharpness(_im)
        enhancer.enhance(10.0).save(_im_name + '_sharpness.jpg')
        enhancer = ImageEnhance.Contrast(_im)
        enhancer.enhance(2).save(_im_name + '_contrast.jpg')
train_dir_list = list(train_dir.glob('*/*.jpg'))
len(train_dir_list)
# 2025
# Dataset - Data Augmentation
# Pick the default pretrained weights for MobileNetV3-Small.
weights = torchvision.models.MobileNet_V3_Small_Weights.DEFAULT
weights
# MobileNet_V3_Small_Weights.IMAGENET1K_V1
# Reuse the preprocessing bundled with those weights (settings are printed below).
auto_transforms = weights.transforms()
auto_transforms
# ImageClassification(
# crop_size=[224]
# resize_size=[256]
# mean=[0.485, 0.456, 0.406]
# std=[0.229, 0.224, 0.225]
# interpolation=InterpolationMode.BILINEAR
# )
# Dataset - Class
from typing import Tuple, Dict, List
def find_classes(directory) -> Tuple[List[str], Dict[str, int]]:
    """Derive class names from the sub-directories of `directory`.

    Args:
        directory: Path (str or path-like) whose immediate sub-directories are
            each treated as one class.

    Returns:
        A tuple `(classes, class_to_idx)`: the alphabetically sorted
        sub-directory names and a mapping from each name to its position.

    Raises:
        FileNotFoundError: If `directory` contains no sub-directories.
    """
    # The context manager closes the scandir iterator deterministically.
    with os.scandir(directory) as entries:
        classes = sorted(entry.name for entry in entries if entry.is_dir())
    if not classes:
        raise FileNotFoundError(f"Couldn't find any classes in {directory}.")
    class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}
    return classes, class_to_idx
classes, class_to_idx = find_classes(train_dir)
import pathlib
from torch.utils.data import Dataset
class ImageFolderCustom(Dataset):
    """Image dataset that reads `<targ_dir>/<class_name>/*.jpg` files.

    The class label of a sample is the name of the directory that holds it,
    mapped to an integer through `find_classes`.
    """

    def __init__(self, targ_dir:str, transform=auto_transforms) -> None:
        """Index the image files and the class names under `targ_dir`.

        Args:
            targ_dir: Root directory containing one sub-directory per class.
            transform: Callable applied to each PIL image in `__getitem__`;
                a falsy value returns the raw PIL image instead.
        """
        # Sorted so that sample order does not depend on filesystem listing order.
        self.paths = sorted(pathlib.Path(targ_dir).glob('*/*.jpg'))
        self.transform = transform
        self.classes, self.class_to_idx = find_classes(targ_dir)

    def load_image(self, index:int) -> Image.Image:
        """Open and return the image stored at position `index`."""
        image_path = self.paths[index]
        return Image.open(image_path)

    def __len__(self) -> int:
        """Return the number of indexed image files."""
        return len(self.paths)

    def __getitem__(self, index:int) -> Tuple[torch.Tensor, int]:
        """Return `(image, class_index)` for the sample at `index`."""
        img = self.load_image(index)
        class_name = self.paths[index].parent.name
        class_idx = self.class_to_idx[class_name]
        if self.transform:
            return self.transform(img), class_idx
        return img, class_idx
# Build the train and test datasets; both use the pretrained weights' preprocessing.
reset_seeds()
train_dataset = ImageFolderCustom(
targ_dir=train_dir,
transform=auto_transforms
)
test_dataset = ImageFolderCustom(
targ_dir=test_dir,
transform=auto_transforms
)
# DataLoader
# Re-seed so that the shuffling order is repeatable between runs.
reset_seeds()
train_dataloader = DataLoader(
train_dataset,
batch_size=args.BATCH_SIZE,
shuffle=True
)
# NOTE(review): the test loader is shuffled as well. The accuracy code in this
# file averages per-batch accuracies, so batch composition can affect the
# reported number - consider shuffle=False for evaluation.
test_dataloader = DataLoader(
test_dataset,
batch_size=args.BATCH_SIZE,
shuffle=True
)
# Model - Pretrained Weight
reset_seeds()
# Load MobileNetV3-Small with the pretrained weights and move it to the target device.
model = torchvision.models.mobilenet_v3_small(weights=weights).to(args.device)
# Print a per-layer table of shapes, parameter counts and trainability
# for a dummy batch of 32 RGB images of 224x224.
summary(
model=model,
input_size=(32, 3, 224, 224),
col_names=['input_size', 'output_size', 'num_params', 'trainable'],
col_width=20,
row_settings=['var_names']
)
# ============================================================================================================================================
# Layer (type (var_name)) Input Shape Output Shape Param # Trainable
# ============================================================================================================================================
# MobileNetV3 (MobileNetV3) [32, 3, 224, 224] [32, 1000] -- True
# ├─Sequential (features) [32, 3, 224, 224] [32, 576, 7, 7] -- True
# │ └─Conv2dNormActivation (0) [32, 3, 224, 224] [32, 16, 112, 112] -- True
# │ │ └─Conv2d (0) [32, 3, 224, 224] [32, 16, 112, 112] 432 True
# │ │ └─BatchNorm2d (1) [32, 16, 112, 112] [32, 16, 112, 112] 32 True
# │ │ └─Hardswish (2) [32, 16, 112, 112] [32, 16, 112, 112] -- --
# ...
# ├─Sequential (classifier) [32, 576] [32, 1000] -- True
# │ └─Linear (0) [32, 576] [32, 1024] 590,848 True
# │ └─Hardswish (1) [32, 1024] [32, 1024] -- --
# │ └─Dropout (2) [32, 1024] [32, 1024] -- --
# │ └─Linear (3) [32, 1024] [32, 1000] 1,025,000 True
# ...
# Model - Fine Tuning
# Freeze the pretrained feature extractor so that only the new head is trained.
for param in model.features.parameters():
    param.requires_grad = False
reset_seeds()
# One output unit per class found in the training set.
output_shape = len(train_dataset.classes)
# NOTE(review): the pretrained classifier printed above has a Hardswish between
# its two Linear layers; this replacement head has none - confirm that is intended.
model.classifier = torch.nn.Sequential(
    torch.nn.Linear(
        in_features=576,
        out_features=1024,
        bias=True
    ),
    torch.nn.Dropout(p=0.2, inplace=True),
    torch.nn.Linear(
        in_features=1024,
        out_features=output_shape,
        bias=True
    )
).to(args.device)  # a single move covers every layer of the head
summary(
model=model,
input_size=(32, 3, 224, 224),
col_names=['input_size', 'output_size', 'num_params', 'trainable'],
col_width=20,
row_settings=['var_names']
)
# ============================================================================================================================================
# Layer (type (var_name)) Input Shape Output Shape Param # Trainable
# ============================================================================================================================================
# MobileNetV3 (MobileNetV3) [32, 3, 224, 224] [32, 3] -- Partial
# ├─Sequential (features) [32, 3, 224, 224] [32, 576, 7, 7] -- False
# │ └─Conv2dNormActivation (0) [32, 3, 224, 224] [32, 16, 112, 112] -- False
# │ │ └─Conv2d (0) [32, 3, 224, 224] [32, 16, 112, 112] (432) False
# │ │ └─BatchNorm2d (1) [32, 16, 112, 112] [32, 16, 112, 112] (32) False
# ...
# ├─Sequential (classifier) [32, 576] [32, 3] -- True
# │ └─Linear (0) [32, 576] [32, 1024] 590,848 True
# │ └─Dropout (1) [32, 1024] [32, 1024] -- --
# │ └─Linear (2) [32, 1024] [32, 3] 3,075 True
# ...
# Learning - Engine(Train Step)
from tqdm.auto import tqdm
def train_step(model:nn.Module, dataloader:torch.utils.data.DataLoader,
               loss_fn:nn.Module, optimizer:torch.optim.Optimizer):
    """Train `model` for one pass over `dataloader`.

    Args:
        model: Network to optimise; put into training mode here.
        dataloader: Yields `(feature, target)` batches.
        loss_fn: Callable computing the loss from `(pred, target)`.
        optimizer: Optimizer stepped once per batch.

    Returns:
        `(train_loss, train_accuracy)` as Python floats, each the mean of the
        per-batch values.
    """
    model.train()
    train_loss, train_accuracy = 0, 0
    for feature, target in tqdm(dataloader, desc='Train Step', leave=False):
        feature, target = feature.to(args.device), target.to(args.device)
        pred = model(feature)
        loss = loss_fn(pred, target)
        # .item() detaches the value; summing the tensor itself would keep
        # every batch's autograd graph alive until the end of the epoch.
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # argmax over logits equals argmax over softmax probabilities.
        pred_class = pred.argmax(dim=1)
        train_accuracy += (pred_class == target).sum().item() / len(pred)
    train_loss /= len(dataloader)
    train_accuracy /= len(dataloader)
    return train_loss, train_accuracy
# Learning - Engine(Test Step)
def test_step(model:nn.Module, dataloader:DataLoader, loss_fn:nn.Module):
    """Evaluate `model` over `dataloader` without computing gradients.

    Args:
        model: Network to evaluate; put into eval mode here.
        dataloader: Yields `(feature, target)` batches.
        loss_fn: Callable computing the loss from `(pred, target)`.

    Returns:
        `(test_loss, test_accuracy)` as Python floats, each the mean of the
        per-batch values.
    """
    model.eval()
    test_loss, test_accuracy = 0, 0
    with torch.inference_mode():
        for feature, target in tqdm(dataloader, desc='Test Step', leave=False):
            # Same device setting as train_step.
            feature, target = feature.to(args.device), target.to(args.device)
            pred = model(feature)
            loss = loss_fn(pred, target)
            test_loss += loss.item()
            pred_labels = pred.argmax(dim=1)
            test_accuracy += (pred_labels == target).sum().item() / len(pred_labels)
    test_loss /= len(dataloader)
    test_accuracy /= len(dataloader)
    return test_loss, test_accuracy
# Learning - Engine(Early Stopping)
class EarlyStopper(object):
    """Track the best loss, checkpoint the best model and signal when to stop."""

    def __init__(self, num_trials, save_path):
        """Store the patience and the checkpoint location.

        Args:
            num_trials: Number of consecutive non-improving calls to
                `is_continuable` after which it returns False.
            save_path: File the best model is saved to.
        """
        self.num_trials = num_trials
        self.trial_counter = 0
        self.best_loss = np.inf
        self.save_path = save_path

    def is_continuable(self, model, loss):
        """Record `loss` and report whether training should go on.

        A strictly lower loss saves `model` to `save_path` and resets the
        counter. Otherwise the counter grows until `num_trials` is reached.

        Returns:
            True to keep training, False to stop.
        """
        if loss < self.best_loss:
            self.best_loss = loss
            self.trial_counter = 0
            # The whole module is pickled, not only its state_dict.
            torch.save(model, self.save_path)
            return True
        elif self.trial_counter + 1 < self.num_trials:
            self.trial_counter += 1
            return True
        else:
            return False

    def get_best_model(self, device=None):
        """Load the checkpointed model and move it to `device`.

        Args:
            device: Target device. When omitted, the notebook-level
                `args.device` is looked up at call time.
        """
        if device is None:
            device = args.device
        # weights_only=False is needed to unpickle a whole nn.Module on recent
        # PyTorch releases. This runs arbitrary pickle code, so load only
        # checkpoints written by this notebook.
        return torch.load(self.save_path, map_location=device, weights_only=False).to(device)
# Learning - Training
def main(model:torch.nn.Module,
         train_dataloader:torch.utils.data.DataLoader,
         test_dataloader:torch.utils.data.DataLoader,
         optimizer:torch.optim.Optimizer,
         early_stopper,
         loss_fn:torch.nn.Module=nn.CrossEntropyLoss(),
         epochs:int=10):
    """Run the train/evaluate loop with early stopping.

    Args:
        model: Network to train.
        train_dataloader: Batches passed to `train_step`.
        test_dataloader: Batches passed to `test_step`.
        optimizer: Optimizer passed to `train_step`.
        early_stopper: Object whose `is_continuable(model, loss)` decides
            whether to continue and whose `best_loss` is reported on stop.
        loss_fn: Loss used for both steps.
        epochs: Maximum number of epochs.

    Returns:
        Dict with per-epoch lists under 'train_loss', 'train_accuracy',
        'test_loss' and 'test_accuracy'.
    """
    results = {
        'train_loss': [],
        'train_accuracy': [],
        'test_loss': [],
        'test_accuracy': []
    }
    for epoch in tqdm(range(epochs), desc='Training'):
        train_loss, train_accuracy = train_step(model=model,
                                                dataloader=train_dataloader,
                                                loss_fn=loss_fn,
                                                optimizer=optimizer)
        test_loss, test_accuracy = test_step(model=model,
                                             dataloader=test_dataloader,
                                             loss_fn=loss_fn)
        # Store plain floats: a step function may return a 0-dim tensor, which
        # would pin GPU memory and cannot be plotted directly.
        train_loss, test_loss = float(train_loss), float(test_loss)
        print(
            f'Epoch: {epoch+1} | '
            f'train_loss: {train_loss:.4f} | '
            f'train_accuracy: {train_accuracy:.4f} | '
            f'test_loss: {test_loss:.4f} | '
            f'test_accuracy: {test_accuracy:.4f}'
        )
        results['train_loss'].append(train_loss)
        results['train_accuracy'].append(train_accuracy)
        results['test_loss'].append(test_loss)
        results['test_accuracy'].append(test_accuracy)
        # The test loss drives both checkpointing and the stop decision.
        if not early_stopper.is_continuable(model, test_loss):
            print(f'validation: best loss: {early_stopper.best_loss}')
            break
    return results
# Train the fine-tuning model with the notebook-level settings.
reset_seeds()
loss_fn = nn.CrossEntropyLoss()
# All parameters are handed to Adam; those with requires_grad=False receive no updates.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
early_stopper = EarlyStopper(num_trials=args.NUM_TRIALS, save_path=args.best_fine_tuning_model)
# fine_tuning_result holds the per-epoch loss/accuracy lists returned by main().
fine_tuning_result = main(model=model,
train_dataloader=train_dataloader,
test_dataloader=test_dataloader,
optimizer=optimizer,
early_stopper=early_stopper,
loss_fn=loss_fn,
epochs=args.NUM_EPOCHS)
# Learning - Training(Loss Graph)
def plot_loss_curves(results):
    """Plot train/test loss and accuracy curves side by side.

    Args:
        results: Dict of per-epoch lists with keys 'train_loss' and
            'test_loss', plus accuracy under either
            'train_accuracy'/'test_accuracy' (as produced by `main`) or
            'train_acc'/'test_acc'.
    """
    def _series(*keys):
        # Return the first key that is present, converted to plain floats so
        # that 0-dim tensors can be plotted as well.
        for key in keys:
            if key in results:
                return [float(value) for value in results[key]]
        raise KeyError(keys[0])

    loss = _series('train_loss')
    test_loss = _series('test_loss')
    accuracy = _series('train_accuracy', 'train_acc')
    test_accuracy = _series('test_accuracy', 'test_acc')
    epochs = range(len(loss))
    plt.figure(figsize=(15, 7))
    plt.subplot(1, 2, 1)
    plt.plot(epochs, loss, label='train_loss')
    plt.plot(epochs, test_loss, label='test_loss')
    plt.title('Loss')
    plt.xlabel('Epochs')
    plt.legend()
    plt.subplot(1, 2, 2)
    plt.plot(epochs, accuracy, label='train_accuracy')
    plt.plot(epochs, test_accuracy, label='test_accuracy')
    plt.title('Accuracy')
    plt.xlabel('Epochs')
    plt.legend()
plot_loss_curves(fine_tuning_result)

# Learning - Best Model
# Reload the checkpoint with the lowest test loss seen during training.
best_model = early_stopper.get_best_model(device=args.device)
# Learning - Best Model(Test Score)
reset_seeds()
preds = []
targets = []
test_accuracy = 0
best_model.eval()
with torch.inference_mode():
    for feature, target in test_dataloader:
        feature, target = feature.to(args.device), target.to(args.device)
        test_pred = best_model(feature)
        # argmax over logits equals argmax over softmax probabilities.
        pred_class = test_pred.argmax(dim=1)
        test_accuracy += (pred_class == target).sum().item() / len(test_pred)
        # Keep CPU copies for later analysis of predictions against targets.
        preds.append(pred_class.cpu())
        targets.append(target.cpu())
# Mean of per-batch accuracies (a smaller last batch weighs as much as a full one).
test_accuracy /= len(test_dataloader)
pred_tensor = torch.cat(preds)
targets_tensor = torch.cat(targets)
print(f'Best Model에 대한 정확도: {test_accuracy}')
# Best Model에 대한 정확도: 0.8257575757575758
# Learning - Best Model(Confusion Matrix)
try:
import torchmetrics, mlxtend
print(f"mlxtend version: {mlxtend.__version__}")
assert int(mlxtend.__version__.split(".")[1]) >= 19, "mlxtend verison should be 0.19.0 or higher"
except:
!pip install -q torchmetrics -U mlxtend
import torchmetrics, mlxtend
print(f"mlxtend version: {mlxtend.__version__}")
from torchmetrics import ConfusionMatrix
from mlxtend.plotting import plot_confusion_matrix
class_names = train_dataset.classes
reset_seeds()
confmat = ConfusionMatrix(num_classes=len(class_names), task='multiclass')
confmat_tensor = confmat(preds=y_pred_tensor, target=targets_tensor)
fig, ax = plot_confusion_matrix(
conf_mat=confmat_tensor.numpy(),
class_names=class_names,
figsize=(10, 7)
)

Experiments
# EfficientNet - Best Model
# NOTE(review): every experiment section reads from the same `early_stopper`
# name; presumably each was run after its own training - confirm.
best_efficientnet_model = early_stopper.get_best_model(device=args.device)
reset_seeds()
test_accuracy = 0
best_efficientnet_model.eval()
with torch.inference_mode():
    for feature, target in efficientnet_test_dataloader:
        feature, target = feature.to(args.device), target.to(args.device)
        test_pred = best_efficientnet_model(feature)
        pred_class = torch.argmax(torch.softmax(test_pred, dim=1), dim=1)
        test_accuracy += (pred_class == target).sum().item() / len(test_pred)
# Mean of the per-batch accuracies.
efficientnet_test_accuracy = test_accuracy / len(efficientnet_test_dataloader)
print(f'Best Efficientnet Model에 대한 정확도: {efficientnet_test_accuracy}')
# Best Efficientnet Model에 대한 정확도: 0.8967803030303031
# MobileNet - Best Model
# NOTE(review): every experiment section reads from the same `early_stopper`
# name; presumably each was run after its own training - confirm.
best_mobilenet_model = early_stopper.get_best_model(device=args.device)
reset_seeds()
test_accuracy = 0
best_mobilenet_model.eval()
with torch.inference_mode():
    for feature, target in mobilenet_test_dataloader:
        feature, target = feature.to(args.device), target.to(args.device)
        test_pred = best_mobilenet_model(feature)
        pred_class = torch.argmax(torch.softmax(test_pred, dim=1), dim=1)
        test_accuracy += (pred_class == target).sum().item() / len(test_pred)
# Mean of the per-batch accuracies.
mobilenet_test_accuracy = test_accuracy / len(mobilenet_test_dataloader)
print(f'Best MobileNet Model에 대한 정확도: {mobilenet_test_accuracy}')
# Best MobileNet Model에 대한 정확도: 0.8570075757575758
# VGG - Best Model
# NOTE(review): every experiment section reads from the same `early_stopper`
# name; presumably each was run after its own training - confirm.
best_vgg_model = early_stopper.get_best_model(device=args.device)
reset_seeds()
test_accuracy = 0
best_vgg_model.eval()
with torch.inference_mode():
    for feature, target in vgg_test_dataloader:
        feature, target = feature.to(args.device), target.to(args.device)
        test_pred = best_vgg_model(feature)
        pred_class = torch.argmax(torch.softmax(test_pred, dim=1), dim=1)
        test_accuracy += (pred_class == target).sum().item() / len(test_pred)
# Mean of the per-batch accuracies.
vgg_test_accuracy = test_accuracy / len(vgg_test_dataloader)
print(f'Best VGG Model에 대한 정확도: {vgg_test_accuracy}')
# Best VGG Model에 대한 정확도: 0.9071969696969697
# Best Model 비교
import pandas as pd
# One DataFrame per model; each column is a per-epoch metric history.
# NOTE(review): the accuracy columns read below are "train_acc"/"test_acc",
# while main() in this file stores 'train_accuracy'/'test_accuracy' - confirm
# which keys the *_result dicts actually carry.
df_efficientnet = pd.DataFrame(efficientnet_result)
df_mobilenet = pd.DataFrame(mobilenet_result)
df_vgg = pd.DataFrame(vgg_result)
plt.figure(figsize=(15, 10))
# Each model gets its own x-axis because the histories may differ in length.
vgg_epochs = range(len(df_vgg))
mobilenet_epochs = range(len(df_mobilenet))
efficientnet_epochs = range(len(df_efficientnet))
# Top-left panel: training loss.
plt.subplot(2, 2, 1)
plt.plot(vgg_epochs, df_vgg["train_loss"], label="VGG")
plt.plot(mobilenet_epochs, df_mobilenet["train_loss"], label="MobileNet")
plt.plot(efficientnet_epochs, df_efficientnet["train_loss"], label="EfficientNet")
plt.title("Train Loss")
plt.xlabel("Epochs")
plt.legend()
# Top-right panel: test loss.
plt.subplot(2, 2, 2)
plt.plot(vgg_epochs, df_vgg["test_loss"], label="VGG")
plt.plot(mobilenet_epochs, df_mobilenet["test_loss"], label="MobileNet")
plt.plot(efficientnet_epochs, df_efficientnet["test_loss"], label="EfficientNet")
plt.title("Test Loss")
plt.xlabel("Epochs")
plt.legend()
# Bottom-left panel: training accuracy.
plt.subplot(2, 2, 3)
plt.plot(vgg_epochs, df_vgg["train_acc"], label="VGG")
plt.plot(mobilenet_epochs, df_mobilenet["train_acc"], label="MobileNet")
plt.plot(efficientnet_epochs, df_efficientnet["train_acc"], label="EfficientNet")
plt.title("Train Accuracy")
plt.xlabel("Epochs")
plt.legend()
# Bottom-right panel: test accuracy.
plt.subplot(2, 2, 4)
plt.plot(vgg_epochs, df_vgg["test_acc"], label="VGG")
plt.plot(mobilenet_epochs, df_mobilenet["test_acc"], label="MobileNet")
plt.plot(efficientnet_epochs, df_efficientnet["test_acc"], label="EfficientNet")
plt.title("Test Accuracy")
plt.xlabel("Epochs")
plt.legend()

XAI
import argparse
import torch
# Notebook settings for the XAI section, exposed as overridable command-line options.
parser = argparse.ArgumentParser()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# type= converts command-line strings; the defaults are used unchanged.
parser.add_argument('--device', type=torch.device, default=device)
parser.add_argument('--NUM_EPOCHS', type=int, default=30)
parser.add_argument('--NUM_TRIALS', type=int, default=5)
parser.add_argument('--BATCH_SIZE', type=int, default=32)
parser.add_argument('--best_efficientnet_model', default='best_efficientnet_model.pt')
parser.add_argument('--best_mobilenet_model', default='best_mobilenet_model.pt')
parser.add_argument('--best_vgg_model', default='best_vgg_model.pt')
# parse_known_args ignores arguments this parser does not define.
args, _ = parser.parse_known_args()
print('args : ', args)
# args : Namespace(device=device(type='cpu'), NUM_EPOCHS=30, NUM_TRIALS=5,
# BATCH_SIZE=32, best_efficientnet_model='best_efficientnet_model.pt',
# best_mobilenet_model='best_mobilenet_model.pt', best_vgg_model='best_vgg_model.pt')
- SHAP(Shapley Additive exPlanations)
- Shapley value와 feature 간 독립성 가정을 기초로 하여 인공지능의 예측을 설명하는 설명 모델
- 모델의 각 입력 변수가 최종 예측 결과에 미친 기여도를 수치화하여, 모델이 어떻게 그리고 왜 특정 결정을 내렸는지를 이해할 수 있다.
import json
import numpy as np
import torch
import torchvision
import shap
# ImageNet-pretrained MobileNetV2, selected through the weights enum
# (the `pretrained=True` flag is deprecated in torchvision).
model = torchvision.models.mobilenet_v2(
    weights=torchvision.models.MobileNet_V2_Weights.IMAGENET1K_V1,
    progress=False,
)
model.to(device)
model.eval()
# Sample images and labels shipped with SHAP.
feature, target = shap.datasets.imagenet50()
url = 'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json'
# Each JSON value is a sequence whose second item is taken as the class name.
with open(shap.datasets.cache(url)) as file:
    class_names = [v[1] for v in json.load(file).values()]
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
def nhwc_to_nchw(x: torch.Tensor) -> torch.Tensor:
    """Move a trailing channel axis to the channel-first position.

    A 4-D tensor is returned unchanged when its axis 1 already has size 3,
    otherwise it is permuted with (0, 3, 1, 2). A 3-D tensor is returned
    unchanged when its axis 0 has size 3, otherwise it is permuted with
    (2, 0, 1). Tensors of any other rank pass through untouched.
    """
    if x.dim() == 4:
        x = x if x.shape[1] == 3 else x.permute(0, 3, 1, 2)
    elif x.dim() == 3:
        x = x if x.shape[0] == 3 else x.permute(2, 0, 1)
    return x
def nchw_to_nhwc(x: torch.Tensor) -> torch.Tensor:
    """Move a leading channel axis to the channel-last position.

    A 4-D tensor is returned unchanged when its axis 3 already has size 3,
    otherwise it is permuted with (0, 2, 3, 1). A 3-D tensor is returned
    unchanged when its axis 2 has size 3, otherwise it is permuted with
    (1, 2, 0). Tensors of any other rank pass through untouched.
    """
    if x.dim() == 4:
        x = x if x.shape[3] == 3 else x.permute(0, 2, 3, 1)
    elif x.dim() == 3:
        x = x if x.shape[2] == 3 else x.permute(1, 2, 0)
    return x
# Forward preprocessing: go channel-first, scale by 1/255, normalise with
# mean/std, then return to channel-last.
transform = [
torchvision.transforms.Lambda(nhwc_to_nchw),
torchvision.transforms.Lambda(lambda x: x * (1 / 255)),
torchvision.transforms.Normalize(mean=mean, std=std),
torchvision.transforms.Lambda(nchw_to_nhwc),
]
# Inverse of the Normalize step: with mean=-m/s and std=1/s it computes
# x * s + m. The 1/255 scaling is not undone here.
inv_transform = [
torchvision.transforms.Lambda(nhwc_to_nchw),
torchvision.transforms.Normalize(
mean=(-1 * np.array(mean) / np.array(std)).tolist(),
std=(1 / np.array(std)).tolist(),
),
torchvision.transforms.Lambda(nchw_to_nhwc),
]
# Wrap both step lists so that each can be called as a single function.
transform = torchvision.transforms.Compose(transform)
inv_transform = torchvision.transforms.Compose(inv_transform)
def predict(img:np.ndarray) -> torch.Tensor:
    """Run the module-level `model` on a batch of channel-last images.

    Args:
        img: Image batch; converted to a tensor and to channel-first layout
            before the forward pass.

    Returns:
        The raw model output for the batch.
    """
    img = nhwc_to_nchw(torch.Tensor(img))
    img = img.to(device)
    # Inference only: skip autograd bookkeeping to save memory.
    with torch.no_grad():
        output = model(img)
    return output
# Preprocess the sample images and check the predictions for images 1 and 2.
feature_train = transform(torch.Tensor(feature))
out = predict(feature_train[1:3])
classes = torch.argmax(out, axis=1).cpu().numpy()
print(f"Classes: {classes}: {np.array(class_names)[classes]}")
# Classes: [132 814]: ['American_egret' 'speedboat']
topk = 4  # number of top-ranked outputs to explain
batch_size = 50
n_evals = 10000  # passed to the explainer as max_evals
# Masker configured with a blur setting and the shape of one image.
masker_blur = shap.maskers.Image("blur(128,128)", feature_train[0].shape)
explainer = shap.Explainer(predict, masker_blur, output_names=class_names)
# Explain image 1 only, restricted to its top-k outputs.
shap_values = explainer(
feature_train[1:2],
max_evals=n_evals,
batch_size=batch_size,
outputs=shap.Explanation.argsort.flip[:topk]
)
# Undo the normalisation so the plot can show the image itself.
shap_values.data = inv_transform(shap_values.data).cpu().numpy()[0]
# Split the last axis into a list with one array per explained output.
shap_values.values = [val for val in np.moveaxis(shap_values.values[0], -1, 0)]
shap.image_plot(
shap_values=shap_values.values,
pixel_values=shap_values.data,
labels=shap_values.output_names,
# 132 is the class index printed above for this image.
true_labels=[class_names[132]],
)

- Captum
- 데이터 feature가 모델의 예측 또는 뉴런 활성화에 미치는 영향을 이해하고, 모델의 동작 방식을 알 수 있다.
!pip install captum
import requests
from PIL import Image
from io import BytesIO
# Download the example image and decode it from the in-memory response bytes.
response = requests.get('https://image.freepik.com/free-photo/two-beautiful-puppies-cat-dog_58409-6024.jpg')
img = Image.open(BytesIO(response.content))
import torchvision
from torchvision import models, transforms
# Geometric preprocessing, kept separate so the cropped image can be reused for display.
center_crop = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
])
# Tensor conversion followed by per-channel normalisation.
normalize = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
)
])
# unsqueeze(0) adds the leading batch axis.
input_img = normalize(center_crop(img)).unsqueeze(0)
# Pretrained ResNet-18 in eval mode.
model = torchvision.models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1).eval()
from captum.attr import Occlusion
# Occlusion attribution for `model`, configured with a window shape and strides.
occlusion = Occlusion(model)
strides = (3, 9, 9)
sliding_window_shapes = (3, 45, 45)
baselines = 0  # baseline value passed to attribute()
# presumably the ImageNet index of a dog class (see the variable name) - TODO confirm
target = 208
attribution_dog = occlusion.attribute(
input_img,
strides = strides,
target=target,
sliding_window_shapes=sliding_window_shapes,
baselines=baselines
)
# presumably the ImageNet index of a cat class (see the variable name) - TODO confirm
target = 283
attribution_cat = occlusion.attribute(
input_img,
strides = strides,
target=target,
sliding_window_shapes=sliding_window_shapes,
baselines=0
)
import numpy as np
from captum.attr import visualization as viz
vis_types = ["heat_map", "original_image"]
vis_signs = ["all", "all"]
# Drop the batch axis and reorder the axes with (1, 2, 0) for plotting.
attribution_dog = np.transpose(attribution_dog.squeeze().cpu().detach().numpy(), (1, 2, 0))
# Show the attribution heat map next to the cropped image.
_ = viz.visualize_image_attr_multiple(
attribution_dog,
np.array(center_crop(img)),
vis_types,
vis_signs,
["attribution for dog", "image"],
show_colorbar = True
)
# Same visualisation for the second target class.
attribution_cat = np.transpose(attribution_cat.squeeze().cpu().detach().numpy(), (1, 2, 0))
_ = viz.visualize_image_attr_multiple(
attribution_cat,
np.array(center_crop(img)),
["heat_map", "original_image"],
["all", "all"],
["attribution for cat", "image"],
show_colorbar = True
)


'SK네트웍스 Family AI캠프 10기 > Daily 회고' 카테고리의 다른 글
| 35일차. Deep Learning - 추천 시스템 (0) | 2025.02.28 |
|---|---|
| 34일차. Deep Learning - Modular & TensorBoard & HPO Tuning (0) | 2025.02.27 |
| 32일차. Deep Learning - Vision(CNN & Fine Tuning) (0) | 2025.02.25 |
| 31일차. Deep Learning - Vision(Image Preprocessing & CNN) (0) | 2025.02.24 |
| 30일차. PyTorch - Multiclass Classification (0) | 2025.02.21 |