Day 31 Retrospective.
While working on the unit project, I ran my own data preprocessing and used an XGBoost model, but the test score still didn't even reach 80. The other team members said their best results were also only in the low 80s, so we decided to spend more time on preprocessing through Wednesday. I'm hoping to get a job in data analysis, but it feels so difficult, and I don't seem to understand it properly, which is daunting.
1. Deep Learning - Image Preprocessing
Torchvision
Classic Image Processing Algorithms
- Morphological Image Processing
  - Dilation: adds pixels to the boundaries of objects in an image
  - Erosion: removes pixels from the boundaries of objects in an image
- Gaussian Image Processing
  - Blurs an image with a Gaussian function
- Edge Detection in Image Processing
  - Finds the boundaries of objects within an image
  - Detects discontinuities in brightness (see the sketch after this list)
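These three classic operations can be sketched outside torchvision as well; a minimal example with scipy.ndimage (an assumption on my part, since the lesson did not use scipy):

import numpy as np
from PIL import Image
from scipy import ndimage

gray = np.array(Image.open('cat.jpg').convert('L'), dtype=float)

# Morphological dilation/erosion on a thresholded (binary) version of the image
binary = gray > 128
dilated = ndimage.binary_dilation(binary, iterations=3)  # adds pixels at object boundaries
eroded = ndimage.binary_erosion(binary, iterations=3)    # removes pixels at object boundaries

# Gaussian smoothing: blur strength controlled by sigma
blurred = ndimage.gaussian_filter(gray, sigma=3)

# Edge detection: Sobel gradient magnitude highlights brightness discontinuities
sx = ndimage.sobel(gray, axis=0)
sy = ndimage.sobel(gray, axis=1)
edges = np.hypot(sx, sy)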
Image Processing Using Neural Networks

- Convolutional Neural Network (CNN)
  - Convolutional Layer
  - Pooling Layer
  - Fully Connected Layer
- Transforms are applied when an image is loaded.
from PIL import Image
import numpy as np
import torch
from torchvision import transforms as T
ori_img = Image.open('cat.jpg')
ori_img
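Individual transforms are shown one by one below; they can also be chained with T.Compose, and the whole pipeline runs each time an image is passed through it (the sizes here are arbitrary):

pipeline = T.Compose([
    T.Resize((256, 256)),
    T.Grayscale(),
    T.ToTensor()  # PIL image -> float tensor in [0, 1], shape (C, H, W)
])
pipeline(ori_img).shape
# torch.Size([1, 256, 256])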

import matplotlib.pyplot as plt  # needed by plot() below

def plot(ori_img, imgs:list, with_ori:bool=True, row_title:str=None, **imshow_kwargs):
    if not isinstance(imgs[0], list):  # isinstance(A, B): True if A is an instance of B
        imgs = [imgs]
    num_rows = len(imgs)
    num_cols = len(imgs[0]) + with_ori
    fig, axs = plt.subplots(
        nrows=num_rows,
        ncols=num_cols,
        squeeze=False,
        figsize=(10, 15)
    )
    for row_idx, row in enumerate(imgs):
        row = [ori_img] + row if with_ori else row
        for col_idx, img in enumerate(row):
            ax = axs[row_idx, col_idx]
            ax.imshow(np.asarray(img), **imshow_kwargs)
            ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])
    if with_ori:
        axs[0, 0].set(title='Original Image')
        axs[0, 0].title.set_size(8)
    if row_title is not None:
        for row_idx in range(num_rows):
            axs[row_idx, 0].set(ylabel=row_title[row_idx])
    plt.tight_layout()
- Padding
T.Pad(padding=50)(ori_img)

padded_imgs = [T.Pad(padding=padding)(ori_img) for padding in (3, 10, 30, 50)]
plot(ori_img, padded_imgs)

- Resize
ori_img.size
# (1898, 1266)
resized_imgs = [T.Resize(size=size)(ori_img) for size in (30, 50, 100, ori_img.size[0])]
plot(ori_img, resized_imgs)

resized_imgs = [T.Resize(size=size)(ori_img) for size in ((30, 60), (50, 100), (100, 200))]
plot(ori_img, resized_imgs)

- CenterCrop
center_crops = [T.CenterCrop(size=size)(ori_img) for size in (100, 150, 200, ori_img.size[0])]
plot(ori_img, center_crops)

center_crops = [T.CenterCrop(size=size)(ori_img) for size in ((100, 150), (150, 200), (200, 250))]
plot(ori_img, center_crops)

- FiveCrop
(top_left, top_right, bottom_left, bottom_right, center) = T.FiveCrop(size=(500, 500))(ori_img)
plot(ori_img, [top_left, top_right, bottom_left, bottom_right, center])

- Grayscale
gray_img = T.Grayscale()(ori_img)
plot(ori_img, [gray_img], cmap='gray')

- ColorJitter
jitter = T.ColorJitter(brightness=.5, hue=.3)
jitted_imgs = [jitter(ori_img) for _ in range(4)]
plot(ori_img, jitted_imgs)

- GaussianBlur
blurrer = T.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5))
blurred_imgs = [blurrer(ori_img) for _ in range(4)]
plot(ori_img, blurred_imgs)

- RandomPerspective
perspective_transformer = T.RandomPerspective(distortion_scale=0.5, p=1.0)
perspective_imgs = [perspective_transformer(ori_img) for _ in range(4)]
plot(ori_img, perspective_imgs)

- RandomRotation
rotater = T.RandomRotation(degrees=(0, 180))
rotated_imgs = [rotater(ori_img) for _ in range(4)]
plot(ori_img, rotated_imgs)

- RandomAffine
affine_transformer = T.RandomAffine(degrees=(30, 70), translate=(0.1, 0.3), scale=(0.5, 0.75))
affine_imgs = [affine_transformer(ori_img) for _ in range(4)]
plot(ori_img, affine_imgs)

PIL
Image Class
from PIL import Image
img = Image.open('deer.jpg')
img

img.mode
# 'RGB'
logo = Image.open('logo_pillow.png')
logo

logo.mode
# 'RGBA'
logo.size
# (216, 73)
import numpy as np
np.array(logo).shape
# (73, 216, 4)
Image Manipulation
- crop
box = (100, 150, 300, 300)
cropped_image = img.crop(box)
cropped_image

- rotate
rotated_image = img.rotate(180)
rotated_image

- merge - copy & paste
img.mode, logo.mode
# 'RGB', 'RGBA'
np.array(img).shape, np.array(logo).shape
# ((471, 589, 3), (73, 216, 4))
np.array(img).shape, np.array(logo)[:, :, :3].shape
# ((471, 589, 3), (73, 216, 3))
tmp_img = img.copy()
position = (40, 350)
tmp_img.paste(logo, position)
tmp_img
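As called above, paste ignores the logo's alpha channel, so transparent areas come through as solid pixels. Passing the RGBA image again as the third (mask) argument preserves the transparency; this is standard Pillow behavior, though it was not covered in class:

tmp_img = img.copy()
tmp_img.paste(logo, position, logo)  # third argument: mask taken from the logo's alpha channel
tmp_img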

- convert
img.convert('L')

Image Enhancement
from PIL import ImageEnhance
- Sharpness
enhancer = ImageEnhance.Sharpness(img)
enhancer.enhance(10.0)

- Contrast
enhancer = ImageEnhance.Contrast(img)
enhancer.enhance(2)

- ImageFilter
from PIL import ImageFilter
img.filter(ImageFilter.BLUR)

img.filter(ImageFilter.CONTOUR)

img.filter(ImageFilter.BLUR).filter(ImageFilter.FIND_EDGES)

Save Image
- The image can be physically saved to disk.
save_img = img.filter(ImageFilter.BLUR).filter(ImageFilter.FIND_EDGES)
save_img.save('save_img.jpg')
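save also accepts format-specific options; for JPEG, quality is the common one (95 here is just an illustrative value; Pillow recommends staying at or below 95):

save_img.save('save_img_hq.jpg', quality=95)  # higher quality -> less compression loss, larger file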
2. Deep Learning - Vision
CNN
Import Module
import torch
from torch import nn
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
!pip install torchinfo
import torchinfo
import os
import random
import numpy as np
import pandas as pd
SEED = 42
def reset_seeds(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

reset_seeds(SEED)
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item()
    accuracy = (correct / len(y_pred)) * 100
    return accuracy
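A quick check with hypothetical tensors: three of four predictions match, so the function returns 75.0.

y_true = torch.tensor([0, 1, 2, 1])
y_pred = torch.tensor([0, 1, 2, 0])
accuracy_fn(y_true, y_pred)
# 75.0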
def eval_model(model:nn.Module, device:str, dataloader:DataLoader,
               loss_fn:nn.Module, accuracy_fn) -> dict:
    model.to(device)
    model.eval()
    loss, accuracy = 0, 0
    with torch.inference_mode():
        for feature, target in tqdm(dataloader, desc='Eval Step'):
            feature, target = feature.to(device), target.to(device)
            pred = model(feature)
            loss += loss_fn(pred, target)
            accuracy += accuracy_fn(y_true=target, y_pred=pred.argmax(dim=1))
    loss /= len(dataloader)
    accuracy /= len(dataloader)
    return {
        'model': model.__class__.__name__,
        'loss': loss.item(),
        'accuracy': accuracy
    }
def train_step(model:nn.Module, device:str, dataloader:DataLoader,
               loss_fn:nn.Module, accuracy_fn, optimizer:torch.optim.Optimizer):
    model.to(device)
    model.train()
    train_loss, train_accuracy = 0, 0
    for feature, target in tqdm(dataloader, desc='Train Step', leave=False):
        feature, target = feature.to(device), target.to(device)
        pred = model(feature)
        loss = loss_fn(pred, target)
        train_loss += loss.detach()  # accumulate a detached copy so each batch's graph can be freed
        train_accuracy += accuracy_fn(y_true=target, y_pred=pred.argmax(dim=1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    train_loss /= len(dataloader)
    train_accuracy /= len(dataloader)
    return train_loss, train_accuracy
def test_step(model:nn.Module, device:str, dataloader:DataLoader, loss_fn:nn.Module, accuracy_fn):
    model.to(device)
    model.eval()
    loss, accuracy = 0, 0
    with torch.inference_mode():
        for feature, target in tqdm(dataloader, desc='Test Step', leave=False):
            feature, target = feature.to(device), target.to(device)
            pred = model(feature)
            loss += loss_fn(pred, target)
            accuracy += accuracy_fn(y_true=target, y_pred=pred.argmax(dim=1))
    loss /= len(dataloader)
    accuracy /= len(dataloader)
    return loss, accuracy
Data
import easydict
args = easydict.EasyDict()
from google.colab import drive
drive.mount('/content/data')
args.data_path = ''
args.device = 'cuda' if torch.cuda.is_available() else 'cpu'
from pathlib import Path
data_path = Path(args.data_path)
train_path = data_path / 'train'
test_path = data_path / 'test'
train_imgs = list(train_path.glob('*/*.jpg'))
from torchvision import transforms as T
simple_transform = T.Compose([
    T.Resize((64, 64)),
    T.RandomPerspective(distortion_scale=0.6, p=1.0),
    T.RandomRotation(degrees=(0, 180)),
    T.ToTensor()
])
train_dataset = datasets.ImageFolder(
    root=train_path,
    transform=simple_transform
)
test_dataset = datasets.ImageFolder(
    root=test_path,
    transform=simple_transform
)
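A quick sanity check on a sample (assuming the train folder holds class subdirectories of RGB .jpg files): ToTensor yields a float tensor of shape (3, 64, 64) with values in [0, 1], plus an integer class label.

sample_img, sample_label = train_dataset[0]
sample_img.shape, sample_label
# (torch.Size([3, 64, 64]), 0)  e.g., for the first class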
args.batch_size = 32
train_dataloader = DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=True
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=args.batch_size,
    shuffle=True
)
CNN Model

class CNNModel(nn.Module):
    def __init__(self, input_shape, target_shape, hidden_units=32) -> None:
        super().__init__()
        self.block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.block_2 = nn.Sequential(
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units*2,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hidden_units*2*16*16,
                      out_features=hidden_units*2*16*16),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units*2*16*16,
                      out_features=target_shape)
        )

    def forward(self, x):             # x   -> (batch(=32), color(=3), row(=64), column(=64))
        out = self.block_1(x)         # out -> (batch(=32), feature_map(=32), row(=32), column(=32))
        out = self.block_2(out)       # out -> (batch(=32), feature_map(=64), row(=16), column(=16))
        return self.classifier(out)   # out -> (batch(=32), target_shape(=3))
cnn_model = CNNModel(input_shape=3, target_shape=3).to(args.device)
input_size = (32, 3, 64, 64)
torchinfo.summary(cnn_model, input_size, col_names=['kernel_size', 'input_size', 'output_size', 'num_params'])
# ============================================================================================================================================
# Layer (type:depth-idx) Kernel Shape Input Shape Output Shape Param #
# ============================================================================================================================================
# CNNModel -- [32, 3, 64, 64] [32, 3] --
# ├─Sequential: 1-1 -- [32, 3, 64, 64] [32, 32, 32, 32] --
# │ └─Conv2d: 2-1 [3, 3] [32, 3, 64, 64] [32, 32, 64, 64] 896
# │ └─ReLU: 2-2 -- [32, 32, 64, 64] [32, 32, 64, 64] --
# │ └─MaxPool2d: 2-3 2 [32, 32, 64, 64] [32, 32, 32, 32] --
# ├─Sequential: 1-2 -- [32, 32, 32, 32] [32, 64, 16, 16] --
# │ └─Conv2d: 2-4 [3, 3] [32, 32, 32, 32] [32, 64, 32, 32] 18,496
# │ └─ReLU: 2-5 -- [32, 64, 32, 32] [32, 64, 32, 32] --
# │ └─MaxPool2d: 2-6 2 [32, 64, 32, 32] [32, 64, 16, 16] --
# ├─Sequential: 1-3 -- [32, 64, 16, 16] [32, 3] --
# │ └─Flatten: 2-7 -- [32, 64, 16, 16] [32, 16384] --
# │ └─Linear: 2-8 -- [32, 16384] [32, 16384] 268,451,840
# │ └─ReLU: 2-9 -- [32, 16384] [32, 16384] --
# │ └─Linear: 2-10 -- [32, 16384] [32, 3] 49,155
# ============================================================================================================================================
# Total params: 268,520,387
# Trainable params: 268,520,387
# Non-trainable params: 0
# Total mult-adds (Units.GIGABYTES): 9.32
# ============================================================================================================================================
# Input size (MB): 1.57
# Forward/backward pass size (MB): 54.53
# Params size (MB): 1074.08
# Estimated Total Size (MB): 1130.18
# ============================================================================================================================================
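Beyond the summary, a dummy forward pass (an extra sanity check, not in the original notes) confirms the output shape:

dummy_batch = torch.randn(32, 3, 64, 64).to(args.device)
cnn_model(dummy_batch).shape
# torch.Size([32, 3])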
Training
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    params=cnn_model.parameters(),
    lr=0.1
)
epochs = 10
train_loss_values = []
test_loss_values = []
for epoch in tqdm(range(epochs), desc='Epoch'):
    train_loss, train_acc = train_step(
        model=cnn_model,
        device=args.device,
        dataloader=train_dataloader,
        loss_fn=loss_fn,
        accuracy_fn=accuracy_fn,
        optimizer=optimizer
    )
    train_loss_values.append(train_loss.cpu().numpy())  # move to CPU before numpy (needed on CUDA)
    test_loss, test_acc = test_step(
        model=cnn_model,
        device=args.device,
        dataloader=test_dataloader,
        loss_fn=loss_fn,
        accuracy_fn=accuracy_fn
    )
    test_loss_values.append(test_loss.cpu().numpy())
plt.plot(train_loss_values, label='Train Loss')
plt.plot(test_loss_values, label='Test Loss')
plt.title('Training and Test Loss Curves')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
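The eval_model helper defined earlier can then summarize the final test performance in one call (the values below are placeholders):

eval_model(
    model=cnn_model,
    device=args.device,
    dataloader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn
)
# {'model': 'CNNModel', 'loss': ..., 'accuracy': ...}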
