Day 34 Retrospective.
Most of today's class was about modularizing what we've learned so far, so it went well. But during the unit project, PyCaret, which had run fine up to yesterday, kept throwing errors today, which was frustrating. I tried to fix it but couldn't, so I decided to just run the models one by one myself and compare them.
1. Modular
Visual Studio Code
Creating a virtual environment and installing modules
py -3.12 -m venv .venv
.\.venv\Scripts\activate
python -m pip install --upgrade pip
pip install torch torchvision torchinfo
pip install jupyter matplotlib tqdm
pip freeze > requirements.txt
# To create a virtual environment from someone else's requirements.txt file:
# py -3.12 -m venv .venv
# .\.venv\Scripts\activate
# python -m pip install --upgrade pip
# pip install -r requirements.txt
File structure
learning_modular/
├── service/
│   ├── models/
│   │   └── vgg_model.py
│   ├── data_setup.py     # a file to prepare and download data if needed
│   ├── engine.py         # a file containing various training functions
│   ├── model_builder.py  # a file to create a PyTorch model
│   ├── train.py          # a file to leverage all other files and train a target PyTorch model
│   └── utils.py          # a file dedicated to helpful utility functions
├── models/
│   └── LearningModular.pth
├── data/
│   └── pizza_steak_sushi/
│       ├── train/
│       │   ├── pizza/
│       │   │   ├── image01.jpeg
│       │   │   └── ...
│       │   ├── steak/
│       │   └── sushi/
│       └── test/
│           ├── pizza/
│           ├── steak/
│           └── sushi/
└── test.ipynb
service/
data_setup.py
import os
from pathlib import Path
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader

def create_dataset(
    root: str = './data/pizza_steak_sushi',
    train: bool = True,
    transform: transforms.Compose = None
) -> Dataset:
    # Point at the train/ or test/ split, then build an ImageFolder dataset
    if train:
        root = Path(root) / 'train'
    else:
        root = Path(root) / 'test'
    return datasets.ImageFolder(root=root, transform=transform)

def create_dataloader(
    dataset: Dataset,
    batch_size: int = 32,
    shuffle: bool = True
) -> DataLoader:
    return DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=shuffle
    )
# test.ipynb
from torchvision import transforms
from service.data_setup import create_dataset, create_dataloader

my_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])

train_dataset = create_dataset(train=True, transform=my_transform)
test_dataset = create_dataset(train=False, transform=my_transform)
len(train_dataset), len(test_dataset)
# (225, 75)

train_dataloader = create_dataloader(train_dataset)
test_dataloader = create_dataloader(test_dataset)
len(train_dataloader), len(test_dataloader)
# (8, 3)

features, targets = next(iter(train_dataloader))
features.shape
# torch.Size([32, 3, 64, 64])
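Since matplotlib is already installed, one image from the batch can be displayed as a quick sanity check (a minimal sketch, not part of the service modules):

import matplotlib.pyplot as plt

# DataLoader batches are (C, H, W); matplotlib expects (H, W, C)
image = features[0].permute(1, 2, 0)
plt.imshow(image)
plt.title(train_dataset.classes[targets[0].item()])
plt.axis('off')
plt.show()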
models/vgg_model.py
import torch
from torch import nn

class VGGBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3):
        super().__init__()
        self.block = nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=out_channels,  # takes the first conv's output channels
                out_channels=out_channels,
                kernel_size=kernel_size
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

    def forward(self, x):
        return self.block(x)

class ClassifierBlock(nn.Module):
    def __init__(self, in_features, out_features):
        super().__init__()
        self.block = nn.Sequential(
            nn.Flatten(),
            nn.Linear(
                in_features=in_features,
                out_features=out_features
            )
        )

    def forward(self, x):
        return self.block(x)

class TinyVGG(nn.Module):
    def __init__(self, color_size, target_size, hidden_size=10):
        super().__init__()
        self.vgg1 = VGGBlock(in_channels=color_size, out_channels=hidden_size)
        self.vgg2 = VGGBlock(in_channels=hidden_size, out_channels=hidden_size)
        # A 64x64 input shrinks to 13x13 after two VGG blocks (see the shape check below)
        self.classifier = ClassifierBlock(in_features=hidden_size*13*13, out_features=target_size)

    def forward(self, x):
        vgg1_out = self.vgg1(x)
        vgg2_out = self.vgg2(vgg1_out)
        return self.classifier(vgg2_out)
# test.ipynb
import torch
from service.models.vgg_model import VGGBlock, ClassifierBlock, TinyVGG

# Debugging
input_data = torch.rand(size=(10, 3, 64, 64))
input_data.shape
# torch.Size([10, 3, 64, 64])

vgg1 = VGGBlock(in_channels=3, out_channels=10)
vgg1_out = vgg1(input_data)
vgg1_out.shape
# torch.Size([10, 10, 30, 30])

vgg2 = VGGBlock(in_channels=10, out_channels=10)
vgg2_out = vgg2(vgg1_out)
vgg2_out.shape
# torch.Size([10, 10, 13, 13])

# Test
import torch
from service.models.vgg_model import TinyVGG

model = TinyVGG(color_size=3, target_size=3)
input_data = torch.randn(size=(64, 3, 64, 64))
input_data.shape
# torch.Size([64, 3, 64, 64])

pred = model(input_data)
pred.shape
# torch.Size([64, 3])
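torchinfo was installed when setting up the environment, so the layer-by-layer output shapes can also be checked with its summary function (the batch size of 32 here is just an example):

from torchinfo import summary

# Prints each layer's output shape and parameter count
summary(model, input_size=(32, 3, 64, 64))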
model_builder.py
from models.vgg_model import TinyVGG

def create_model(
    color_size,
    target_size,
    is_trained: bool = False
):
    if is_trained:
        # Loading a pre-trained model is not implemented yet
        return
    return TinyVGG(color_size=color_size, target_size=target_size)

# test.ipynb
from service.model_builder import create_model

my_vgg = create_model(color_size=3, target_size=3)
engine.py
import torch
from tqdm.auto import tqdm
from typing import Dict, List, Tuple
from utils import EarlyStopper

def train_step(
    model: torch.nn.Module,
    dataloader: torch.utils.data.DataLoader,
    loss_fn: torch.nn.Module,
    optimizer: torch.optim.Optimizer,
    device: torch.device,
    valid_fn=None
) -> Tuple[float, float]:
    model.train()
    train_loss, train_valid = 0, 0
    for _, (X, y) in tqdm(enumerate(dataloader), desc='Train Loop', leave=False, total=len(dataloader)):
        X, y = X.to(device), y.to(device)
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()
        if valid_fn:
            train_valid += valid_fn(y_pred.argmax(dim=1).cpu().numpy(), y.cpu().numpy())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    train_loss = train_loss / len(dataloader)
    train_valid = train_valid / len(dataloader)
    return train_loss, train_valid

def test_step(
    model: torch.nn.Module,
    dataloader: torch.utils.data.DataLoader,
    loss_fn: torch.nn.Module,
    device: torch.device,
    valid_fn=None
) -> Tuple[float, float]:
    model.eval()
    test_loss, test_valid = 0, 0
    with torch.inference_mode():
        for _, (X, y) in tqdm(enumerate(dataloader), desc='Testing Loop', leave=False, total=len(dataloader)):
            X, y = X.to(device), y.to(device)
            test_pred_logits = model(X)
            loss = loss_fn(test_pred_logits, y)
            test_loss += loss.item()
            if valid_fn:
                test_valid += valid_fn(test_pred_logits.argmax(dim=1).cpu().numpy(), y.cpu().numpy())
    test_loss = test_loss / len(dataloader)
    test_valid = test_valid / len(dataloader)
    return test_loss, test_valid

def train(
    model: torch.nn.Module,
    train_dataloader: torch.utils.data.DataLoader,
    test_dataloader: torch.utils.data.DataLoader,
    optimizer: torch.optim.Optimizer,
    loss_fn: torch.nn.Module,
    epochs: int,
    device: torch.device,
    early_stopper: EarlyStopper,
    valid_fn=None
) -> Dict[str, List]:
    results = {
        'train_loss': [],
        'train_valid': [],
        'test_loss': [],
        'test_valid': []
    }
    for epoch in tqdm(range(epochs), desc='Epoch Loop', leave=True):
        train_loss, train_valid = train_step(
            model=model,
            dataloader=train_dataloader,
            loss_fn=loss_fn,
            optimizer=optimizer,
            device=device,
            valid_fn=valid_fn
        )
        test_loss, test_valid = test_step(
            model=model,
            dataloader=test_dataloader,
            loss_fn=loss_fn,
            device=device,
            valid_fn=valid_fn
        )
        print(
            f'Epoch: {epoch+1} | '
            f'train_loss: {train_loss:.4f} | '
            f'test_loss: {test_loss:.4f}'
        )
        results['train_loss'].append(train_loss)
        results['test_loss'].append(test_loss)
        if valid_fn:
            results['train_valid'].append(train_valid)
            results['test_valid'].append(test_valid)
        if not early_stopper.is_continuable(model=model, loss=test_loss):
            break
    return results
utils.py
import torch
import numpy as np

class EarlyStopper(object):
    def __init__(self, num_trials, save_path):
        self.num_trials = num_trials
        self.trial_counter = 0
        self.best_loss = np.inf
        self.save_path = save_path

    def is_continuable(self, model, loss):
        if loss < self.best_loss:
            # New best loss: reset the patience counter and checkpoint the model
            self.best_loss = loss
            self.trial_counter = 0
            torch.save(model, self.save_path)
            return True
        elif self.trial_counter + 1 < self.num_trials:
            self.trial_counter += 1
            return True
        else:
            return False

    def get_best_model(self, device):
        # Note: on PyTorch >= 2.6 this may need torch.load(self.save_path, weights_only=False)
        return torch.load(self.save_path).to(device)
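The patience logic can be checked standalone with a dummy model and made-up losses (a minimal sketch; demo.pth is a throwaway path):

import torch
from service.utils import EarlyStopper

stopper = EarlyStopper(num_trials=3, save_path='./models/demo.pth')
dummy_model = torch.nn.Linear(1, 1)

# The loss improves twice, then stalls; after num_trials non-improving
# epochs in a row, is_continuable returns False and training would stop
for loss in [1.0, 0.8, 0.9, 0.9, 0.9]:
    print(loss, stopper.is_continuable(model=dummy_model, loss=loss))
# 1.0 True / 0.8 True / 0.9 True / 0.9 True / 0.9 False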
train.py
import argparse
import torch
from torchvision import transforms
from data_setup import create_dataset, create_dataloader
from model_builder import create_model
from engine import train
from utils import EarlyStopper

def main(args):
    transform = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor()
    ])
    train_dataset = create_dataset(train=True, transform=transform)
    test_dataset = create_dataset(train=False, transform=transform)
    train_dataloader = create_dataloader(dataset=train_dataset, batch_size=args.batch_size)
    test_dataloader = create_dataloader(dataset=test_dataset, batch_size=args.batch_size)

    # Infer the channel count and class count from the data itself
    feature, _ = train_dataset[0]
    vgg_model = create_model(color_size=feature.shape[0], target_size=len(train_dataset.classes), is_trained=False)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    vgg_model.to(device)

    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        params=vgg_model.parameters(),
        lr=args.lr
    )
    early_stopper = EarlyStopper(num_trials=5, save_path=args.trained_model)

    train(
        model=vgg_model,
        train_dataloader=train_dataloader,
        test_dataloader=test_dataloader,
        optimizer=optimizer,
        loss_fn=loss_fn,
        epochs=args.epochs,
        device=device,
        early_stopper=early_stopper
    )

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', default=32, type=int)
    parser.add_argument('--lr', default=0.01, type=float)
    parser.add_argument('--epochs', default=50, type=int)
    parser.add_argument('--trained_model', default='./models/trained_model.pth')
    args = parser.parse_args()
    main(args)
Run
python .\service\train.py
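Hyperparameters can also be overridden through the argparse flags, e.g.:

python .\service\train.py --batch_size 16 --lr 0.001 --epochs 30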
2. TensorBoard
%matplotlib inline
import torch
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter()  # creates the 'runs' directory -> log data is written there

# Toy linear regression data: y = -5x + noise
x = torch.arange(-5, 5, 0.1).view(-1, 1)
y = -5 * x + 0.1 * torch.randn(x.size())

model = torch.nn.Linear(1, 1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

def train_model(iter):
    for epoch in range(iter):
        y1 = model(x)
        loss = criterion(y1, y)
        writer.add_scalar('Loss/sample', loss, epoch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

train_model(10)
writer.flush()  # write pending log data to disk
writer.close()  # shut down the writer
%load_ext tensorboard
%tensorboard --logdir=runs
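Outside a notebook, the same dashboard can be started from a terminal and opened in a browser (default port 6006):

tensorboard --logdir=runs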

3. HPO Tuning
import torch
import matplotlib.pyplot as plt
Learning Rate Scheduler
StepLR
- A scheduler that decays the learning rate at fixed step intervals
- Every step_size epochs, the learning rate is multiplied by gamma, i.e., lr_epoch = lr_0 * gamma^floor(epoch / step_size)
- With lr=100, step_size=2, gamma=0.1 below, the rate drops 100 -> 10 -> 1 -> ... every two steps
model = torch.nn.Linear(2, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=100)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

lrs = []
for i in range(10):
    optimizer.step()  # update the model parameters
    lrs.append(optimizer.param_groups[0]['lr'])
    scheduler.step()  # update the lr value

plt.plot(range(10), lrs)

CyclicLR - triangular2
- The learning rate cycles between base_lr and max_lr; in triangular2 mode the cycle amplitude is halved after each cycle
model = torch.nn.Linear(2, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = torch.optim.lr_scheduler.CyclicLR(
    optimizer,
    base_lr=0.001,
    max_lr=0.1,
    step_size_up=5,
    mode='triangular2'
)

lrs = []
for i in range(100):
    optimizer.step()
    lrs.append(optimizer.param_groups[0]['lr'])
    scheduler.step()

plt.plot(lrs)

Ray Tune
import os
from functools import partial
from ray import tune
from ray.tune.schedulers import ASHAScheduler

# load_data, train_cifar, trial_str_creator, num_samples, max_num_epochs, and
# gpus_per_trial are assumed to be defined as in the PyTorch Ray Tune tutorial.
def main():
    data_dir = os.path.abspath('./load_data')
    load_data(data_dir)
    config = {
        'l1': tune.choice([2**i for i in range(9)]),
        'l2': tune.choice([2**i for i in range(9)]),
        'lr': tune.loguniform(1e-4, 1e-1),
        'batch_size': tune.choice([2, 4, 8, 16])
    }
    scheduler = ASHAScheduler(
        metric='loss',
        mode='min',  # minimize loss / maximize accuracy
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2
    )
    reporter = tune.JupyterNotebookReporter(
        metric_columns=['loss', 'accuracy', 'training_iteration'])
    result = tune.run(
        partial(train_cifar, data_dir=data_dir),
        resources_per_trial={'cpu': 2, 'gpu': gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,  # show trial progress in the notebook
        trial_dirname_creator=trial_str_creator
    )
    return result
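After tune.run finishes, the best trial can be read back from the returned analysis object (a sketch assuming the older tune.run API used above):

result = main()
best_trial = result.get_best_trial('loss', 'min', 'last')
print('Best config:', best_trial.config)
print('Best loss:', best_trial.last_result['loss'])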