
Building MNIST Classifiers with RNN, LSTM, and GRU (PyTorch Lightning)

The MNIST Dataset

The MNIST dataset consists of handwritten digits from 0 to 9.
The training set contains 60,000 images and the test set contains 10,000.

import torchvision.transforms as T
import torchvision
import torch
from torch.utils.data import DataLoader

download_root = './MNIST_DATASET'

mnist_transform = T.Compose([
    T.ToTensor(),
])

train_dataset = torchvision.datasets.MNIST(download_root, transform=mnist_transform, train=True, download=True)
test_dataset = torchvision.datasets.MNIST(download_root, transform=mnist_transform, train=False, download=True)

total_size = len(train_dataset)
train_num, valid_num = int(total_size * 0.8), int(total_size * 0.2)
train_dataset,valid_dataset = torch.utils.data.random_split(train_dataset, [train_num, valid_num])

batch_size = 32

train_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
valid_dataloader = DataLoader(valid_dataset, batch_size = batch_size, shuffle = False)
test_dataloader = DataLoader(test_dataset, batch_size = batch_size, shuffle = False)
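
As a quick sanity check (optional), one batch can be inspected to confirm the shapes the models below expect:

images, labels = next(iter(train_dataloader))
print(images.shape)  # torch.Size([32, 1, 28, 28]) -> [batch, channel, height, width]
print(labels.shape)  # torch.Size([32])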

 

Building the Models

Taking the RNN as the representative example, here is how the image is fed to the model.
The key point is that each row of the image goes in sequentially: a 28×28 image is treated as a sequence of 28 time steps, each a 28-dimensional vector.
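
As a standalone sketch of this idea (shapes only, separate from the training code below):

import torch
import torch.nn as nn

rnn = nn.RNN(input_size = 28, hidden_size = 128, num_layers = 2, batch_first = True)
rows = torch.randn(32, 28, 28)   # [batch, 28 rows as time steps, 28 pixels per row]
out, h_n = rnn(rows)
print(out.shape)                 # torch.Size([32, 28, 128]) -> hidden state at every row
print(out[:, -1, :].shape)       # torch.Size([32, 128])     -> hidden state after the last row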

from pytorch_lightning import LightningModule, Trainer
import torch.optim as optim
import torchmetrics
import torch.nn as nn

from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import WandbLogger

import wandb

 

RNN Model Code

class RNNClassifier(LightningModule):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, lr):
        super().__init__()

        self.learning_rate = lr
        self.criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes = num_classes)

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes

        self.rnn = nn.RNN(input_size = input_size, hidden_size = hidden_size, num_layers = num_layers, batch_first = True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        '''
        INPUT :
            x : [batch_size, 1, height, width] -> squeezed to [batch_size, sequence_length, input_size]
        OUTPUT :
            out : [batch_size, num_classes]
        '''
        x = x.squeeze(1)  # [batch, 1, 28, 28] -> [batch, 28, 28]; squeeze only the channel dim
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)  # initial hidden state
        out, _ = self.rnn(x, h0)
        out = self.fc(out[:, -1, :])  # classify from the hidden state of the last row
        return out

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr = self.learning_rate)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 5, gamma = 0.5)
        return [optimizer], [scheduler]

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        loss = self.criterion(y_hat, y)

        _, preds = torch.max(y_hat, dim = 1)
        acc = self.accuracy(preds, y)

        self.log(f"valid_loss", loss, on_step = False, on_epoch = True, logger = True)
        self.log(f"valid_acc", acc, on_step = False, on_epoch = True, logger = True)

        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        loss = self.criterion(y_hat, y)

        _, preds = torch.max(y_hat, dim = 1)
        acc = self.accuracy(preds, y)

        self.log(f"valid_loss", loss, on_step = False, on_epoch = True, logger = True)
        self.log(f"valid_acc", acc, on_step = False, on_epoch = True, logger = True)

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        loss = self.criterion(y_hat, y)

        _, preds = torch.max(y_hat, dim = 1)
        acc = self.accuracy(preds, y)

        self.log(f"test_loss", loss, on_step = False, on_epoch = True, logger = True)
        self.log(f"test_acc", acc, on_step = False, on_epoch = True, logger = True)

    def predict_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        _, preds = torch.max(y_hat, dim = 1)

        return preds
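
A quick shape check (optional) confirms that the forward pass produces one logit per class:

sanity_model = RNNClassifier(input_size = 28, hidden_size = 128, num_layers = 2, num_classes = 10, lr = 0.001)
dummy = torch.randn(4, 1, 28, 28)  # a fake mini-batch in the same format the DataLoader yields
print(sanity_model(dummy).shape)   # torch.Size([4, 10])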


LSTM Model Code

class LSTMClassifier(LightningModule):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, lr):
        super().__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.learning_rate = lr
        
        self.criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes = num_classes)

        self.lstm = nn.LSTM(input_size = input_size, hidden_size = hidden_size, num_layers = num_layers, batch_first = True)
        self.fc = nn.Linear(hidden_size, num_classes)


    def forward(self, x):
        '''
        INPUT :
            x : [batch_size, 1, 28, 28]
        OUTPUT :
            out : [batch_size, num_classes]
        '''
        x = x.squeeze(1)  # [batch, 1, 28, 28] -> [batch, 28, 28]; squeeze only the channel dim
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)  # initial hidden state
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)  # initial cell state
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # classify from the hidden state of the last row

        return out

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr = self.learning_rate)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 5, gamma = 0.5)
        return [optimizer], [scheduler]

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        loss = self.criterion(y_hat, y)

        _, predict = torch.max(y_hat, dim = 1)
        acc = self.accuracy(predict, y)

        self.log(f"train_loss", loss, on_step = False, on_epoch = True, logger = True)
        self.log(f"train_acc", acc, on_step = False, on_epoch = True, logger = True)

        return loss
        

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        loss = self.criterion(y_hat, y)

        _, predict = torch.max(y_hat, dim = 1)
        acc = self.accuracy(predict, y)

        self.log(f"valid_loss", loss, on_step = False, on_epoch = True, logger = True)
        self.log(f"valid_acc", acc, on_step = False, on_epoch = True, logger = True)
        
    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        loss = self.criterion(y_hat, y)

        _, predict = torch.max(y_hat, dim = 1)
        acc = self.accuracy(predict, y)

        self.log(f"test_loss", loss, on_step = False, on_epoch = True, logger = True)
        self.log(f"test_acc", acc, on_step = False, on_epoch = True, logger = True)
        

    def predict_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        _, predict = torch.max(y_hat, dim = 1)

        return predict


GRU Model Code

class GRUClassifier(LightningModule):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, lr, dropout_prob):
        super().__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.learning_rate = lr
        
        self.criterion = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes = num_classes)

        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout = dropout_prob)  # inter-layer dropout; only applied when num_layers > 1
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        '''
        INPUT :
            x : [batch_size, 1, 28, 28]
        OUTPUT :
            out : [batch_size, num_classes]
        '''
        x = x.view(x.size(0), x.size(2), x.size(3))  # [batch, 1, 28, 28] -> [batch, 28, 28]
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)  # initial hidden state
        out, _ = self.gru(x, h0)
        out = out[:, -1, :]  # hidden state after the last row
        out = self.fc(out)
        return out

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr = self.learning_rate)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 1, gamma = 0.5)
        return [optimizer], [scheduler]
        
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        loss = self.criterion(y_hat, y)

        _, predict = torch.max(y_hat, dim = 1)
        acc = self.accuracy(predict, y)

        self.log(f"train_loss", loss, on_step = False, on_epoch = True, logger = True)
        self.log(f"train_acc", acc, on_step = False, on_epoch = True, logger = True)
        
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        loss = self.criterion(y_hat, y)

        _, predict = torch.max(y_hat, dim = 1)
        acc = self.accuracy(predict, y)

        self.log(f"valid_loss", loss, on_step = False, on_epoch = True, logger = True)
        self.log(f"valid_acc", acc, on_step = False, on_epoch = True, logger = True)
        
    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        loss = self.criterion(y_hat, y)

        _, predict = torch.max(y_hat, dim = 1)
        acc = self.accuracy(predict, y)

        self.log(f"test_loss", loss, on_step = False, on_epoch = True, logger = True)
        self.log(f"test_acc", acc, on_step = False, on_epoch = True, logger = True)
        

    def predict_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        _, predict = torch.max(y_hat, dim = 1)

        return predict

 

Training and Evaluating the Models

model = RNNClassifier(input_size = 28, hidden_size = 128, num_layers = 2, num_classes = 10, lr = 0.001)

early_stopping = EarlyStopping(monitor = 'valid_loss', mode = 'min', patience=5)
lr_monitor = LearningRateMonitor(logging_interval = 'epoch')

wandb_logger = WandbLogger(project = 'MNIST_RNN')

trainer = Trainer(
    max_epochs = 50,
    accelerator = 'auto',
    callbacks = [early_stopping, lr_monitor],
    logger = wandb_logger
)

trainer.fit(
    model,
    train_dataloader,
    valid_dataloader
)

trainer.test(model, test_dataloader)
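
Since each class also implements predict_step, the predicted labels can optionally be collected with Lightning's predict loop:

predictions = trainer.predict(model, test_dataloader)  # a list of per-batch tensors of predicted class indices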

To evaluate a model other than the RNN, just swap out the model declaration and run the same code!
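
For example (these hyperparameters mirror the RNN settings above and are illustrative, not tuned):

model = LSTMClassifier(input_size = 28, hidden_size = 128, num_layers = 2, num_classes = 10, lr = 0.001)

# GRUClassifier additionally takes a dropout probability; 0.2 here is an arbitrary example value
model = GRUClassifier(input_size = 28, hidden_size = 128, num_layers = 2, num_classes = 10, lr = 0.001, dropout_prob = 0.2)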


Analyzing the Results

The differences were not hugely significant, but the LSTM and GRU models achieved slightly higher accuracy.
However, all three models performed worse than a CNN, the standard architecture for image classification.
Working through this code was a good opportunity to explore how image data can be processed with sequence models.