
Visualizing CNN kernels

This was done in Colab.

 

 

Required libraries

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import torch.nn.functional as F
import numpy as np
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
  

 

MNIST


# Define a transform to normalize the data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Download and load the training and test data
trainset = datasets.MNIST('MNIST_data/', download=True, train=True, transform=transform)
testset = datasets.MNIST('MNIST_data/', download=True, train=False, transform=transform)

trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=True)
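
A quick sanity check on the loader (a sketch added here, not in the original post): grab one batch and confirm the shapes before building the model.

# Sketch: inspect one batch from the training loader.
images, labels = next(iter(trainloader))
print(images.shape)   # expected: torch.Size([64, 1, 28, 28])
print(labels.shape)   # expected: torch.Size([64])

# Show the first image of the batch.
plt.imshow(images[0].squeeze(), cmap='gray')
plt.title('label: {}'.format(labels[0].item()))
plt.show()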

 

Build a simple CNN

class BasicBlock(nn.Module):
  def __init__(self, in_channels, out_channels, ksize=3, stride=1, pad=1):
    super(BasicBlock, self).__init__()
    self.body = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, ksize, stride, pad),
        nn.ReLU(inplace=True)
    )

  def forward(self, x):
    out = self.body(x)
    return out


# Define the network architecture
class CNN(nn.Module):
  def __init__(self):
    super(CNN, self).__init__()
    self.b1 = BasicBlock(3, 32)
    self.b2 = BasicBlock(32, 32)
    self.maxpool = nn.MaxPool2d(2, 2)
    self.dropout = nn.Dropout(0.25)
    self.b3 = BasicBlock(32, 64)
    self.b4 = BasicBlock(64, 64)
    self.linear1 = nn.Linear(3136, 512)   # 64 channels * 7 * 7 after two poolings
    self.linear2 = nn.Linear(512, 10)

  def forward(self, x):
    # MNIST images are single-channel; expand to 3 channels for the first block
    x = x.expand(x.shape[0], 3, 28, 28)

    out = self.b1(x)
    out = self.b2(out)
    out = self.maxpool(out)
    out = self.dropout(out)
    out = self.b3(out)
    out = self.b4(out)
    out = self.maxpool(out)
    out = self.dropout(out)

    out = out.view(out.size(0), -1)
    out = self.linear1(out)
    out = self.dropout(out)
    out = self.linear2(out)
    return out
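
The 3136-unit linear layer is easy to get wrong, so here is a small shape check (a sketch, not part of the original post) that runs a dummy batch through an untrained network:

# Sketch: verify the flattened feature size matches nn.Linear(3136, 512).
m = CNN()
dummy = torch.zeros(2, 1, 28, 28)   # a fake batch of two MNIST-sized images
print(m(dummy).shape)               # expected: torch.Size([2, 10])
# After two 2x2 max-poolings a 28x28 input becomes 7x7, and 64 * 7 * 7 = 3136.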

  

batch_size = 256   # note: unused here; the DataLoaders above were built with batch_size=64

model = CNN().cuda()

optimizer = optim.SGD(model.parameters(), lr=1e-1)

criterion = nn.CrossEntropyLoss()

epochs = 10

 

 

Training

t_accs, v_accs, t_loss, v_loss = [], [], [], []

for epoch in range(epochs):
  train_loss = 0
  train_accuracy = 0
  model.train()
  for i, (images, labels) in enumerate(trainloader):
    images = images.cuda()
    labels = labels.cuda()
    optimizer.zero_grad()
    output = model(images)

    # Track training accuracy from the raw logits (topk over logits picks the same class)
    top_p, top_class = output.topk(1, dim=1)
    equals = top_class == labels.view(*top_class.shape)
    train_accuracy += torch.mean(equals.type(torch.FloatTensor))

    loss = criterion(output, labels)
    loss.backward()
    optimizer.step()
    train_loss += loss.item()

  t_accs.append(train_accuracy / len(trainloader))
  t_loss.append(train_loss / len(trainloader))

  # Validation pass
  test_loss = 0
  test_accuracy = 0
  model.eval()   # set the model to evaluation mode
  with torch.no_grad():
    for images, labels in testloader:
      images = images.cuda()
      labels = labels.cuda()
      output = model(images)
      test_loss += criterion(output, labels).item()
      top_p, top_class = output.topk(1, dim=1)
      equals = top_class == labels.view(*top_class.shape)
      test_accuracy += torch.mean(equals.type(torch.FloatTensor))
  v_accs.append(test_accuracy / len(testloader))
  v_loss.append(test_loss / len(testloader))

  print("==> Epoch[{}/{}]".format(epoch + 1, epochs))
  print("loss: {:.3f}, Accuracy: {:.3f}, val_loss: {:.3f}, val_accuracy: {:.3f}"
        .format(t_loss[-1], t_accs[-1], v_loss[-1], v_accs[-1]))

  model_out_path = './model.pth'
  torch.save(model.state_dict(), model_out_path)
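
The loop records per-epoch loss and accuracy in t_loss/v_loss and t_accs/v_accs but never plots them; a minimal sketch for the curves (using the names above) could look like this:

# Sketch: plot the loss/accuracy recorded during training.
t_acc_vals = [float(a) for a in t_accs]   # accuracies were accumulated as tensors
v_acc_vals = [float(a) for a in v_accs]
xs = range(1, epochs + 1)

plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.plot(xs, t_loss, label='train')
plt.plot(xs, v_loss, label='val')
plt.title('Loss'); plt.xlabel('epoch'); plt.legend()

plt.subplot(1, 2, 2)
plt.plot(xs, t_acc_vals, label='train')
plt.plot(xs, v_acc_vals, label='val')
plt.title('Accuracy'); plt.xlabel('epoch'); plt.legend()
plt.show()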

 

Results

 

model = CNN().cuda()
model_out_path = './model.pth'
checkpoint = torch.load(model_out_path)
model.load_state_dict(checkpoint, strict=True)

kernels = []
weights = []
bias = []
for name, param in model.named_parameters():
  if 'body' in name:
    if 'weight' in name:
      kernels.append(name)
      # Min-max normalize each kernel tensor to [0, 1] for display
      minv, maxv = param.min(), param.max()
      param = (param - minv) / (maxv - minv)
      weights.append(param.cpu().detach().numpy())
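
For reference (a quick sketch, not in the original post), printing what was collected shows one weight entry per BasicBlock convolution:

# Sketch: list the collected convolution weights and their shapes.
for name, w in zip(kernels, weights):
  print(name, w.shape)
# Expected something like:
# b1.body.0.weight (32, 3, 3, 3)
# b2.body.0.weight (32, 32, 3, 3)
# b3.body.0.weight (64, 32, 3, 3)
# b4.body.0.weight (64, 64, 3, 3)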



n_kernels = 32
# Visualize the first conv layer's filters (32 kernels x 3 input channels)
plt.figure(figsize=(20, 3))
plt.title("Kernels of conv2d")
for i in range(n_kernels):
  f = weights[0][i, :, :, :]
  for j in range(3):
    plt.subplot(3, n_kernels, j * n_kernels + i + 1)
    plt.imshow(f[j, :, :], cmap='gray')
    plt.xticks([]); plt.yticks([])
    plt.title(str(i) + '_' + str(j))

plt.show()

 

 

 

class partial_CNN(nn.Module):
  def __init__(self):
    super(partial_CNN, self).__init__()
    # Same first block as the full CNN so its saved weights can be loaded
    self.b1 = BasicBlock(3, 32)

  def forward(self, x):
    # Expand the single MNIST channel to 3, exactly as CNN.forward does
    x = x.expand(x.shape[0], 3, 28, 28)
    out = self.b1(x)
    return out

partial_Model = partial_CNN()
checkpoint = torch.load(model_out_path)
partial_Model.load_state_dict(checkpoint, strict=False)   # only b1's weights are used
partial_Model.eval()

# Grab one test image
for test_images, _ in testloader:
  x_test = test_images[3]
  break

plt.imshow(x_test[0, :, :], cmap='gray')
plt.show()

# Run it through the first block and plot the 32 feature maps
x_test = x_test.unsqueeze(0)
x_test = partial_Model(x_test).detach().numpy().squeeze()
plt.figure(figsize=(20, 3))
for i in range(32):
  plt.subplot(2, 16, i + 1)
  plt.imshow(x_test[i, :, :], cmap='gray')
  plt.xticks([]); plt.yticks([])
  plt.title('map' + str(i))

plt.show()
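
An alternative that avoids defining a separate partial model is a forward hook on the trained network's first block. This is just a sketch reusing the names above (model, testloader), not part of the original post:

# Sketch: capture the first block's feature maps with a forward hook.
feature_maps = {}

def save_output(module, inputs, output):
  feature_maps['b1'] = output.detach().cpu()

hook = model.b1.register_forward_hook(save_output)

model.eval()
with torch.no_grad():
  images, _ = next(iter(testloader))
  model(images.cuda())              # forward pass fills feature_maps['b1']

hook.remove()
print(feature_maps['b1'].shape)     # expected: torch.Size([64, 32, 28, 28])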
