

pytorch lstm

The data comes from Dacon's exercise motion classification AI competition: dacon.io/competitions/official/235689/data/

In short, each user has a 600-step movement sequence, and the task is to classify it into one of 61 labels.

 

Let's implement this with a PyTorch LSTM.

 

www.kaggle.com/omershect/learning-pytorch-lstm-deep-learning-with-m5-data

I referred to this notebook heavily.

 

 

 

import pandas as pd

# PATH points to the directory that holds the competition CSV files
train = pd.read_csv(PATH + 'train_features.csv')
train_labels = pd.read_csv(PATH + 'train_labels.csv')
test = pd.read_csv(PATH + 'test_features.csv')
submission = pd.read_csv(PATH + 'sample_submission.csv')



import numpy as np

# 3,125 training samples, each 600 time steps x 6 sensor channels
X = train.iloc[:, 2:].values.reshape(3125, 600, 6)
# y = tf.keras.utils.to_categorical(train_labels['label'])
y = train_labels['label'].values          # integer class labels (0..60)
test = test.iloc[:, 2:].values.reshape(-1, 600, 6)



from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
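With 61 classes and only about 3,100 training samples, a plain random split can leave some labels barely represented in the 10% validation set. A stratified split is one alternative; this is my own sketch, not what the post used:

from sklearn.model_selection import train_test_split

# Stratified variant (assumption / alternative to the plain split above):
# keeps every label's proportion roughly equal in train and validation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, stratify=y, random_state=42
)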



import torch
import torch.nn as nn
from torch.autograd import Variable   # kept to match the original; plain tensors behave the same in recent PyTorch

X_train = Variable(torch.Tensor(X_train))
X_test = Variable(torch.Tensor(X_test))
y_train = Variable(torch.Tensor(y_train))
y_test = Variable(torch.Tensor(y_test))
test = Variable(torch.Tensor(test))

from torch.utils.data import TensorDataset, DataLoader

train_dataset = TensorDataset(X_train, y_train)  # create your dataset
test_dataset = TensorDataset(X_test, y_test)     # create your dataset

train_dataloader = DataLoader(train_dataset, batch_size=200, shuffle=True)
valid_dataloader = DataLoader(test_dataset)      # default batch_size=1
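A quick look at one batch (my addition, not in the original post) confirms the shapes the LSTM will receive: with batch_first=True it expects (batch, seq_len, input_size).

# Peek at one training batch to confirm the tensor shapes
xb, yb = next(iter(train_dataloader))
print(xb.shape, yb.shape)   # expected: torch.Size([200, 600, 6]) torch.Size([200])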




 

 

 

 

model

## Many-to-one: the LSTM reads the full 600-step sequence and only the
## last time step's output is fed to the classifier.
# With batch_first=True the input shape is (batch, seq_len, input_size)
# (the nn.LSTM default without batch_first would be (seq_len, batch, input_size)).



class LSTM2(nn.Module):

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super(LSTM2, self).__init__()
        
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        
        self.LSTM2 = nn.LSTM(input_size=input_size, hidden_size=hidden_size, 
                             num_layers=num_layers,batch_first=True)
       
        self.fc1 = nn.Linear(hidden_size, int(num_classes))

        self.dropout = nn.Dropout(p=0.2)   # defined but not applied in forward()

        self.count = 0


    def forward(self, x):
        # zero-initialized hidden and cell states: (num_layers, batch, hidden_size)
        h_1 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size).to(device))

        c_1 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size).to(device))
        
       


        out1, (hn, cn) = self.LSTM2(x, (h_1, c_1))   # out1: (batch, seq_len, hidden_size)

        final_state = out1[:, -1, :]   # output at the last time step: (batch, hidden_size)

        out1 = self.fc1(final_state)   # (batch, num_classes) logits


        return out1
    
    
def init_weights(m):
    # initialize every parameter uniformly in [-0.08, 0.08]
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
        # nn.init.uniform_(param.data, -0.1, 0.1)

 

The LSTM itself is already implemented in the PyTorch library, so all we have to do is lay out the surrounding structure.
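A quick forward-pass check (my addition, not in the original post) confirms the shape the classifier head produces: one logit per class for each sample in the batch.

# Hypothetical smoke test on CPU with a small hidden size, just to verify shapes
device = 'cpu'                          # forward() reads this module-level name
model = LSTM2(num_classes=61, input_size=6, hidden_size=32, num_layers=2)
dummy = torch.zeros(4, 600, 6)          # (batch, seq_len, input_size)
out = model(dummy)
print(out.shape)                        # expected: torch.Size([4, 61])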

 

 

 

device = 'cuda'
SAVEPATH = '/content/drive/My Drive/Colab Notebooks/대회/dacon 동작분류/'

patience = 30

early_stopping = EarlyStopping(patience=patience, verbose=True)   # helper class; a sketch follows the training loop

num_epochs = 200
learning_rate = 1e-3
input_size = 6
hidden_size = 512
num_layers = 2

num_classes = 61
# quantile = 0.3


train_losses = []
valid_losses = []


lstm = LSTM2(num_classes, input_size, hidden_size, num_layers)
lstm.to(device)

lstm.apply(init_weights)

# criterion = quantile_loss
criterion = torch.nn.NLLLoss()   # negative log-likelihood loss for classification
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate, weight_decay=1e-5)
criterion = criterion.cuda()

# NLLLoss expects log-probabilities, so LogSoftmax is used here
# (plain Softmax fed into NLLLoss would not give the proper cross-entropy)
m = nn.LogSoftmax(dim=1)

# optimizer = torch.optim.SGD(lstm.parameters(), lr=learning_rate,momentum=0.9)

# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,  patience= 60, 
#           factor =0.1 ,min_lr=1e-6, eps=1e-08)

# scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[100,700,900,1000], gamma=0.1)
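# --- quick check (my addition, not in the original post) ---
# LogSoftmax followed by NLLLoss is numerically the same as CrossEntropyLoss
# applied to raw logits, which is why this pairing is used for classification.
logits = torch.randn(8, 61)                        # fake batch of logits
targets = torch.randint(0, 61, (8,))
nll = torch.nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), targets)
ce = torch.nn.CrossEntropyLoss()(logits, targets)
print(torch.allclose(nll, ce))                     # True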

# progress_bar is assumed to come from fastprogress (from fastprogress import progress_bar);
# a plain range(num_epochs + 1) works just as well
for epoch in progress_bar(range(num_epochs+1)):


  # train

  losses = []
  for i, (input, target) in enumerate(train_dataloader):
 
    lstm.train()
    outputs = lstm(input.to(device))
    optimizer.zero_grad()


    # print(f'target shape: {target.shape}')
    # print(f'outputs shape: {outputs.shape}')
    # print(outputs[0])
    # print(target[0])
    loss = criterion(m(outputs), target.type(torch.long).to(device))
    loss.backward()

    # scheduler.step()
    # torch.nn.utils.clip_grad_norm_(lstm.parameters(), 10)

    optimizer.step()
    losses.append(loss.item())

  train_losses.append(np.mean(np.array(losses)))

  # validation
  losses = []
  for i, (input, target) in enumerate(valid_dataloader):

    lstm.eval()
    valid = lstm(input.to(device))

    #inverse
    # valid = torch.from_numpy(scaler.inverse_transform(valid.cpu().detach().numpy()))

    vall_loss = criterion(m(valid), target.type(torch.long).to(device))
    # scheduler.step(vall_loss)
    losses.append(vall_loss.item())

  valid_losses.append(np.mean(np.array(losses)))



  if epoch % 10 == 0:
    # print(criterion1(outputs, y_train.to(device),quantile))

    print("Epoch: %d, loss: %1.5f valid loss:  %1.5f lr: %1.5f " %(epoch, train_losses[-1],valid_losses[-1],
                                                                    optimizer.param_groups[0]["lr"]))

  torch.save(lstm.state_dict(), SAVEPATH+'model_weight.pth')   # save the latest weights every epoch
  # model.load_state_dict(torch.load(SAVEPATH+'model_weight.pth'))

  # early_stopping checks whether the validation loss has decreased;
  # if so, the current model is checkpointed.
  early_stopping(round(valid_losses[-1], 5), lstm)

  if early_stopping.early_stop:
    print("Epoch: %d, loss: %1.5f valid loss:  %1.5f lr: %1.5f " %(epoch, train_losses[-1],valid_losses[-1],
                                                                  optimizer.param_groups[0]["lr"]))
    break
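The loop above relies on an EarlyStopping helper that the post never defines; it behaves like the commonly used pytorchtools-style helper. Below is a minimal sketch, written by me as an assumption of what it does, compatible with the calls above (constructed with patience and verbose, called with the validation loss and the model, and exposing an early_stop flag):

# Minimal EarlyStopping sketch (assumption): stop when the validation loss
# hasn't improved for `patience` epochs, checkpointing the best model so far.
class EarlyStopping:
    def __init__(self, patience=7, verbose=False, path='checkpoint.pt'):
        self.patience = patience
        self.verbose = verbose
        self.path = path
        self.best_loss = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss, model):
        if self.best_loss is None or val_loss < self.best_loss:
            # improvement: reset the counter and checkpoint the model
            if self.verbose:
                print(f'validation loss improved to {val_loss:.5f}, saving model')
            self.best_loss = val_loss
            self.counter = 0
            torch.save(model.state_dict(), self.path)
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True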

 

 

The submission has to contain a probability for each of the 61 labels.

So for training, log-softmax + NLLLoss (together, just cross-entropy) is applied on top of the model's output, and a plain softmax turns the outputs into the submitted probabilities (see the sketch below).
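The post stops before the inference step, so here is a sketch of how the submission could be produced under the same setup. I'm assuming sample_submission has an id column followed by one column per label, which is the usual layout for this competition format:

# Hypothetical inference and submission step (not shown in the original post)
lstm.eval()
with torch.no_grad():
    logits = lstm(test.to(device))                       # (n_test, 61)
    probs = torch.softmax(logits, dim=1).cpu().numpy()   # class probabilities

submission.iloc[:, 1:] = probs                           # assumption about the column layout
submission.to_csv(PATH + 'submission.csv', index=False)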

 

 

 

 
