
bare backward propagation

Training an MNIST model on a network using backpropagation

This article contains all of the code for the implementation except the forward and backward passes of the activation functions sigmoid(), softmax(), and Relu(). If you understand all of this code, it is fair to say you have grasped the basics of backpropagation. First, we source the files that contain the functions we will use.

source("./functions.R")
source("./utils.R")
source("./optimizer.R")

For how to import the MNIST data, see the wiki page introducing the package for reading the MNIST handwritten-digit data (Mnist 손글씨 데이터 μ½μ–΄μ˜€λŠ” νŒ¨ν‚€μ§€ μ†Œκ°œ). The code for importing the data is as follows; for a description of this code, refer to the following.

# install.packages("dslabs")  # skip if already installed
library(dslabs)

mnist_data <- get_data()

x_train_normalize <- mnist_data$x_train
x_test_normalize <- mnist_data$x_test

t_train_onehotlabel <- making_one_hot_label(mnist_data$t_train,60000, 10)
t_test_onehotlabel <- making_one_hot_label(mnist_data$t_test,10000, 10)
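
get_data() and making_one_hot_label() come from the sourced utility files. making_one_hot_label() turns the integer labels into a one-hot matrix with one row per example and one column per class; a minimal sketch of what it is assumed to do (the sourced version may be written differently):

making_one_hot_label <- function(labels, rows, cols){
  onehot <- matrix(0, nrow = rows, ncol = cols)
  # dslabs labels run 0..9, so shift by one for R's 1-based column index
  onehot[cbind(1:rows, labels + 1)] <- 1
  return(onehot)
}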

Now we build the network that we are going to train.

TwoLayerNet <- function(input_size, hidden_size, output_size, weight_init_std = 0.01) {
  # weights start as small Gaussian values, biases start at zero
  W1 <- weight_init_std*matrix(rnorm(n = input_size*hidden_size), nrow = input_size, ncol = hidden_size)
  b1 <- matrix(rep(0,hidden_size),nrow=1,ncol=hidden_size)
  W2 <- weight_init_std*matrix(rnorm(n = hidden_size*output_size), nrow = hidden_size, ncol = output_size)
  b2 <- matrix(rep(0,output_size),nrow=1,ncol=output_size)
  return (list(W1 = W1, b1 = b1, W2 = W2, b2 = b2))
}
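
For example, a network built for 784-pixel MNIST inputs, a 50-unit hidden layer, and 10 output classes has weight matrices of the expected shapes:

check_net <- TwoLayerNet(input_size = 784, hidden_size = 50, output_size = 10)
dim(check_net$W1)  # 784  50
dim(check_net$W2)  # 50  10
dim(check_net$b1)  # 1  50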

μ•žμ—μ„œ λ§Œλ“  λ„€νŠΈμ›μ„ ν•™μŠ΅μ‹œν‚¬ λͺ¨λΈμ„ λ§Œλ“ λ‹€. 이 ν•¨μˆ˜λ₯Ό λ‹€ λ”°λ‘œ λ§Œλ“  μ΄μœ λŠ” μš°μ„  model.forward()은 μ˜ˆμΈ‘μ„ ν•˜κΈ° μœ„ν•΄ ν•„μš”ν•˜λ‹€. loss()은 λ‹Ήμ—°νžˆ 손싀값을 μ•Œμ•„λ³΄κΈ° μœ„ν•΄μ„œ ν•„μš”ν•˜λ‹€.

model.forward <- function(network, x){
  # Affine -> ReLU -> Affine; keep each layer's forward result for the backward pass
  Affine_1 <- Affine.forward(network$W1, network$b1, x)
  Relu_1 <- Relu.forward(Affine_1$out)
  Affine_2 <- Affine.forward(network$W2, network$b2, Relu_1$out)
  return(list(x = Affine_2$out, Affine_1.forward = Affine_1, Affine_2.forward = Affine_2, Relu_1.forward = Relu_1))
}

loss <- function(model.forward, network, x, t){
  # run the forward pass, then compute the softmax cross-entropy loss against the labels t
  temp <- model.forward(network, x)
  y <- temp$x
  last_layer.forward <- SoftmaxWithLoss.forward(y, t)
  return(list(loss = last_layer.forward$loss, softmax = last_layer.forward, predict = temp))
}


gradient <- function(model.forward, network, x, t) {
  # forward pass
  temp <- loss(model.forward, network, x, t)
  # backward pass: propagate the gradient from the loss back through each layer
  dout <- 1
  last.backward <- SoftmaxWithLoss.backward(temp$softmax, dout)
  Affine_2.backward <- Affine.backward(temp$predict$Affine_2.forward, dout = last.backward$dx)
  Relu_1.backward <- Relu.backward(temp$predict$Relu_1.forward, dout = Affine_2.backward$dx)
  Affine_1.backward <- Affine.backward(temp$predict$Affine_1.forward, dout = Relu_1.backward$dx)
  grads <- list(W1 = Affine_1.backward$dW, b1 = Affine_1.backward$db, W2 = Affine_2.backward$dW, b2 = Affine_2.backward$db)
  return(grads)
}
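
gradient() assumes that each forward function returns a list which the matching backward function consumes. As a rough idea of that contract, an affine (fully connected) layer could look like this minimal sketch; the actual implementations live in functions.R and may differ in detail.

Affine.forward <- function(W, b, x){
  # linear transform out = xW + b; keep W and x because the backward pass needs them
  out <- x %*% W + matrix(b, nrow = nrow(x), ncol = ncol(W), byrow = TRUE)
  return(list(out = out, W = W, x = x))
}

Affine.backward <- function(forward, dout){
  # gradients with respect to the input, the weights, and the bias
  dx <- dout %*% t(forward$W)
  dW <- t(forward$x) %*% dout
  db <- matrix(colSums(dout), nrow = 1)
  return(list(dx = dx, dW = dW, db = db))
}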

Let's test what we have built so far.

train_size <- dim(x_train_normalize)[1]
batch_size <- 100
train_loss_list <- data.frame(lossvalue  =  0)
train_acc_list <- data.frame(train_acc  =  0)
test_acc_list <- data.frame(test_acc  =  0)
iter_per_epoch <- max(train_size / batch_size, 1)
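
With 60,000 training images and a batch size of 100, iter_per_epoch comes out to 600, so the accuracy check in the training loop further below will run once every 600 iterations.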

temp_TwoLayerNet <- TwoLayerNet(input_size = 784, hidden_size = 50, output_size = 10)

grads <- gradient(model.forward = model.forward, network = temp_TwoLayerNet, x = x_train_normalize[1:batch_size,], t = t_train_onehotlabel[1:batch_size,])
loss_value <- loss(model.forward = model.forward, network = temp_TwoLayerNet, x = x_train_normalize[1:batch_size,], t = t_train_onehotlabel[1:batch_size,])$loss

Running the code above produces output like the following (the exact value varies with the random initialization).

> loss_value
[1] 2.302899
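
This value is no accident: before training, the softmax output is close to uniform over the 10 classes, so the cross-entropy loss is roughly -log(1/10).

> -log(1/10)
[1] 2.302585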

μ΄λ ‡κ²Œ λŒμ•„κ°€λŠ” λͺ¨λΈμ„ ν‰κ°€ν•˜λŠ” 평가 ν•¨μˆ˜λ₯Ό λ§Œλ“€κ² μŠ΅λ‹ˆλ‹€.

model.evaluate <- function(model, network, x, t){
    temp <- model(network, x)
    # max.col() returns the index of the largest value in each row:
    # the predicted class from the scores and the true class from the one-hot labels
    y <- max.col(temp$x)
    t <- max.col(t)
    accuracy <- (sum(ifelse(y == t, 1, 0))) / dim(x)[1]
    return(accuracy)
}
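
Since the network has not been trained yet, it should only get about one prediction in ten right, so evaluating it now gives an accuracy near 0.1:

model.evaluate(model.forward, temp_TwoLayerNet, x_test_normalize, t_test_onehotlabel)
# roughly 0.1 before training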

Now we simply run the training loop. It updates the parameters with sgd.update() from optimizer.R; a sketch of what that update is assumed to do comes first, followed by the loop itself.
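
sgd.update() is not shown in this article; a minimal sketch of the assumed behavior, plain stochastic gradient descent with a fixed learning rate (the version in optimizer.R may take different arguments):

sgd.update <- function(network, grads, lr = 0.1){
  # subtract the gradient times the learning rate from every parameter
  for (name in names(network)) {
    network[[name]] <- network[[name]] - lr * grads[[name]]
  }
  return(network)
}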

for(i in 1:2000){
  # sample a random mini-batch from the training data
  batch_mask <- sample(train_size, batch_size)
  x_batch <- x_train_normalize[batch_mask,]
  t_batch <- t_train_onehotlabel[batch_mask,]

  # compute the gradients by backpropagation and update the parameters
  grad <- gradient(model.forward = model.forward, network = temp_TwoLayerNet, x_batch, t_batch)

  temp_TwoLayerNet <- sgd.update(temp_TwoLayerNet, grad)

  # record the loss, and once per epoch the train/test accuracy
  loss_value <- loss(model.forward = model.forward, network = temp_TwoLayerNet, x_batch, t_batch)$loss
  train_loss_list <- rbind(train_loss_list, loss_value)

  if(i %% iter_per_epoch == 0){
    train_acc <- model.evaluate(model.forward, temp_TwoLayerNet, x_train_normalize, t_train_onehotlabel)
    test_acc <- model.evaluate(model.forward, temp_TwoLayerNet, x_test_normalize, t_test_onehotlabel)
    train_acc_list <- rbind(train_acc_list, train_acc)
    test_acc_list <- rbind(test_acc_list, test_acc)
    print(c(train_acc, test_acc))
  }
}
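
With 2000 iterations and iter_per_epoch equal to 600, the accuracy pair is printed three times (at i = 600, 1200, and 1800). To see how the loss fell during training, the recorded values can be plotted with base R; a small example, assuming the loop above has finished:

# drop the dummy first row (the data frame was initialized with a single 0)
plot(train_loss_list$lossvalue[-1], type = "l",
     xlab = "iteration", ylab = "loss")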