bare backward propagation
Training a network on MNIST using backpropagation

This post contains the complete code for everything except the forward and backward passes of the activation functions sigmoid(), softmax(), and Relu(). If you understand all of this code, it is fair to say you have picked up the basics of backpropagation. First, we source the files that hold the functions we will use.
source("./functions.R")
source("./utils.R")
source("./optimizer.R")
For how to load the MNIST data, see the wiki page that introduces the package for reading the MNIST handwritten-digit data. The code below loads the data; see that same page for a line-by-line introduction to it.
# install.packages("dslabs")  # skip this line if dslabs is already installed
library(dslabs)
mnist_data <- get_data()
x_train_normalize <- mnist_data$x_train
x_test_normalize <- mnist_data$x_test
t_train_onehotlabel <- making_one_hot_label(mnist_data$t_train, 60000, 10)
t_test_onehotlabel <- making_one_hot_label(mnist_data$t_test, 10000, 10)
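making_one_hot_label() turns the digit labels 0-9 into one-hot rows, so the label 3 becomes a row with a 1 in column 4 and 0 everywhere else. A sketch consistent with the call signature above (the real definition lives in utils.R and may differ):

making_one_hot_label <- function(t_label, nrow, ncol){
    # Sketch only: the repository's definition in utils.R may differ.
    data <- matrix(0, nrow = nrow, ncol = ncol)
    for(i in 1:nrow){
        data[i, t_label[i] + 1] <- 1   # digit d marks column d + 1
    }
    return(data)
}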
Now we get to the main part and build the network we are going to train.
TwoLayerNet <- function(input_size, hidden_size, output_size, weight_init_std = 0.01) {
    # weights start as small Gaussian noise, biases as zeros
    W1 <- weight_init_std * matrix(rnorm(n = input_size*hidden_size), nrow = input_size, ncol = hidden_size)
    b1 <- matrix(rep(0, hidden_size), nrow = 1, ncol = hidden_size)
    W2 <- weight_init_std * matrix(rnorm(n = hidden_size*output_size), nrow = hidden_size, ncol = output_size)
    b2 <- matrix(rep(0, output_size), nrow = 1, ncol = output_size)
    return(list(W1 = W1, b1 = b1, W2 = W2, b2 = b2))
}
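A quick, purely illustrative check that the parameter shapes come out as intended (check_net is a throwaway name used only here):

check_net <- TwoLayerNet(input_size = 784, hidden_size = 50, output_size = 10)
dim(check_net$W1)  # 784  50
dim(check_net$b1)  #   1  50
dim(check_net$W2)  #  50  10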
Next we build the model that will train the network made above. These are defined as separate functions because model.forward() is needed on its own to make predictions, while loss(), naturally, is needed to compute the loss value.
model.forward <- function(network, x){
    # input -> Affine 1 -> ReLU -> Affine 2 (scores); each layer's result is kept for backprop
    Affine_1 <- Affine.forward(network$W1, network$b1, x)
    Relu_1 <- Relu.forward(Affine_1$out)
    Affine_2 <- Affine.forward(network$W2, network$b2, Relu_1$out)
    return(list(x = Affine_2$out, Affine_1.forward = Affine_1, Affine_2.forward = Affine_2, Relu_1.forward = Relu_1))
}
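Affine.forward() and Affine.backward() also come from functions.R. Inferred from how they are used here (the forward result must carry out plus whatever the backward pass needs; the backward result must carry dx, dW, db), a sketch could look like this:

Affine.forward <- function(W, b, x){
    # Sketch only: inferred from usage, not the repository's actual code.
    out <- sweep(x %*% W, 2, as.numeric(b), "+")  # xW + b, bias added to every row
    return(list(out = out, W = W, x = x))
}

Affine.backward <- function(forward, dout){
    dx <- dout %*% t(forward$W)            # gradient w.r.t. the layer input
    dW <- t(forward$x) %*% dout            # gradient w.r.t. the weights
    db <- matrix(colSums(dout), nrow = 1)  # bias gradient: sum over the batch
    return(list(dx = dx, dW = dW, db = db))
}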
loss <- function(model.forward, network, x, t){
    temp <- model.forward(network, x)
    y <- temp$x   # raw scores from the network
    last_layer.forward <- SoftmaxWithLoss.forward(y, t)
    return(list(loss = last_layer.forward$loss, softmax = last_layer.forward, predict = temp))
}
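Likewise, a sketch of the SoftmaxWithLoss layer assumed above. The well-known simplification is that the gradient of softmax combined with cross-entropy is just (y - t) divided by the batch size:

SoftmaxWithLoss.forward <- function(x, t){
    # Sketch only: inferred from usage, not the repository's actual code.
    exp_x <- exp(x - apply(x, 1, max))          # shift by the row max for numerical stability
    y <- exp_x / rowSums(exp_x)                 # row-wise softmax
    loss <- -sum(t * log(y + 1e-7)) / nrow(x)   # batch-averaged cross-entropy; 1e-7 guards log(0)
    return(list(loss = loss, y = y, t = t))
}

SoftmaxWithLoss.backward <- function(forward, dout){
    dx <- (forward$y - forward$t) * dout / nrow(forward$t)
    return(list(dx = dx))
}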
gradient <- function(model.forward, network, x, t) {
    # forward pass
    temp <- loss(model.forward, network, x, t)
    # backward pass: push dout back through the layers in reverse order
    dout <- 1
    last.backward <- SoftmaxWithLoss.backward(temp$softmax, dout)
    Affine_2.backward <- Affine.backward(temp$predict$Affine_2.forward, dout = last.backward$dx)
    Relu_1.backward <- Relu.backward(temp$predict$Relu_1.forward, dout = Affine_2.backward$dx)
    Affine_1.backward <- Affine.backward(temp$predict$Affine_1.forward, dout = Relu_1.backward$dx)
    grads <- list(W1 = Affine_1.backward$dW, b1 = Affine_1.backward$db, W2 = Affine_2.backward$dW, b2 = Affine_2.backward$db)
    return(grads)
}
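Backward passes are easy to get subtly wrong, so it is worth spot-checking the analytic gradient against a central-difference approximation. The helper below is ad hoc, written only for this check (it is not part of the repository):

numerical_gradient_entry <- function(network, name, i, j, x, t, h = 1e-4){
    # Hypothetical helper: perturb one weight both ways and difference the losses.
    plus <- network; plus[[name]][i, j] <- plus[[name]][i, j] + h
    minus <- network; minus[[name]][i, j] <- minus[[name]][i, j] - h
    (loss(model.forward, plus, x, t)$loss - loss(model.forward, minus, x, t)$loss) / (2 * h)
}

check_net <- TwoLayerNet(input_size = 784, hidden_size = 50, output_size = 10)
x_small <- x_train_normalize[1:3,]
t_small <- t_train_onehotlabel[1:3,]
backprop <- gradient(model.forward, check_net, x_small, t_small)
numerical_gradient_entry(check_net, "W2", 1, 1, x_small, t_small)  # should closely match:
backprop$W2[1, 1]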
Let's test what we have built so far.
train_size <- dim(x_train_normalize)[1]
batch_size <- 100
train_loss_list <- data.frame(lossvalue = 0)
train_acc_list <- data.frame(train_acc = 0)
test_acc_list <- data.frame(test_acc = 0)
iter_per_epoch <- max(train_size / batch_size, 1)
temp_TwoLayerNet <- TwoLayerNet(input_size = 784, hidden_size = 50, output_size = 10)
grads <- gradient(model.forward = model.forward, network = temp_TwoLayerNet, x = x_train_normalize[1:batch_size,], t = t_train_onehotlabel[1:batch_size,])
loss_value <- loss(model.forward = model.forward, network = temp_TwoLayerNet, x = x_train_normalize[1:batch_size,], t = t_train_onehotlabel[1:batch_size,])$loss

Running the code above prints the following result:

> loss_value
[1] 2.302899
This value is close to log(10) ≈ 2.3026, which is exactly what we expect from an untrained network whose softmax spreads probability almost evenly over the 10 digit classes. Now that the model runs, let's write an evaluation function to score it.
model.evaluate <- function(model, network, x, t){
    temp <- model(network, x)
    y <- max.col(temp$x)   # predicted class: column index of the largest score in each row
    t <- max.col(t)        # true class: column index of the 1 in each one-hot row
    accuracy <- sum(y == t) / dim(x)[1]
    return(accuracy)
}
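max.col() returns the column index of each row's maximum, which is how both the score matrix and the one-hot labels are converted back to class indices. A tiny illustration:

m <- matrix(c(0.1, 0.7, 0.2,
              0.8, 0.1, 0.1), nrow = 2, byrow = TRUE)
max.col(m)  # 2 1: class 2 for the first row, class 1 for the second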
Now let's actually run it, brute force.
for(i in 1:2000){
    # draw a random mini-batch
    batch_mask <- sample(train_size, batch_size)
    x_batch <- x_train_normalize[batch_mask,]
    t_batch <- t_train_onehotlabel[batch_mask,]
    # gradient by backpropagation, then one SGD step
    grad <- gradient(model.forward = model.forward, network = temp_TwoLayerNet, x_batch, t_batch)
    temp_TwoLayerNet <- sgd.update(temp_TwoLayerNet, grad)
    # record the loss on the current mini-batch
    loss_value <- loss(model.forward = model.forward, network = temp_TwoLayerNet, x_batch, t_batch)$loss
    train_loss_list <- rbind(train_loss_list, loss_value)
    # once per epoch, check accuracy on the full train and test sets
    if(i %% iter_per_epoch == 0){
        train_acc <- model.evaluate(model.forward, temp_TwoLayerNet, x_train_normalize, t_train_onehotlabel)
        test_acc <- model.evaluate(model.forward, temp_TwoLayerNet, x_test_normalize, t_test_onehotlabel)
        train_acc_list <- rbind(train_acc_list, train_acc)
        test_acc_list <- rbind(test_acc_list, test_acc)
        print(c(train_acc, test_acc))
    }
}
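After the loop finishes, plotting the recorded losses is a quick way to confirm that training actually converged. A minimal sketch; the [-1] drops the dummy 0 that initialized the data frame:

plot(train_loss_list$lossvalue[-1], type = "l",
     xlab = "iteration", ylab = "mini-batch loss")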