library(dslabs)  # provides read_mnist() for the MNIST data used below
## Initialize the parameters of a 3-layer network (784 -> 100 -> 50 -> 10).
## Weights are drawn from a standard normal and scaled by weight_init_std;
## biases start at zero. The result is stored in the global `params` list.
params <- list()
MakingParams <- function(input_size, hidden_size1, hidden_size2, output_size, weight_init_std = 0.01) {
  W1 <- weight_init_std * matrix(rnorm(n = input_size * hidden_size1), nrow = input_size, ncol = hidden_size1)
  b1 <- matrix(rep(0, hidden_size1), nrow = 1, ncol = hidden_size1)
  W2 <- weight_init_std * matrix(rnorm(n = hidden_size1 * hidden_size2), nrow = hidden_size1, ncol = hidden_size2)
  b2 <- matrix(rep(0, hidden_size2), nrow = 1, ncol = hidden_size2)
  W3 <- weight_init_std * matrix(rnorm(n = hidden_size2 * output_size), nrow = hidden_size2, ncol = output_size)
  b3 <- matrix(rep(0, output_size), nrow = 1, ncol = output_size)
  params <<- list(W1 = W1, b1 = b1, W2 = W2, b2 = b2, W3 = W3, b3 = b3)
  invisible(params)
}
MakingParams(input_size=784, hidden_size1=100, hidden_size2=50, output_size=10)
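## A quick sanity check (an addition, not part of the original script):
## shapes should come out as 784x100, 1x100, 100x50, 1x50, 50x10, 1x10.
print(sapply(params, dim))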
## ReLU forward pass: zero out non-positive inputs and remember where they
## were (the mask), since the backward pass needs exactly that information.
Relu.forward <- function(x){
  mask <- x <= 0
  out <- x
  out[mask] <- 0
  return(list(out = out, mask = mask))
}
# Of everything Relu.forward produces, the backward pass only needs the
# mask array: wherever the mask is TRUE, the gradient is set to 0.
Relu.backward <- function(forward, dout){
  dout[forward$mask] <- 0
  return(list(dx = dout))
}
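## A quick illustrative check (an addition, not in the original script):
## non-positive entries are zeroed going forward, and their gradients are
## zeroed going backward.
relu_demo <- Relu.forward(matrix(c(-1, 0, 2), nrow = 1))
print(relu_demo$out)                                               # 0 0 2
print(Relu.backward(relu_demo, matrix(5, nrow = 1, ncol = 3))$dx)  # 0 0 5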
## Affine (fully connected) forward pass: out = x %*% W + b, where the
## 1-row bias b is broadcast across the batch with sweep().
Affine.forward <- function(W, b, x){
  out <- sweep(x %*% W, 2, b, '+')
  return(list(out = out, W = W, x = x))
}
## Affine backward pass: dx = dout %*% t(W), dW = t(x) %*% dout, and db is
## the column-wise sum of dout (the bias gradient summed over the batch).
Affine.backward <- function(forward, dout){
  dx <- dout %*% t(forward$W)
  dW <- t(forward$x) %*% dout
  db <- matrix(colSums(dout), nrow = 1)
  return(list(dx = dx, dW = dW, db = db))
}
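## A minimal finite-difference spot check of Affine.backward's dW, on
## assumed toy shapes (2x3 input, 3x2 weights); all names here are
## illustrative additions, not part of the original script.
set.seed(1)
x0 <- matrix(rnorm(6), 2, 3); W0 <- matrix(rnorm(6), 3, 2); b0 <- matrix(0, 1, 2)
f0 <- Affine.forward(W0, b0, x0)
anal <- Affine.backward(f0, dout = matrix(1, 2, 2))$dW   # gradient of sum(out)
eps <- 1e-5
E11 <- matrix(0, 3, 2); E11[1, 1] <- eps                 # perturb only W[1,1]
num <- (sum(Affine.forward(W0 + E11, b0, x0)$out) -
        sum(Affine.forward(W0 - E11, b0, x0)$out)) / (2 * eps)
print(c(analytic = anal[1, 1], numeric = num))           # should agree closely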
## Row-wise softmax with the usual max-subtraction trick for numerical
## stability; each row of the output sums to 1.
softmax <- function(a){
  a <- a - apply(a, 1, max)        # vector of row maxima recycles down columns: per-row subtraction
  exp_a <- exp(a)
  return(exp_a / rowSums(exp_a))   # divide each row by its row sum
}
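## Sanity check (illustrative, not in the original script): rows of the
## output are valid probability distributions even for huge logits.
print(rowSums(softmax(matrix(c(1, 2, 3, 1000, 1001, 1002), nrow = 2, byrow = TRUE))))  # 1 1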
## Cross-entropy loss averaged over the batch; the 1e-7 guards against log(0).
cross_entropy_error <- function(y, t){
  batch_size <- dim(y)[1]
  return(sum(-(t * log(y + 1e-7))) / batch_size)
}
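## Worked micro-example (an illustrative addition): a confident correct
## prediction yields a small loss, a uniform one a larger loss.
print(cross_entropy_error(matrix(c(0.9, 0.05, 0.05), 1), matrix(c(1, 0, 0), 1)))  # ~0.105
print(cross_entropy_error(matrix(c(1/3, 1/3, 1/3), 1), matrix(c(1, 0, 0), 1)))    # ~1.099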
## Combined softmax + cross-entropy layer. Its backward pass has the simple
## closed form (y - t) / batch_size.
SoftmaxWithLoss.forward <- function(x, t){
  y <- softmax(x)
  loss <- cross_entropy_error(y, t)
  return(list(loss = loss, y = y, t = t))
}
SoftmaxWithLoss.backward <- function(forward, dout = 1){
  dx <- (forward$y - forward$t) / dim(forward$t)[1]
  return(list(dx = dx))
}
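## Finite-difference spot check of the (y - t) / batch_size gradient, using
## an assumed 1x3 logit row and a one-hot target (illustrative values only).
logits0 <- matrix(c(0.2, -0.1, 0.5), nrow = 1)
target0 <- matrix(c(FALSE, FALSE, TRUE), nrow = 1)
g0 <- SoftmaxWithLoss.backward(SoftmaxWithLoss.forward(logits0, target0))$dx
eps <- 1e-5
e1 <- matrix(c(eps, 0, 0), nrow = 1)                   # perturb only the first logit
num <- (SoftmaxWithLoss.forward(logits0 + e1, target0)$loss -
        SoftmaxWithLoss.forward(logits0 - e1, target0)$loss) / (2 * eps)
print(c(analytic = g0[1, 1], numeric = num))           # should agree closely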
## Forward pass through the whole network: Affine -> ReLU -> Affine -> ReLU
## -> Affine. Every layer's forward result is returned alongside the logits,
## because gradient() needs the cached inputs and masks for backpropagation.
## (Note: this masks base R's predict() generic for the rest of the script.)
predict <- function(x){
  Affine_1_layer <- Affine.forward(params$W1, params$b1, x)
  Relu_1_layer   <- Relu.forward(Affine_1_layer$out)
  Affine_2_layer <- Affine.forward(params$W2, params$b2, Relu_1_layer$out)
  Relu_2_layer   <- Relu.forward(Affine_2_layer$out)
  Affine_3_layer <- Affine.forward(params$W3, params$b3, Relu_2_layer$out)
  return(list(x = Affine_3_layer$out,
              Affine_1.forward = Affine_1_layer, Relu_1.forward = Relu_1_layer,
              Affine_2.forward = Affine_2_layer, Relu_2.forward = Relu_2_layer,
              Affine_3.forward = Affine_3_layer))
}
## Full forward pass including the loss layer; the intermediate results are
## carried along for the backward pass.
loss <- function(x, t){
  temp <- predict(x)
  y <- temp$x
  last_layer.forward <- SoftmaxWithLoss.forward(y, t)
  return(list(loss = last_layer.forward$loss, softmax = last_layer.forward, predict = temp))
}
## Classification accuracy: max.col() takes the argmax of each row, so the
## predicted classes are compared with the one-hot targets' argmax.
accuracy <- function(x, t){
  y <- max.col(predict(x)$x)
  t <- max.col(t)
  # optional per-sample debug output:
  # print(ifelse(y == t, paste(t - 1, "guessed correctly"), paste(t - 1, "guessed wrong")))
  accuracy <- sum(y == t) / dim(x)[1]
  return(accuracy)
}
## Gradients for one training step via backpropagation: run the forward
## pass, then push the upstream gradient back through each layer in reverse.
gradient <- function(x, t) {
  # forward pass
  temp_loss <- loss(x, t)
  # backward pass
  dout <- 1
  last_layer.backward     <- SoftmaxWithLoss.backward(temp_loss$softmax, dout)
  Affine_3_layer.backward <- Affine.backward(temp_loss$predict$Affine_3.forward, dout = last_layer.backward$dx)
  Relu_2_layer.backward   <- Relu.backward(temp_loss$predict$Relu_2.forward, dout = Affine_3_layer.backward$dx)
  Affine_2_layer.backward <- Affine.backward(temp_loss$predict$Affine_2.forward, dout = Relu_2_layer.backward$dx)
  Relu_1_layer.backward   <- Relu.backward(temp_loss$predict$Relu_1.forward, dout = Affine_2_layer.backward$dx)
  Affine_1_layer.backward <- Affine.backward(temp_loss$predict$Affine_1.forward, dout = Relu_1_layer.backward$dx)
  grads <- list(W1 = Affine_1_layer.backward$dW, b1 = Affine_1_layer.backward$db,
                W2 = Affine_2_layer.backward$dW, b2 = Affine_2_layer.backward$db,
                W3 = Affine_3_layer.backward$dW, b3 = Affine_3_layer.backward$db)
  return(grads)
}
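## A spot check of the backprop gradients against a numerical estimate, on a
## tiny random batch (an illustrative addition; it runs before MNIST is
## loaded, so it uses fake 784-dimensional inputs and perturbs b3[1,1]).
set.seed(42)
x_chk <- matrix(runif(3 * 784), nrow = 3)
t_chk <- diag(10)[sample(10, 3), ] == 1          # three random one-hot rows
g_chk <- gradient(x_chk, t_chk)
eps <- 1e-4
params$b3[1, 1] <- params$b3[1, 1] + eps;     lp <- loss(x_chk, t_chk)$loss
params$b3[1, 1] <- params$b3[1, 1] - 2 * eps; lm <- loss(x_chk, t_chk)$loss
params$b3[1, 1] <- params$b3[1, 1] + eps         # restore the original value
print(c(analytic = g_chk$b3[1, 1], numeric = (lp - lm) / (2 * eps)))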
## Let's run what we built above!
## Prepare the data to use
mnist <- read_mnist()
x_train <- mnist$train$images
t_train <- mnist$train$labels
x_test  <- mnist$test$images
t_test  <- mnist$test$labels
# scale pixel intensities from [0, 255] into [0, 1]
x_train_normalize <- x_train / 255
x_test_normalize  <- x_test / 255
## Convert integer labels 0..9 into one-hot rows (a logical matrix);
## label k sets column k + 1 to TRUE.
making_one_hot_label <- function(t_label, nrow, ncol){
  data <- matrix(FALSE, nrow = nrow, ncol = ncol)
  t_index <- t_label + 1
  for(i in 1:NROW(data)){
    data[i, t_index[i]] <- TRUE
  }
  return(data)
}
t_train_onehotlabel <- making_one_hot_label(t_train,60000,10)
t_test_onehotlabel <- making_one_hot_label(t_test,10000,10)
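## Quick consistency check (an illustrative addition): the argmax of each
## one-hot row should recover the original label.
stopifnot(all(max.col(t_train_onehotlabel) - 1 == t_train))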
## Training hyperparameters
iters_num <- 10000
train_size <- dim(x_train_normalize)[1]
batch_size <- 100
learning_rate <- 0.1
train_loss_list <- data.frame(lossvalue = rep(0, iters_num))
train_acc_list <- data.frame(train_acc = numeric(0))
test_acc_list  <- data.frame(test_acc = numeric(0))
iter_per_epoch <- max(train_size / batch_size, 1)   # evaluate once per epoch
for(i in 1:iters_num){
  # draw a random mini-batch
  batch_mask <- sample(train_size, batch_size)
  x_batch <- x_train_normalize[batch_mask, ]
  t_batch <- t_train_onehotlabel[batch_mask, ]
  # backprop gradients, then a plain SGD update
  grad <- gradient(x_batch, t_batch)
  params$W1 <- params$W1 - (grad$W1 * learning_rate)
  params$W2 <- params$W2 - (grad$W2 * learning_rate)
  params$W3 <- params$W3 - (grad$W3 * learning_rate)
  params$b1 <- params$b1 - (grad$b1 * learning_rate)
  params$b2 <- params$b2 - (grad$b2 * learning_rate)
  params$b3 <- params$b3 - (grad$b3 * learning_rate)
  # record the loss into the preallocated slot (rbind here would append
  # 10000 extra rows after the zero-filled ones instead of filling them)
  train_loss_list$lossvalue[i] <- loss(x_batch, t_batch)$loss
  # evaluate on the full train/test sets once per epoch
  if(i %% iter_per_epoch == 0){
    train_acc <- accuracy(x_train_normalize, t_train_onehotlabel)
    test_acc  <- accuracy(x_test_normalize, t_test_onehotlabel)
    train_acc_list <- rbind(train_acc_list, data.frame(train_acc = train_acc))
    test_acc_list  <- rbind(test_acc_list,  data.frame(test_acc = test_acc))
    print(c(train_acc, test_acc))
  }
}
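## A minimal sketch for inspecting the run (an addition, using only base
## graphics): the loss should fall quickly, and the train/test accuracy
## curves should track each other closely if the network is not overfitting.
plot(train_loss_list$lossvalue, type = "l", xlab = "iteration", ylab = "loss")
plot(train_acc_list$train_acc, type = "o", col = "blue",
     xlab = "epoch", ylab = "accuracy", ylim = c(0, 1))
lines(test_acc_list$test_acc, type = "o", col = "red")
legend("bottomright", legend = c("train", "test"), col = c("blue", "red"), lty = 1)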