Python code for comparing computed values between Python and other languages - LOPES-HUFS/DeepLearningFromForR GitHub Wiki
import numpy as np
import tensorflow as tf
from tensorflow import keras
mnist = keras.datasets.mnist
(X_train, Y_train), (X_validation, Y_validation) = mnist.load_data()
X_train = X_train.reshape(X_train.shape[0], 784).astype('float64') / 255
X_validation = X_validation.reshape(X_validation.shape[0], 784).astype('float64') / 255
Y_train = keras.utils.to_categorical(Y_train, 10)
Y_validation = keras.utils.to_categorical(Y_validation, 10)
X_train.shape, Y_train.shape
((60000, 784), (60000, 10))
The following x_100 and t_100 are used at the end.
x_100 = X_train[0:100]
x_100.shape
(100, 784)
t_100 = Y_train[0:100]
t_100.shape
(100, 10)
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initialize the weights with small random values and zero biases
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
net.params['W1'].shape, net.params['b1'].shape, net.params['W2'].shape, net.params['b2'].shape
net.params['W1']
import h5py
hf = h5py.File('twoLayerNet.h5', 'w')
hf.create_dataset('W1',data=net.params['W1'], dtype=np.dtype('float64'))
hf.create_dataset('W2',data=net.params['W2'], dtype=np.dtype('float64'))
hf.create_dataset('b1',data=net.params['b1'], dtype=np.dtype('float64'))
hf.create_dataset('b2',data=net.params['b2'], dtype=np.dtype('float64'))
hf.create_dataset('x',data=x_100, dtype=np.dtype('float64'))
hf.create_dataset('t',data=t_100, dtype=np.dtype('float64'))
hf.close()
import h5py
hf = h5py.File('twoLayerNet.h5', 'r')
The call below only fetches a dataset handle; the data itself has not been loaded yet.
hf.get('W1')
<HDF5 dataset "W1": shape (784, 100), type "<f8">
Assign the handle to a variable and then convert it with np.array() to actually bring the data in.
W1 = hf.get('W1')
W1
<HDF5 dataset "W1": shape (784, 100), type "<f8">
np.array(W1)
array([[ 0.00879839, 0.00459003, -0.00200092, ..., 0.00044542,
0.0115502 , 0.00087598],
[-0.00712312, 0.00788025, -0.00559612, ..., 0.00350531,
0.00810035, -0.00440114],
[-0.01988711, 0.00370975, -0.0099757 , ..., 0.00205867,
-0.00292896, 0.01356232],
...,
[-0.0003736 , -0.01148628, -0.00034313, ..., -0.0095307 ,
0.00267268, 0.01485672],
[ 0.00295596, 0.00095837, -0.00857204, ..., 0.00177389,
-0.00386396, 0.00423653],
[-0.00682863, 0.01163568, 0.0126971 , ..., 0.01339846,
-0.00190958, 0.00430636]])
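As an aside not in the original page, an h5py dataset also supports NumPy-style slicing, which reads only the requested part of the file and returns a plain ndarray:
W1[0, :5]   # reads just the first five values of the first row from the file
W1[()]      # reads the entire dataset as an ndarray, same result as np.array(W1)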
W2 = hf.get('W2')
W2
<HDF5 dataset "W2": shape (100, 10), type "<f8">
b1 = hf.get('b1')
b1
<HDF5 dataset "b1": shape (100,), type "<f8">
b2 = hf.get('b2')
b2
<HDF5 dataset "b2": shape (10,), type "<f8">
Convert W1 and W2 to NumPy arrays before closing the file; once the file is closed, the dataset handles can no longer be read.
W1 = np.array(W1)
W2 = np.array(W2)
hf.close()
The folder you are running this in must contain a common folder, and that folder must contain functions.py and gradient.py. (A minimal sketch of their contents follows below.)
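If those files are not already in place, here is a minimal sketch of what they typically contain, following the convention of the common package from Deep Learning from Scratch; the actual files in that book's repository may differ in detail.
# common/functions.py -- minimal sketch
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_grad(x):
    # derivative of the sigmoid, written in terms of sigmoid(x)
    return (1.0 - sigmoid(x)) * sigmoid(x)

def softmax(x):
    # subtract the row-wise maximum for numerical stability
    x = x - np.max(x, axis=-1, keepdims=True)
    return np.exp(x) / np.sum(np.exp(x), axis=-1, keepdims=True)

def cross_entropy_error(y, t):
    # t is assumed to be one-hot encoded, as it is throughout this page
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    batch_size = y.shape[0]
    return -np.sum(t * np.log(y + 1e-7)) / batch_size

# common/gradient.py -- minimal sketch
def numerical_gradient(f, x):
    # central-difference gradient of f with respect to every element of x
    h = 1e-4
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp = x[idx]
        x[idx] = tmp + h
        fxh1 = f(x)
        x[idx] = tmp - h
        fxh2 = f(x)
        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp
        it.iternext()
    return grad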
from common.functions import *
from common.gradient import numerical_gradient
class TwoLayerNet:
    def __init__(self, W1, W2, hidden_size, output_size):
        # Initialize the weights: W1 and W2 come from the HDF5 file, biases start at zero
        self.params = {}
        self.params['W1'] = np.array(W1)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = np.array(W2)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        return y

    # x: input data, t: answer labels
    def loss(self, x, t):
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x: input data, t: answer labels
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads

    def gradient(self, x, t):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}
        batch_num = x.shape[0]
        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        # backward
        dy = (y - t) / batch_num  # gradient of the softmax + cross-entropy loss with respect to a2
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)
        da1 = np.dot(dy, W2.T)
        dz1 = sigmoid_grad(a1) * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)
        return grads
net = TwoLayerNet(W1=W1, W2=W2, hidden_size=100, output_size=10)
net.params['W1'].shape, net.params['b1'].shape, net.params['W2'].shape, net.params['b2'].shape
((784, 100), (100,), (100, 10), (10,))
We use the x_100 and t_100 created earlier.
grads = net.numerical_gradient(x_100, t_100)
grads['b1']
array([ 6.98853575e-05, -9.93783056e-06, -1.51344244e-04, 3.02627057e-05,
1.19115888e-04, 4.01761706e-04, 6.42105480e-06, 1.04230946e-04,
-8.34138825e-05, -3.03741188e-05, -1.07877385e-04, -1.05011453e-04,
2.81566479e-04, -1.37977751e-04, -1.16503505e-04, -8.12610002e-05,
1.22458683e-04, 6.00521566e-05, 1.61827540e-04, -1.03879663e-04,
-6.35472719e-05, -9.44183798e-05, -1.37084366e-05, -2.42925347e-05,
-1.47494839e-04, -3.01672352e-04, -1.63247087e-04, -1.28085789e-04,
3.28637739e-05, -6.10771700e-05, -1.94694862e-04, 3.15526572e-04,
1.62264535e-04, 3.32429817e-05, -9.94492266e-05, 4.46899837e-04,
4.83101485e-04, -2.57419652e-04, -2.26207253e-05, -1.16939267e-04,
8.71954708e-05, -2.87039639e-04, 3.74408462e-04, -3.16355078e-04,
-8.63232685e-05, -8.05881140e-05, 2.06869266e-04, -2.38109719e-04,
-8.50411430e-05, -1.44528833e-06, -2.97695810e-04, -5.99855521e-05,
1.90152205e-05, -5.68697134e-05, -1.18724253e-04, -1.64171210e-05,
3.95352417e-05, 4.26423479e-04, 8.63150706e-05, 3.79137843e-04,
-1.83472315e-04, -8.06906297e-05, 2.45654550e-04, -8.78894957e-05,
-1.09413336e-04, 2.62068138e-04, 7.98924993e-05, 2.03251282e-04,
2.28593995e-04, -6.12016082e-05, 9.21886145e-05, 1.76595960e-04,
-7.33194838e-06, -1.58501494e-04, -5.72298764e-05, 6.10165829e-05,
2.24733077e-04, 3.40156792e-06, 1.83184941e-04, 1.63520897e-04,
-2.90531814e-04, -2.48835101e-04, 2.21084784e-04, -4.49667865e-04,
-2.43824649e-05, -1.00161153e-04, 4.43502191e-05, -1.23017401e-04,
-2.25403043e-04, 7.30565808e-05, -1.64244205e-04, -1.09773592e-04,
2.24259500e-05, 6.20502094e-06, -3.70050046e-04, -1.16854308e-04,
-9.47266554e-05, -6.49549237e-05, 1.90885536e-04, -1.97253902e-05])
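As a quick sanity check not part of the original page, you can compare these numerical gradients with the analytic gradients from the gradient() method; the average absolute difference per parameter should be very small, typically on the order of 1e-10 or less.
grads_backprop = net.gradient(x_100, t_100)
for key in ('W1', 'b1', 'W2', 'b2'):
    # mean absolute difference between backprop and numerical gradients
    print(key, np.average(np.abs(grads_backprop[key] - grads[key])))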
hf = h5py.File('TESTtwoLayerNet.h5', 'w')
hf.create_dataset('W1',data=grads['W1'], dtype=np.dtype('float64'))
<HDF5 dataset "W1": shape (784, 100), type "<f8">
hf.create_dataset('W2',data=grads['W2'], dtype=np.dtype('float64'))
<HDF5 dataset "W2": shape (100, 10), type "<f8">
hf.create_dataset('b2',data=grads['b2'], dtype=np.dtype('float64'))
<HDF5 dataset "b2": shape (10,), type "<f8">
hf.create_dataset('b1',data=grads['b1'], dtype=np.dtype('float64'))
<HDF5 dataset "b1": shape (100,), type "<f8">
hf.close()