DeepLearning_Lab04_01 - 8BitsCoding/RobotMentor GitHub Wiki


# Lab 4 Multi-variable linear regression
import tensorflow as tf
import numpy as np
# Pin the graph-level random seed so repeated runs give the same results.
tf.set_random_seed(777)

# Read the whole CSV into one float32 matrix, then split it column-wise:
# every column except the last is an input, the last column is the target.
xy = np.loadtxt(
    'data-01-test-score.csv',
    delimiter=',',
    dtype=np.float32,
)
x_data, y_data = xy[:, :-1], xy[:, [-1]]

numpy의 loadtxt를 이용한다.

np.loadtxt('data-01-test-score.csv', delimiter=',', dtype=np.float32) numpy의 loadtxt를 이용하여 ,를 기준으로 float32로 읽어와라

x_data = xy[:, 0:-1] 행은 전체, 열은 마지막 하나를 제외하고 모두

y_data = xy[:, [-1]] 행은 전체, 열은 마지막 하나만

이미지

# Sanity check before training: dump both arrays together with their shapes.
x_shape = x_data.shape
y_shape = y_data.shape
print(x_data, "\nx_data shape:", x_shape)
print(y_data, "\ny_data shape:", y_shape)

해보자

이미지


나머지 코드

# placeholders for a tensor that will be always fed.
# X takes rows of 3 float32 inputs, Y rows of 1 float32 target; the leading
# None leaves the number of rows open.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Trainable parameters, initialised from a normal distribution: a [3, 1]
# weight matrix (one weight per input column) and a single bias.
W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis
# Linear model expressed as one matrix product: X * W + b.
hypothesis = tf.matmul(X, W) + b

# Simplified cost/loss function
# Mean of the squared differences between prediction and target.
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize
# Plain gradient descent on the cost with a learning rate of 1e-5; running
# `train` performs one update step.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Open a session and give every graph variable its initial value.
sess = tf.Session()
init_op = tf.global_variables_initializer()
sess.run(init_op)

# Each step evaluates the cost and the predictions and applies one training
# update, always feeding the full in-memory data set.
train_fetches = [cost, hypothesis, train]
train_feed = {X: x_data, Y: y_data}
for step in range(2001):
    cost_val, hy_val, _ = sess.run(train_fetches, feed_dict=train_feed)
    if step % 10 != 0:
        continue
    # Report progress on every 10th step only.
    print(step, "Cost:", cost_val, "\nPrediction:\n", hy_val)

# Ask my score
# X is declared with shape [None, 3], so every feed must be a 2-D list of
# rows with three values each, even when there is only one row.
print("Your score will be ", sess.run(hypothesis,
                                      feed_dict={X: [[100, 70, 101]]}))

# Two rows in one feed produce two predictions.
print("Other scores will be ", sess.run(hypothesis,
                                        feed_dict={X: [[60, 70, 110], [90, 100, 80]]}))

만약 데이터가 너무 커서 메모리에 올리지 못할 경우?

이미지

이미지

queue runners 이용

import tensorflow as tf
# Seed the graph so repeated runs behave the same.
tf.set_random_seed(777)

# Queue holding the names of the files to read; shuffling is turned off.
csv_paths = ['data-01-test-score.csv']
filename_queue = tf.train.string_input_producer(
    csv_paths, shuffle=False, name='filename_queue')

읽어올 파일을 목록화 하여 filename_queue에 저장

# Text-line reader fed from the filename queue.
reader = tf.TextLineReader()
# read() yields a (key, value) pair; value is the tensor that is later
# parsed as CSV.
key, value = reader.read(filename_queue)

text file형태로 읽어온다

# Default values, in case of empty columns. Also specifies the type of the
# decoded result: one single-element list per CSV column, and a float
# default (0.) makes that column decode as float32.
record_defaults = [[0.], [0.], [0.], [0.]]
xy = tf.decode_csv(value, record_defaults=record_defaults)

record_defaults = [[0.], [0.], [0.], [0.]]는 읽어올 데이터 형식을 지정한다. [0.]은 float32를 의미한다.

읽어온 value 값을 어떻게 파싱할지는 decode_csv로 지정한다.

# Group decoded CSV records into batches of 10: every column except the
# last goes into the x batch, the last column into the y batch.
feature_cols = xy[0:-1]
label_col = xy[-1:]
train_x_batch, train_y_batch = tf.train.batch(
    [feature_cols, label_col], batch_size=10)

batch_size는 한 번에 몇 개씩 가져올지 지정한다.

# Start populating the filename queue.
# NOTE(review): sess, X, Y, cost, hypothesis and train are not defined in
# this excerpt; they are presumably built exactly as in the in-memory
# listing (placeholders, model, tf.Session(), variable initialisation) --
# confirm before running this fragment on its own.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

try:
    for step in range(2001):
        # Pull the next batch out of the input pipeline, then feed it to
        # the model through the placeholders.
        x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
        cost_val, hy_val, _ = sess.run(
            [cost, hypothesis, train], feed_dict={X: x_batch, Y: y_batch})
        if step % 10 == 0:
            print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)
finally:
    # Always ask the queue-runner threads to stop and wait for them, even
    # when training fails part-way, so no background threads are left running.
    coord.request_stop()
    coord.join(threads)

coord는 통상적으로 이렇게 쓴다고 받아들이자