import numpy as np
import tensorflow as tf
sess = tf.Session()
x = [[1,0,0,0],# 4*4 #h
[0,1,0,0], #e
[0,0,1,0], #l
[0,0,0,1]] #o
y = [[0,1,0,0], # 5*4
[0,0,1,0],
[0,0,1,0],
[0,0,0,1]]
c_ = 0.5 * tf.constant([[1,0,0,0], # 4*4
[0,2,0,0],
[0,0,3,0],
[0,0,0,2]], dtype=tf.float32)
h_ = 0.5 * tf.constant([[1,0,0,0], # 4*4
[0,1,0,0],
[0,0,1,0],
[0,0,0,1]], dtype=tf.float32)
X = tf.placeholder(dtype=tf.float32, shape=[None, 4])
Y = tf.placeholder(dtype=tf.float32, shape=[None, 4])
W = tf.Variable(tf.random_normal([4, 1]))
b = tf.Variable(tf.random_normal([1]))
sess.run(tf.global_variables_initializer())
#he
#el
#ll
#lo
seq_len = 4
num_units = 4
class lstm:
def build(c, h):
args = tf.concat((X,h), axis=1)
out_size = 4 * num_units
proj_size = args.shape[-1]
weights = tf.ones([proj_size, out_size]) * 0.5
out = tf.matmul(args, weights)
bias = tf.ones([out_size]) * 0.5
concat = out + bias
i, j, f, o = tf.split(concat, 4, 1)
g = tf.tanh(j)
def sigmoid_array(x):
return 1 / (1 + tf.exp(-x))
forget_bias = 1.0
sigmoid_f = sigmoid_array(f + forget_bias)
sigmoid_array(i) * g
new_c = c * sigmoid_f + sigmoid_array(i) * g
new_h = tf.tanh(new_c) * sigmoid_array(o)
return new_c, new_h
ta_c = tf.TensorArray(size=seq_len, dtype=tf.float32)
ta_h = tf.TensorArray(size=seq_len, dtype=tf.float32)
def body(last_state, last_output, step, ta_c, ta_h):
output = lstm.build(last_state, last_output)[0]
state = lstm.build(last_state, last_output)[1]
ta_c = ta_c.write(step, state)
ta_h = ta_h.write(step, output)
return state, output, tf.add(step, 1), ta_c, ta_h
timesteps = seq_len
steps = lambda a, b, step, c, d: tf.less(step, timesteps)
lstm_output, lstm_state, step, ta_c, ta_h = tf.while_loop(steps, body, (c_, h_, 0, ta_c, ta_h), parallel_iterations=20)
output = lstm_output[-1]
output = tf.reshape(output, [-1, 4])
logits = tf.matmul(lstm_output, W) + b
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
train = tf.train.AdamOptimizer(0.1).minimize(cost)
is_correct = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
sess.run(tf.global_variables_initializer())
for i in range(1000):
sess.run(train, feed_dict={X:x, Y:y})
a, c = sess.run([accuracy, cost],feed_dict={X:x, Y:y})
print("##############",a)
print("##############",c)
It is a excuse, I have been holding this code for a few days, but I have no idea why the cost does not go down.
############## 0.0
############## 1.3862944
############## 0.0
############## 1.3862944
############## 0.0
############## 1.3862944
############## 0.0
############## 1.3862944
############## 0.0
############## 1.3862944
these are accuracy and cost. As you can see, it doesn't works at all.
I thought I put the X placeholder inside the "class" and connected the new output and rnn cell, but it was not. What should I fix?
please, please help me
Comments
Post a Comment