
FailedPreconditionError Traceback

I am new to TensorFlow and am trying to define an RL algorithm:

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim


class DQN(object):
    def __init__(self, state_shape, action_shape, lr=0.01):
        ''' Deep Q-Network TensorFlow model.

        Args:
            - state_shape: Input state shape
            - action_shape: Output action shape
        '''
        self.states_input = tf.placeholder(tf.float32, shape=state_shape)
        self.Q_target     = tf.placeholder(tf.float32, shape=action_shape)

        self.inputs1      = tf.placeholder(tf.float32, shape=state_shape)
        self.Q_w          = self.define_network(self.inputs1, [80, 60, 4])
        self.Q_w_t        = self.define_network(self.inputs1, [80, 60, 4])

        self.predict      = self.predict_output(self.Q_w)
        self.nextQ        = self.Q_target

        self.loss         = self.compute_loss(self.nextQ, self.Q_w)
        self.trainer      = self.optimize_network(0.001)
        self.updateModel  = self.trainer.minimize(self.loss)
        self.init         = tf.global_variables_initializer()

    def define_network(self, input_layer, layer_shape):
        # Stack of fully connected layers built with TF-Slim
        Q_out = slim.stack(input_layer, slim.fully_connected, layer_shape)
        return Q_out

    def predict_output(self, Q_out):
        # Greedy action: index of the largest Q-value
        best_action = tf.argmax(Q_out, 1)
        return best_action

    def optimize_network(self, lr):
        opt = tf.train.AdamOptimizer(learning_rate=lr)
        return opt

    def compute_loss(self, Q_target, Q_estimate):
        # Huber loss between target and estimated Q-values
        loss = tf.losses.huber_loss(Q_target, Q_estimate)
        return loss

Now there is another function that calls these objects and placeholders, shown below:

def optimize_model(session, policy_net, target_net, batch, gamma):
    ''' Calculates the target Q-values for the given batch and uses them to update the model.

    Args:
        - session: Tensorflow session
        - policy_net: Policy DQN model
        - target_net: DQN model used to generate target Q-values
        - batch: Batch of experiences used to optimize model
        - gamma: Discount factor
    '''
    policyQw = policy_net.define_network(policy_net.inputs1, [80, 60, 4])

    for b in batch:
        Qcurr   = session.run(policyQw,         feed_dict={policy_net.inputs1: np.identity(16)[b[0]:b[0]+1]})
        Q1      = session.run(target_net.Q_w_t, feed_dict={target_net.inputs1: np.identity(16)[b[2]:b[2]+1]})
        maxQ1   = np.max(Q1)
        targetQ = Qcurr
        if b[4] == 1:
            targetQ[0, b[1]] = b[3]
        else:
            targetQ[0, b[1]] = b[3] + gamma * maxQ1
        session.run([policy_net.updateModel],
                    feed_dict={policy_net.inputs1: np.identity(16)[b[0]:b[0]+1],
                               policy_net.nextQ: targetQ})

I have called these functions from the main function below:

def train(env, num_episodes=500, gamma=0.99, batch_size=64, annealing_steps=1000,
          s_epsilon=1.0, f_epsilon=0.1, max_episode_steps=200):
    ''' DQN algorithm

    Args:
        - env: The environment to train the agent on
        - num_episodes: The number of episodes to train the agent for
        - gamma: The discount factor
        - batch_size: Number of experiences in a batch
        - annealing_steps: The number of steps to anneal epsilon over
        - s_epsilon: The initial epsilon value for e-greedy action selection
        - f_epsilon: The final epsilon value for the e-greedy action selection

    Returns: (policy_net, episode_rewards)
        - policy_net: Trained DQN model
        - episode_rewards: Numpy array containing the reward of each episode during training
    '''
    tf.reset_default_graph()

    policy_net  = DQN([1, env.nS], [1, env.action_space.n])
    target_net  = DQN([1, env.nS], [1, env.action_space.n])
    target_ops  = update_target_graph_op(tf.trainable_variables(), 0.7)

    memory      = ReplayMemory(800)
    epsilon     = LinearSchedule(annealing_steps, f_epsilon, s_epsilon)

    total_steps = 0
    episode_rewards = list()

    ## CODE STARTS:
    inputs1     = policy_net.states_input

    #Q_w         = policy_net.define_network(inputs1, [32, 16, 4])
    #Q_w_t       = target_net.define_network(inputs1, [32, 16, 4])

    #predict     = policy_net.predict_output(Q_w)
    #nextQ       = target_net.Q_target

    #loss        = policy_net.compute_loss(nextQ, Q_w)
    #trainer     = policy_net.optimize_network(0.001)
    #updateModel = trainer.minimize(loss)
    init        = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        for i in range(num_episodes):
            eps = epsilon.value(i)
            is_done = False

            state = env.reset()
            total_reward = 0
            total_steps = 0
            #print(sess.run(tf.trainable_variables()))
            # The Q-Network
            while total_steps < max_episode_steps:
                Qcurr = sess.run([policy_net.Q_w], feed_dict={policy_net.inputs1: np.identity(16)[state:state+1]})
                action = eGreedyActionSelection(Qcurr[0], eps, env)
                #print(action)
                next_state, reward, is_done, _ = env.step(action)

                memory.add(state, action, next_state, reward, is_done)
                total_reward += reward

                if total_steps % batch_size == batch_size - 1:
                    batch = memory.sample(batch_size)
                    optimize_model(sess, policy_net, target_net, batch, gamma)
                update_target(sess, target_ops)

                if is_done:
                    break

                state = next_state
                total_steps += 1

            episode_rewards.append(total_reward)
        print("Percent of successful episodes: " + str(sum(episode_rewards) / num_episodes) + "%")

    return policy_net, episode_rewards

I am trying to move some of the placeholders and network-definition calls into the second part of the code, but I am getting this error:

FailedPreconditionError: Attempting to use uninitialized value Stack_4/fully_connected_1/weights [[node Stack_4/fully_connected_1/weights/read (defined at /usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/variables.py:277) = IdentityT=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]]

which happens on this line of the second code block: policyQw = policy_net.define_network(policy_net.inputs1, [80, 60, 4])

Any idea how I can do this or get rid of the error? Thanks in advance for your help.
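In case it helps, here is what I have been trying so far. My guess (not confirmed) is that the define_network call inside optimize_model builds a second, brand-new set of slim variables, and since those are created after init was fetched and run in train(), they never get initialized. A minimal sketch of the workaround I am experimenting with, assuming that guess is right, is below; it simply reuses the network that was already built in __init__ instead of defining a new one:

    import numpy as np
    import tensorflow as tf

    # Assumption: every call to define_network builds a fresh slim.stack with new
    # variables, so the network created inside optimize_model did not exist yet
    # when tf.global_variables_initializer() was run in train(), which would
    # explain the FailedPreconditionError.
    #
    # Attempted workaround: reuse the network already built in DQN.__init__.
    def optimize_model(session, policy_net, target_net, batch, gamma):
        policyQw = policy_net.Q_w  # already covered by sess.run(init) in train()

        for b in batch:
            Qcurr = session.run(policyQw,
                                feed_dict={policy_net.inputs1: np.identity(16)[b[0]:b[0]+1]})
            # ... rest of the loop unchanged ...

    # Alternative I considered: keep the define_network call here and initialize
    # only the newly created variables with tf.variables_initializer(new_vars),
    # since re-running tf.global_variables_initializer() would also reset the
    # weights that have already been trained.

Is reusing policy_net.Q_w the right approach, or is there a proper way to build new ops after the initializer has run?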
