I am trying to find a codebook at the output of a fully connected neural network: the network should place its output points so that the minimum pairwise distance (Euclidean norm) between the resulting codewords is maximized. The input to the network is the set of points that need to be mapped into the higher-dimensional output space.
For example, if the input dimension is 2 and the output dimension is 3, the following mapping (and any permutation of it) works best: 00 - 000, 01 - 011, 10 - 101, 11 - 110
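As a sanity check on that target, the pairwise Euclidean distances of the example codebook can be computed directly (a quick NumPy check, separate from the training code below): every pair of codewords differs in exactly two bits, so the minimum distance is sqrt(2) ≈ 1.414, which is the value the training should converge to.

import itertools
import numpy as np

# Target codebook from the example above: 000, 011, 101, 110
codebook = np.array([[0, 0, 0],
                     [0, 1, 1],
                     [1, 0, 1],
                     [1, 1, 0]], dtype=float)

# Pairwise Euclidean distances over all distinct codeword pairs
dists = [np.linalg.norm(a - b) for a, b in itertools.combinations(codebook, 2)]
print(min(dists))  # 1.4142... = sqrt(2)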
import tensorflow as tf
import numpy as np
import itertools
input_bits = tf.placeholder(dtype=tf.float32, shape=[None, 2], name='input_bits')
code_out = tf.placeholder(dtype=tf.float32, shape=[None, 3], name='code_out')
np.random.seed(1331)
def find_code(message):
    weight1 = np.random.normal(loc=0.0, scale=0.01, size=[2, 3])
    init1 = tf.constant_initializer(weight1)
    out = tf.layers.dense(inputs=message, units=3, activation=tf.nn.sigmoid, kernel_initializer=init1)
    return out
code = find_code(input_bits)
distances = []
for i in range(0, 3):
    for j in range(i+1, 3):
        distances.append(tf.linalg.norm(code_out[i]-code_out[j]))
min_dist = tf.reduce_min(distances)
# avg_dist = tf.reduce_mean(distances)
loss = -min_dist
opt = tf.train.AdamOptimizer().minimize(loss)
init_variables = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_variables)
saver = tf.train.Saver()
count = int(1e4)
for i in range(count):
    input_bit = [list(k) for k in itertools.product([0, 1], repeat=2)]
    code_preview = sess.run(code, feed_dict={input_bits: input_bit})
    sess.run(opt, feed_dict={input_bits: input_bit, code_out: code_preview})
Because the loss function itself is not differentiable, I get the error
ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables
Am I doing something silly here, or is there a way to work around this? Any help would be appreciated. Thanks in advance.
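For context on the error: the distances in the snippet above are built from the code_out placeholder rather than from the network output code, so the loss contains no trainable variables at all; the minimum itself is not the obstacle, since tf.reduce_min passes a gradient to the smallest element. A minimal sketch of the differentiable variant, assuming TF 1.x and the code tensor defined above (essentially what the update below switches to):

# Build the distances from the network output `code` so gradients can
# reach the dense layer's weights.
distances = []
for i in range(0, 4):              # 4 codewords for a 2-bit input
    for j in range(i + 1, 4):
        distances.append(tf.linalg.norm(code[i] - code[j]))

min_dist = tf.reduce_min(distances)    # differentiable (subgradient at ties)
loss = -min_dist                       # maximize the minimum pairwise distance
opt = tf.train.AdamOptimizer().minimize(loss)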
Update:
import tensorflow as tf
import numpy as np
import itertools
from random import shuffle
input_bits = tf.placeholder(dtype=tf.float32, shape=[None, 2], name='input_bits')
learning_rate_val = tf.placeholder(dtype=tf.float32, shape=(), name='learning_rate')
def find_code(message):
    weight1 = np.random.normal(loc=0.0, scale=0.01, size=[2, 3])
    init1 = tf.constant_initializer(weight1)
    out1 = tf.layers.dense(inputs=message, units=3, activation=tf.nn.sigmoid, kernel_initializer=init1)
    return out1
code = find_code(input_bits)
distances = []
for i in range(0, 4):
    for j in range(i+1, 4):
        distances.append(tf.linalg.norm(code[i]-code[j]))
min_dist = tf.reduce_min(distances)
# avg_dist = tf.reduce_mean(distances)
loss = - min_dist
opt = tf.train.AdamOptimizer(learning_rate=learning_rate_val).minimize(loss)  # use the learning-rate placeholder that is fed below
init_variables = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_variables)
saver = tf.train.Saver()
count = int(9e5)
threshold = 0.5
initial_learning_rate = 1e-3   # not shown in the original snippet; assumed value (Adam's default)
train_loss_track = []          # loss history; not shown in the original snippet
for i in range(count):
    input_bit = [list(k) for k in itertools.product([0, 1], repeat=2)]
    shuffle(input_bit)
    input_bit = 2 * np.array(input_bit) - 1
    code_preview = sess.run(code, feed_dict={input_bits: input_bit})
    sess.run(opt, feed_dict={input_bits: input_bit, learning_rate_val: initial_learning_rate})
    train_loss_track.append(sess.run(loss, feed_dict={input_bits: input_bit}))
    if i % 5000 == 0 or i == 0 or i == count - 1:
        input_bit = [list(k) for k in itertools.product([0, 1], repeat=2)]
        input_bit = 2 * np.array(input_bit) - 1
        output, train_loss = sess.run([code, loss], feed_dict={input_bits: input_bit,
                                                               learning_rate_val: initial_learning_rate})
        print("\nEpoch: " + str(i))
        print("Code: " + str(output))
        output[output > threshold] = 1
        output[output <= threshold] = 0
        print("Code: " + str(output))
        print("Loss: " + str(train_loss) + "\n")
This seems to work fine. However, the final output is
Code: [[9.9976158e-01 0.0000000e+00 1.0000000e+00]
[4.9997061e-01 0.0000000e+00 0.0000000e+00]
[5.0000829e-01 1.0000000e+00 1.0000000e+00]
[2.3837961e-04 1.0000000e+00 4.6849247e-11]]
Code: [[1. 0. 1.]
[0. 0. 0.]
[1. 1. 1.]
[0. 1. 0.]]
Loss: -1.1179142
Although this is close to the expected output, it gets stuck there. Is there any way to reach the expected output?
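To quantify the gap: the continuous code reaches a minimum pairwise distance of about 1.118 (the reported loss of -1.1179), the rounded codebook above only achieves a minimum distance of 1.0, and the target codebook from the first example achieves sqrt(2) ≈ 1.414. A quick check, using a small helper (min_pairwise_distance) introduced here only for the comparison:

import itertools
import numpy as np

def min_pairwise_distance(codebook):
    # Smallest Euclidean distance over all distinct pairs of codewords
    cb = np.asarray(codebook, dtype=float)
    return min(np.linalg.norm(a - b) for a, b in itertools.combinations(cb, 2))

learned = [[1, 0, 1], [0, 0, 0], [1, 1, 1], [0, 1, 0]]   # rounded output above
target  = [[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0]]   # codebook from the first example

print(min_pairwise_distance(learned))  # 1.0
print(min_pairwise_distance(target))   # 1.4142... = sqrt(2)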