A TensorFlow 1.14 implementation of Seq2Seq

The theory behind Seq2Seq is not covered here; this is a minimal example that implements the Seq2Seq (S2S) idea, used mainly to translate single words.

import tensorflow as tf
import numpy as np

tf.reset_default_graph()

# Build the character vocabulary and the mappings between characters and indices
chars = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz']
char2idx = {v:k for k,v in enumerate(chars)}
idx2char = {k:v for k,v in enumerate(chars)}
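# For reference (the values follow directly from the list above): char2idx maps
# 'S' -> 0, 'E' -> 1, 'P' -> 2, 'a' -> 3, ..., 'z' -> 28, and idx2char is the reverse mapping.
# 'S', 'E' and 'P' serve as the decoder start symbol, end symbol and padding symbol respectively.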

datas = [['man', 'women'], ['black', 'white'], ['king', 'queen'], ['girl', 'boy'], ['up', 'down'], ['high', 'low']]

step = 5  # use the maximum length of the input words as the number of time steps
hidden = 256  # RNN hidden-state size
dim = 32  # embedding dimension
V = len(chars)  # vocabulary size

def make_batch(datas):
    input_batch, output_batch, target_batch = [], [], []
    for data in datas:
        for i in range(2):
            data[i] = data[i] + 'P' * (step - len(data[i]))  # pad every word in datas with 'P' up to step characters

        input = [char2idx[c] for c in data[0]]
        output = [char2idx[c] for c in 'S' + data[1]]  # the decoder input starts with the 'S' symbol
        target = [char2idx[c] for c in data[1] + 'E']  # the decoder target ends with the 'E' symbol

        input_batch.append(input)
        output_batch.append(output)
        target_batch.append(target)
    return input_batch, output_batch, target_batch
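# For example (illustrative, the values follow directly from char2idx above), the pair
# ['man', 'women'] with step = 5 is turned into
#   input  ('manPP')  -> [15, 3, 16, 2, 2]
#   output ('Swomen') -> [0, 25, 17, 15, 7, 16]
#   target ('womenE') -> [25, 17, 15, 7, 16, 1]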

# Initialize the word-embedding matrix
embedding = tf.get_variable(name="embedding", shape=[V, dim], initializer=tf.random_normal_initializer())

# Embed the encoder inputs (the encoder and decoder share the same embedding matrix)
enc_input = tf.placeholder(tf.int32, [None, None])
enc_input_embedding = tf.nn.embedding_lookup(embedding, enc_input)  # [batch_size, step, dim]

# Embed the decoder inputs
dec_input = tf.placeholder(tf.int32, [None, None])
dec_input_embedding = tf.nn.embedding_lookup(embedding, dec_input)  # [batch_size, step+1, dim]

# Decoder targets
targets = tf.placeholder(tf.int64, [None, None])  # [batch_size, step+1]

# Encoder
with tf.variable_scope('encode'):
    enc_cell = tf.nn.rnn_cell.BasicRNNCell(hidden)
    enc_cell = tf.nn.rnn_cell.DropoutWrapper(enc_cell, output_keep_prob=0.5)  # apply dropout
    _, enc_states = tf.nn.dynamic_rnn(enc_cell, enc_input_embedding, dtype=tf.float32)  # enc_states: [batch_size, hidden]

# Decoder
with tf.variable_scope('decode'):
    dec_cell = tf.nn.rnn_cell.BasicRNNCell(hidden)
    dec_cell = tf.nn.rnn_cell.DropoutWrapper(dec_cell, output_keep_prob=0.5)  # apply dropout
    outputs, _ = tf.nn.dynamic_rnn(dec_cell, dec_input_embedding, initial_state=enc_states, dtype=tf.float32)  # outputs: [batch_size, step+1, hidden]
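# The only link between the two RNNs is enc_states: the encoder's final hidden state is used as the
# decoder's initial state, which is the core idea of this basic Seq2Seq setup.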

# A dense layer maps every decoder step to an unnormalized distribution over the vocabulary
logits = tf.layers.dense(outputs, V, activation=None)  # logits: [batch_size, step+1, V]
# Compute the loss (sparse cross-entropy over the integer targets) and optimize
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)
# Prediction: the most likely character index at every decoder step
prediction = tf.argmax(logits, 2)

# Initialize the TensorFlow session
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Generate the training data
input_batch, output_batch, target_batch = make_batch(datas)
# Train the model
for epoch in range(10000):
    _, loss = sess.run([optimizer, cost], feed_dict={enc_input: input_batch, dec_input: output_batch, targets: target_batch})
    if (epoch + 1) % 1000 == 0:
        print('epoch: ', '%04d' % (epoch + 1), ' loss:', '%6f' % loss)

# Translation
def translate(word):
    tmp = [word, 'P' * len(word)]  # at inference time the decoder input is a dummy all-'P' sequence
    input_batch, output_batch, _ = make_batch([tmp])
    result = sess.run(prediction, feed_dict={enc_input: input_batch, dec_input: output_batch})
    decoded = [idx2char[i] for i in result[0]]
    end = decoded.index('E')
    translated = ''.join(decoded[:end])
    return translated.replace('P', '')
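# Note that translate() feeds the decoder its dummy input in a single pass, which works here only
# because the model has memorized these few training pairs. A step-by-step greedy decoder
# (an illustrative sketch reusing the tensors defined above; translate_greedy is not part of the
# original script) would instead feed each prediction back in as the next decoder input:
def translate_greedy(word):
    word = word + 'P' * (step - len(word))
    enc = [[char2idx[c] for c in word]]
    dec = [char2idx['S']]  # start decoding from the 'S' symbol
    for _ in range(step + 1):
        result = sess.run(prediction, feed_dict={enc_input: enc, dec_input: [dec]})
        next_idx = int(result[0][-1])  # prediction for the latest decoder step
        if idx2char[next_idx] == 'E':  # stop at the end symbol
            break
        dec.append(next_idx)
    return ''.join(idx2char[i] for i in dec[1:]).replace('P', '')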

# Example translations
print('man ->', translate('man'))
print('mans ->', translate('mans'))
print('king ->', translate('king'))
print('black ->', translate('black'))
print('upp ->', translate('upp'))

Output:

epoch: 1000 loss: 0.000290
epoch: 2000 loss: 0.000112
epoch: 3000 loss: 0.000049
epoch: 4000 loss: 0.000015
epoch: 5000 loss: 0.000014
epoch: 6000 loss: 0.000003
epoch: 7000 loss: 0.000002
epoch: 8000 loss: 0.000001
epoch: 9000 loss: 0.000001
epoch: 10000 loss: 0.000000
man -> women
mans -> women
king -> queen
black -> white
upp -> down
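Even inputs that never appear in the training data, such as 'mans' and 'upp', still map to the translations of the closest training words ('man' and 'up'), presumably because the encoder produces similar final states for similar character sequences.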