1. How to model sequential data?

2. Simple RNN for Movie Review Analysis

Code

import numpy as np
from matplotlib import pyplot as plt

import tensorflow as tf
from tensorflow.keras.preprocessing import sequence

# Print NumPy arrays in full: an infinite threshold disables the "..."
# summarization of large arrays.
np.set_printoptions(threshold=np.inf)

# Sentiment classification: training hyperparameters
epochs = 3
batchsz = 32  # batch size

vocabulary = 10000  # vocabulary size
embedding_dim = 32  # word-vector feature length, shape(x) = 32
word_num = 500  # maximum review length; longer reviews are truncated, shorter ones are padded

state_dim = 32  # RNN hidden-state size, shape(h) = 32

# Load the IMDB dataset. Reviews are integer-encoded: each number stands
# for one word, and num_words caps the vocabulary kept by the loader.
imdb = tf.keras.datasets.imdb
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocabulary)

# Bring every review to exactly word_num tokens (pad_sequences truncates
# longer reviews and pads shorter ones), for both splits in one pass.
x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=word_num)
    for reviews in (x_train, x_test)
)


# Build the network: token ids -> word embeddings -> SimpleRNN -> sigmoid.
model = tf.keras.models.Sequential()
# Declare the input shape explicitly. Embedding's `input_length` argument is
# deprecated in Keras 3 (it only triggers a warning there), and without a
# known input shape the model would not be built yet when summary() runs.
model.add(tf.keras.Input(shape=(word_num,), dtype="int32"))
model.add(tf.keras.layers.Embedding(vocabulary, embedding_dim))
# return_sequences=False: only the last hidden state is passed to the
# classifier head, not the state at every time step.
model.add(tf.keras.layers.SimpleRNN(state_dim, return_sequences=False))
# Single sigmoid unit: probability-like score for the binary label.
model.add(tf.keras.layers.Dense(1, activation="sigmoid"))

model.summary()

model.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
    loss='binary_crossentropy',
    # Keep the short metric name 'acc': the training history is read back
    # through the 'acc' / 'val_acc' keys, which are derived from this name.
    metrics=['acc'],
)

# Train the model, holding out 20% of the training data for validation.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=epochs,
    batch_size=batchsz,
    validation_split=0.2,
)

def _plot_curves(position, train_values, val_values, quantity):
    """Draw the training and validation curves of one metric in a subplot.

    position is the 1-based slot in the 1x2 subplot grid; quantity is the
    metric's display name, used for the legend, the y-axis label and the
    title.
    """
    plt.subplot(1, 2, position)
    plt.plot(train_values, label=f'Training {quantity}')
    plt.plot(val_values, label=f'Validation {quantity}')
    plt.title(f'Training and Validation {quantity}')
    plt.xlabel('Epoch')
    plt.ylabel(quantity)
    plt.legend()


# Show the accuracy and loss curves for the training and validation sets.
recorded = history.history
acc, val_acc = recorded['acc'], recorded['val_acc']
loss, val_loss = recorded['loss'], recorded['val_loss']

_plot_curves(1, acc, val_acc, 'Accuracy')
_plot_curves(2, loss, val_loss, 'Loss')
plt.show()

# Loss and accuracy on the test set; evaluate() returns them in the order
# [loss, metric] for the single metric configured at compile time.
loss_and_acc = model.evaluate(x=x_test, y=y_test)
print(f'on test dataset, loss = {loss_and_acc[0]}')
print(f'on test dataset, acc = {loss_and_acc[1]}')