2-2. LSTM and GRU
- LSTM
  - Created to address the short-term memory problem of RNNs
  - Aims to fix the issue where recent information is retained but older information is not carried forward through the sequence
- GRU
  - GRU (Gated Recurrent Unit): simplifies the LSTM's gates, making the unit faster to compute while keeping the gating idea (see the sketch below)
- GRU Stacking
  - Stacking GRU layers can improve learning on time-series data (see the stacked-GRU sketch at the end of this section)
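A minimal GRU version of the IMDB classifier used in the cells below, as a sketch: layers.GRU is the standard Keras layer, but this particular model is an illustrative assumption rather than one of the original runs.

from tensorflow.keras import Sequential, layers
gru_model = Sequential([
    layers.Input(shape=(80,)),
    layers.Embedding(input_dim=10000, output_dim=32),
    layers.GRU(64),                          # GRU: only update/reset gates, so fewer parameters than an LSTM of the same size
    layers.Dense(2, activation='softmax')
])
gru_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])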
from tensorflow.keras.datasets import imdb
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=10000)  # load the IMDB dataset, keeping the 10,000 most frequent words

from tensorflow.keras.preprocessing.sequence import pad_sequences
X_train_pad = pad_sequences(X_train, maxlen=80, padding='post', truncating='post')
X_test_pad = pad_sequences(X_test, maxlen=80, padding='post', truncating='post')

from tensorflow.keras import Sequential, layers
model = Sequential([
    layers.Input(shape=(80,)),
    layers.Embedding(input_dim=10000, output_dim=32),  # vocabulary of 10,000 indices, including the <OOV> and <PAD> tokens
    layers.LSTM(64),                                   # LSTM layer
    layers.Dense(2, activation='softmax')              # outputs negative/positive probabilities
])
model.summary()
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train_pad, y_train, epochs=10, batch_size=200)
model.evaluate(X_test_pad, y_test)
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ embedding (Embedding) │ (None, 80, 32) │ 320,000 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ lstm (LSTM) │ (None, 64) │ 24,832 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense) │ (None, 2) │ 130 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 344,962 (1.32 MB)
Trainable params: 344,962 (1.32 MB)
Non-trainable params: 0 (0.00 B)
Epoch 1/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 4s 27ms/step - accuracy: 0.7041 - loss: 0.5439
Epoch 2/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 3s 27ms/step - accuracy: 0.8501 - loss: 0.3514
Epoch 3/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 3s 27ms/step - accuracy: 0.8840 - loss: 0.2888
Epoch 4/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 3s 27ms/step - accuracy: 0.9008 - loss: 0.2531
Epoch 5/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 4s 28ms/step - accuracy: 0.9171 - loss: 0.2194
Epoch 6/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 4s 28ms/step - accuracy: 0.9310 - loss: 0.1830
Epoch 7/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - accuracy: 0.9424 - loss: 0.1538
Epoch 8/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 3s 27ms/step - accuracy: 0.9037 - loss: 0.2837
Epoch 9/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - accuracy: 0.7282 - loss: 0.5365
Epoch 10/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - accuracy: 0.9020 - loss: 0.2438
782/782 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - accuracy: 0.7718 - loss: 0.6017
[0.601729154586792, 0.7717999815940857]
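As a quick usage check (an illustrative assumption, not part of the original run), the trained model can be applied to a few padded test reviews and the argmax of the softmax output read off as the predicted class:

import numpy as np
probs = model.predict(X_test_pad[:5])   # softmax probabilities, shape (5, 2)
preds = np.argmax(probs, axis=1)        # 0 = negative, 1 = positive
print(preds, y_test[:5])                # compare predictions against the true labels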
from tensorflow.keras.datasets import imdb
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=10000)  # load the IMDB dataset

from tensorflow.keras.preprocessing.sequence import pad_sequences
X_train_pad = pad_sequences(X_train, maxlen=80, padding='post', truncating='post')
X_test_pad = pad_sequences(X_test, maxlen=80, padding='post', truncating='post')

from tensorflow.keras import Sequential, layers
model = Sequential([
    layers.Input(shape=(80,)),
    layers.Embedding(input_dim=10000, output_dim=32),  # vocabulary of 10,000 indices, including the <OOV> and <PAD> tokens
    layers.Bidirectional(layers.LSTM(64)),             # bidirectional LSTM: reads the sequence forwards and backwards
    layers.Dense(2, activation='softmax')              # outputs negative/positive probabilities
])
model.summary()
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train_pad, y_train, epochs=10, batch_size=200)
model.evaluate(X_test_pad, y_test)
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ embedding_1 (Embedding) │ (None, 80, 32) │ 320,000 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ bidirectional (Bidirectional) │ (None, 128) │ 49,664 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense) │ (None, 2) │ 258 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 369,922 (1.41 MB)
Trainable params: 369,922 (1.41 MB)
Non-trainable params: 0 (0.00 B)
Epoch 1/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 5s 33ms/step - accuracy: 0.7249 - loss: 0.5196
Epoch 2/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 4s 33ms/step - accuracy: 0.8589 - loss: 0.3395
Epoch 3/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 4s 33ms/step - accuracy: 0.8869 - loss: 0.2820
Epoch 4/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 4s 33ms/step - accuracy: 0.9033 - loss: 0.2475
Epoch 5/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 4s 35ms/step - accuracy: 0.9199 - loss: 0.2141
Epoch 6/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 4s 35ms/step - accuracy: 0.9315 - loss: 0.1863
Epoch 7/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 4s 35ms/step - accuracy: 0.9488 - loss: 0.1467
Epoch 8/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 4s 35ms/step - accuracy: 0.9620 - loss: 0.1143
Epoch 9/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 4s 35ms/step - accuracy: 0.9716 - loss: 0.0905
Epoch 10/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 4s 35ms/step - accuracy: 0.9734 - loss: 0.0822
782/782 ━━━━━━━━━━━━━━━━━━━━ 5s 6ms/step - accuracy: 0.7646 - loss: 0.9382
[0.9382433891296387, 0.7645999789237976]
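The logs above show training accuracy climbing past 0.97 while test accuracy stays near 0.76, a sign of overfitting. One common remedy, sketched here as an assumption rather than part of the original experiment, is to hold out a validation split and stop training when the validation loss stops improving:

from tensorflow.keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)
model.fit(X_train_pad, y_train, epochs=10, batch_size=200,
          validation_split=0.2, callbacks=[early_stop])  # stops early and keeps the best weights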
from tensorflow.keras.datasets import imdb
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=10000)  # load the IMDB dataset

from tensorflow.keras.preprocessing.sequence import pad_sequences
X_train_pad = pad_sequences(X_train, maxlen=80, padding='post', truncating='post')
X_test_pad = pad_sequences(X_test, maxlen=80, padding='post', truncating='post')

from tensorflow.keras import Sequential, layers
model = Sequential([
    layers.Input(shape=(80,)),
    layers.Embedding(input_dim=10000, output_dim=32, mask_zero=True),  # mask padding index 0 so later layers ignore it
    layers.LSTM(64, return_sequences=True),   # return the full sequence so the next LSTM sees every time step
    layers.LSTM(128),                          # second, wider LSTM consumes the stacked representation
    layers.Dense(2, activation='softmax')      # outputs negative/positive probabilities
])
model.summary()
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train_pad, y_train, epochs=10, batch_size=200)
model.evaluate(X_test_pad, y_test)
Model: "sequential_2"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ embedding_2 (Embedding) │ (None, 80, 32) │ 320,000 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ lstm_2 (LSTM) │ (None, 80, 64) │ 24,832 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ lstm_3 (LSTM) │ (None, 128) │ 98,816 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_2 (Dense) │ (None, 2) │ 258 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 443,906 (1.69 MB)
Trainable params: 443,906 (1.69 MB)
Non-trainable params: 0 (0.00 B)
Epoch 1/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 17s 130ms/step - accuracy: 0.7191 - loss: 0.5255
Epoch 2/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 16s 130ms/step - accuracy: 0.8558 - loss: 0.3404
Epoch 3/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 16s 131ms/step - accuracy: 0.8845 - loss: 0.2835
Epoch 4/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 16s 130ms/step - accuracy: 0.9067 - loss: 0.2430
Epoch 5/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 16s 131ms/step - accuracy: 0.9216 - loss: 0.2108
Epoch 6/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 16s 132ms/step - accuracy: 0.9368 - loss: 0.1705
Epoch 7/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 16s 129ms/step - accuracy: 0.9560 - loss: 0.1261
Epoch 8/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 16s 130ms/step - accuracy: 0.9637 - loss: 0.0983
Epoch 9/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 16s 130ms/step - accuracy: 0.9766 - loss: 0.0713
Epoch 10/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 17s 133ms/step - accuracy: 0.9788 - loss: 0.0622
782/782 ━━━━━━━━━━━━━━━━━━━━ 11s 14ms/step - accuracy: 0.7514 - loss: 1.0825
[1.0824967622756958, 0.7513599991798401]
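The stacked model above uses two LSTM layers; the GRU Stacking idea from the notes at the top of this section would look very similar. The following is a minimal sketch under that assumption, not one of the original runs:

from tensorflow.keras import Sequential, layers
gru_stack = Sequential([
    layers.Input(shape=(80,)),
    layers.Embedding(input_dim=10000, output_dim=32, mask_zero=True),
    layers.GRU(64, return_sequences=True),   # return the full sequence so the next GRU sees every time step
    layers.GRU(128),
    layers.Dense(2, activation='softmax')
])
gru_stack.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
gru_stack.fit(X_train_pad, y_train, epochs=10, batch_size=200)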