2-1. Recurrent Neural Networks (RNN)
- Overview of recurrent neural networks
- The recurrent neural network (RNN) is an algorithm developed by David Rumelhart in 1986
- An RNN is a deep learning technique for learning time-series data
- An RNN connects the network at the current time step (t) to the next time step (t+1)
- RNNs are a representative technique for AI translation, speech recognition, and stock-price prediction
- Types of RNN (sketched in code after this list)
- One to Many: a single input produces multiple outputs
- One to Many is used, for example, to caption a video
- Many to One: multiple inputs produce a single output
- Many to One is used, for example, to classify movie reviews
- Many to Many: multiple inputs produce multiple outputs
- The third structure, Many to Many, is used for translation
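A minimal Keras sketch (not from the course material) of how these three input/output patterns are typically wired; the layer sizes here are illustrative only:

from tensorflow.keras import Sequential, layers

# Many to One: read a whole sequence, emit one prediction
# (e.g., movie-review classification). SimpleRNN returns only
# the final hidden state by default.
many_to_one = Sequential([
    layers.Input(shape=(80, 32)),                  # 80 time steps, 32 features each
    layers.SimpleRNN(64),
    layers.Dense(2, activation='softmax'),
])

# Many to Many: emit one prediction per time step (e.g., tagging
# every token); return_sequences=True keeps every hidden state.
many_to_many = Sequential([
    layers.Input(shape=(80, 32)),
    layers.SimpleRNN(64, return_sequences=True),   # output: (None, 80, 64)
    layers.Dense(5, activation='softmax'),         # applied at each time step
])

# One to Many (e.g., captioning): repeat a single input vector
# across the decoding steps with RepeatVector.
one_to_many = Sequential([
    layers.Input(shape=(32,)),
    layers.RepeatVector(10),                       # feed the vector to 10 time steps
    layers.SimpleRNN(64, return_sequences=True),
    layers.Dense(1000, activation='softmax'),      # one token per step
])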
from tensorflow.keras.datasets import imdb
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=1000)  # Load the IMDB dataset
from tensorflow.keras.preprocessing.sequence import pad_sequences
X_train_pad = pad_sequences(X_train, maxlen=80, padding='post', truncating='post')
X_test_pad = pad_sequences(X_test, maxlen=80, padding='post', truncating='post')
from tensorflow.keras import Sequential, layers
model = Sequential([
    layers.Input(shape=(80,)),
    layers.Embedding(input_dim=1000, output_dim=32),  # vocabulary includes the <OOV> and <PAD> tokens
    layers.SimpleRNN(64),  # 32x64 weights for X, 64x64 weights for h, plus 64 biases
    layers.Dense(2, activation='softmax')  # outputs probabilities for negative/positive
])
model.summary()
Model: "sequential_3"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ embedding_3 (Embedding) │ (None, 80, 32) │ 32,000 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ simple_rnn_2 (SimpleRNN) │ (None, 64) │ 6,208 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_4 (Dense) │ (None, 2) │ 130 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 38,338 (149.76 KB)
Trainable params: 38,338 (149.76 KB)
Non-trainable params: 0 (0.00 B)
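The parameter counts in the summary can be reproduced by hand: the SimpleRNN layer computes h(t) = tanh(x(t)·Wx + h(t-1)·Wh + b), so it stores a 32x64 input-weight matrix, a 64x64 recurrent-weight matrix, and 64 biases. A quick sanity check in plain Python:

embedding = 1000 * 32                  # one 32-dimensional vector per vocabulary index -> 32,000
simple_rnn = 32*64 + 64*64 + 64        # Wx (input), Wh (recurrent), bias -> 6,208
dense = 64*2 + 2                       # output-layer weights and biases -> 130
print(embedding + simple_rnn + dense)  # 38338, matching "Total params" above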
model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train_pad, y_train, epochs=10, batch_size=200)
model.evaluate(X_test_pad, y_test)
Epoch 1/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 2s 9ms/step - accuracy: 0.4989 - loss: 0.6990
Epoch 2/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.5051 - loss: 0.6945
Epoch 3/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.5067 - loss: 0.6937
Epoch 4/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.5121 - loss: 0.6929
Epoch 5/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.5134 - loss: 0.6926
Epoch 6/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.5170 - loss: 0.6921
Epoch 7/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.5184 - loss: 0.6921
Epoch 8/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.5241 - loss: 0.6915
Epoch 9/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.5276 - loss: 0.6908
Epoch 10/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - accuracy: 0.5323 - loss: 0.6903
782/782 ━━━━━━━━━━━━━━━━━━━━ 2s 2ms/step - accuracy: 0.5207 - loss: 0.6925
[0.6925301551818848, 0.5207200050354004]
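Trained with plain SGD, the SimpleRNN barely moves past chance on this task, ending at about 52% test accuracy. For comparison, a flat feed-forward network (model_dnn) is trained next on the same padded reviews. The definition of this first model_dnn does not appear in this section; a plausible reconstruction, assuming the same 1,000-word vocabulary and length-80 inputs as the RNN and the same layer layout as the DNNs shown further below:

from tensorflow.keras import Sequential, layers
model_dnn = Sequential([
    layers.Input(shape=(80,)),                        # assumed: same padded inputs as the RNN
    layers.Embedding(input_dim=1000, output_dim=32),  # assumed: same vocabulary and embedding size
    layers.Flatten(),                                 # 80 x 32 = 2,560 features
    layers.Dense(64, activation='relu'),
    layers.Dense(2, activation='softmax')
])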
model_dnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_dnn.fit(X_train_pad, y_train, epochs=10, batch_size=200)
model_dnn.evaluate(X_test_pad, y_test)
Epoch 1/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8818 - loss: 0.3331
Epoch 2/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9425 - loss: 0.1491
Epoch 3/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9722 - loss: 0.0926
Epoch 4/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9905 - loss: 0.0553
Epoch 5/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9965 - loss: 0.0336
Epoch 6/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9998 - loss: 0.0177
Epoch 7/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9999 - loss: 0.0102
Epoch 8/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9999 - loss: 0.0071
Epoch 9/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 1.0000 - loss: 0.0048
Epoch 10/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 1.0000 - loss: 0.0036
782/782 ━━━━━━━━━━━━━━━━━━━━ 1s 967us/step - accuracy: 0.7182 - loss: 1.6277
[1.6277166604995728, 0.7182400226593018]
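Even this simple DNN clearly outperforms the SimpleRNN, reaching about 71.8% test accuracy versus 52.1%, although the gap between the near-perfect training accuracy and the test score points to heavy overfitting. The next experiment enlarges the vocabulary to 10,000 words and the padded length to 200 tokens.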
from tensorflow.keras.datasets import imdb
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=10000)  # Load the IMDB dataset
from tensorflow.keras.preprocessing.sequence import pad_sequences
X_train_pad = pad_sequences(X_train, maxlen=200, padding='post', truncating='post')
X_test_pad = pad_sequences(X_test, maxlen=200, padding='post', truncating='post')
from tensorflow.keras import Sequential, layers
model_dnn = Sequential([
    layers.Input(shape=(200,)),
    layers.Embedding(input_dim=10000, output_dim=32),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(2, activation='softmax')
])
model_dnn.summary()
Model: "sequential_4"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ embedding_4 (Embedding) │ (None, 200, 32) │ 320,000 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ flatten_1 (Flatten) │ (None, 6400) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_5 (Dense) │ (None, 64) │ 409,664 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_6 (Dense) │ (None, 2) │ 130 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 729,794 (2.78 MB)
Trainable params: 729,794 (2.78 MB)
Non-trainable params: 0 (0.00 B)
model_dnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_dnn.fit(X_train_pad, y_train, epochs=10, batch_size=200)
model_dnn.evaluate(X_test_pad, y_test)
Epoch 1/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.6979 - loss: 0.5411
Epoch 2/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.9112 - loss: 0.2279
Epoch 3/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.9834 - loss: 0.0662
Epoch 4/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.9981 - loss: 0.0154
Epoch 5/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.9991 - loss: 0.0060
Epoch 6/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.9999 - loss: 0.0023
Epoch 7/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 1.0000 - loss: 0.0012
Epoch 8/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 1.0000 - loss: 8.0424e-04
Epoch 9/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 1.0000 - loss: 5.9816e-04
Epoch 10/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 1.0000 - loss: 4.0381e-04
782/782 ━━━━━━━━━━━━━━━━━━━━ 1s 1ms/step - accuracy: 0.8356 - loss: 0.6639
[0.663922905921936, 0.8356000185012817]
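Richer inputs help substantially: with 10,000 words and 200 tokens per review, test accuracy rises to about 83.6%, even though the network again overfits to 100% training accuracy.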
- Change truncating and padding in pad_sequences to 'pre' (illustrated below; note that maxlen is also reduced from 200 to 80)
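A minimal illustration (not part of the experiment) of what the two modes do: 'post' pads and truncates at the end of each sequence, while 'pre' does so at the beginning:

from tensorflow.keras.preprocessing.sequence import pad_sequences
seqs = [[1, 2, 3], [4, 5, 6, 7, 8]]
print(pad_sequences(seqs, maxlen=4, padding='post', truncating='post'))
# [[1 2 3 0]
#  [4 5 6 7]]
print(pad_sequences(seqs, maxlen=4, padding='pre', truncating='pre'))
# [[0 1 2 3]
#  [5 6 7 8]]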
from tensorflow.keras.datasets import imdb
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=10000)  # Load the IMDB dataset
from tensorflow.keras.preprocessing.sequence import pad_sequences
X_train_pad = pad_sequences(X_train, maxlen=80, padding='pre', truncating='pre')
X_test_pad = pad_sequences(X_test, maxlen=80, padding='pre', truncating='pre')
from tensorflow.keras import Sequential, layers
model_dnn = Sequential([
    layers.Input(shape=(80,)),
    layers.Embedding(input_dim=10000, output_dim=32),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(2, activation='softmax')
])
model_dnn.summary()
Model: "sequential_5"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ embedding_5 (Embedding) │ (None, 80, 32) │ 320,000 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ flatten_2 (Flatten) │ (None, 2560) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_7 (Dense) │ (None, 64) │ 163,904 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_8 (Dense) │ (None, 2) │ 130 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 484,034 (1.85 MB)
Trainable params: 484,034 (1.85 MB)
Non-trainable params: 0 (0.00 B)
model_dnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_dnn.fit(X_train_pad, y_train, epochs=10, batch_size=200)
model_dnn.evaluate(X_test_pad, y_test)
Epoch 1/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.6951 - loss: 0.5530
Epoch 2/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9005 - loss: 0.2479
Epoch 3/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9788 - loss: 0.0831
Epoch 4/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9982 - loss: 0.0165
Epoch 5/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9994 - loss: 0.0053
Epoch 6/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 1.0000 - loss: 0.0019
Epoch 7/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 1.0000 - loss: 0.0011
Epoch 8/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 1.0000 - loss: 7.6666e-04
Epoch 9/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 1.0000 - loss: 5.6370e-04
Epoch 10/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 1.0000 - loss: 4.3530e-04
782/782 ━━━━━━━━━━━━━━━━━━━━ 1s 951us/step - accuracy: 0.8227 - loss: 0.7148
[0.7148440480232239, 0.8227199912071228]
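With 'pre' padding and the reviews cut back to 80 tokens, test accuracy lands at about 82.3%, close to the 200-token 'post' run. The final experiment returns to 'post' padding at length 80 and inserts an additional 128-unit hidden layer.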
from tensorflow.keras.datasets import imdb
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=10000)  # Load the IMDB dataset
from tensorflow.keras.preprocessing.sequence import pad_sequences
X_train_pad = pad_sequences(X_train, maxlen=80, padding='post', truncating='post')
X_test_pad = pad_sequences(X_test, maxlen=80, padding='post', truncating='post')
from tensorflow.keras import Sequential, layers
model_dnn = Sequential([
    layers.Input(shape=(80,)),
    layers.Embedding(input_dim=10000, output_dim=32),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(2, activation='softmax')
])
model_dnn.summary()
Model: "sequential_8"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ embedding_8 (Embedding) │ (None, 80, 32) │ 320,000 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ flatten_5 (Flatten) │ (None, 2560) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_15 (Dense) │ (None, 64) │ 163,904 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_16 (Dense) │ (None, 128) │ 8,320 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_17 (Dense) │ (None, 2) │ 258 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 492,482 (1.88 MB)
Trainable params: 492,482 (1.88 MB)
Non-trainable params: 0 (0.00 B)
model_dnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_dnn.fit(X_train_pad, y_train, epochs=10, batch_size=200)
model_dnn.evaluate(X_test_pad, y_test)
Epoch 1/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.6804 - loss: 0.5714
Epoch 2/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.8971 - loss: 0.2612
Epoch 3/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9844 - loss: 0.0540
Epoch 4/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9990 - loss: 0.0055
Epoch 5/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9999 - loss: 9.5676e-04
Epoch 6/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 1.0000 - loss: 1.9888e-04
Epoch 7/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 1.0000 - loss: 1.1777e-04
Epoch 8/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 1.0000 - loss: 7.9630e-05
Epoch 9/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 1.0000 - loss: 5.7610e-05
Epoch 10/10
125/125 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 1.0000 - loss: 4.3359e-05
782/782 ━━━━━━━━━━━━━━━━━━━━ 1s 961us/step - accuracy: 0.7634 - loss: 1.3598
[1.3597831726074219, 0.7633600234985352]
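The extra hidden layer does not make up for the shorter inputs: the deeper length-80 model drives its training loss close to zero yet ends at about 76.3% test accuracy, below the 200-token model, suggesting that in these runs input length and vocabulary size matter more than network depth.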