import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from sklearn.model_selection import train_test_split
# Toy spam/ham dataset: (text, label) pairs, where label 1 = spam and 0 = ham.
messages = [
    ("Get a free iPhone now!", 1),  # spam
    ("Meeting at 3pm tomorrow", 0),  # ham
    ("Click here to claim your prize", 1),
    ("Don't forget to submit your assignment", 0),
]

# Unzip the pairs into parallel lists of texts (X) and labels (y).
texts, labels = zip(*messages)
X = list(texts)
y = list(labels)

# Fit a word-index tokenizer capped at the max_words most frequent tokens,
# then map each message to its integer-sequence form.
max_words = 1000
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X)
X_sequences = tokenizer.texts_to_sequences(X)

# Pad/truncate every sequence to a fixed length so they batch together.
max_len = 50
X_padded = pad_sequences(X_sequences, maxlen=max_len)

# Hold out 20% of the samples for evaluation (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X_padded, y, test_size=0.2, random_state=42
)

# Model: embedding -> spatial dropout -> LSTM -> sigmoid binary classifier.
embedding_dim = 50
model = Sequential([
    Embedding(input_dim=max_words, output_dim=embedding_dim,
              input_length=max_len),
    SpatialDropout1D(0.2),
    LSTM(100, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Training hyperparameters.
epochs = 10
batch_size = 64
# Train the classifier; labels are converted to a NumPy array since Keras
# expects array-like targets. 10% of the training split is used for validation.
model.fit(
    X_train,
    np.array(y_train),
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
)

# Evaluate on the held-out test split and report accuracy.
loss, accuracy = model.evaluate(X_test, np.array(y_test))
print("Test Accuracy:", accuracy)
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from sklearn.model_selection import train_test_split

# Labelled example messages: 1 marks spam, 0 marks ham.
messages = [
    ("Get a free iPhone now!", 1),  # 1 for spam
    ("Meeting at 3pm tomorrow", 0),  # 0 for ham
    ("Click here to claim your prize", 1),
    ("Don't forget to submit your assignment", 0),
]

# Split the (text, label) pairs into parallel lists.
X = [text for text, _ in messages]
y = [label for _, label in messages]

# Tokenize: keep only the max_words most frequent words.
max_words = 1000
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X)
X_sequences = tokenizer.texts_to_sequences(X)

# Pad each sequence to a uniform length of max_len tokens.
max_len = 50
X_padded = pad_sequences(X_sequences, maxlen=max_len)

# Reserve 20% of the data as a test set; seed fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X_padded, y, test_size=0.2, random_state=42)

# Build the network layer by layer: embedding, dropout, LSTM, sigmoid output.
embedding_dim = 50
model = Sequential()
for layer in (
    Embedding(input_dim=max_words, output_dim=embedding_dim,
              input_length=max_len),
    SpatialDropout1D(0.2),
    LSTM(100, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
):
    model.add(layer)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train with a 10% validation carve-out from the training split.
epochs = 10
batch_size = 64
model.fit(X_train, np.array(y_train),
          epochs=epochs, batch_size=batch_size, validation_split=0.1)

# Score on the held-out test set and report accuracy.
loss, accuracy = model.evaluate(X_test, np.array(y_test))
print("Test Accuracy:", accuracy)