Spam/ham LSTM text classifier (Keras) — online-compiler paste; the numbered listing below is duplicated as plain code under "stdout".
  1. import numpy as np
  2. from tensorflow.keras.preprocessing.text import Tokenizer
  3. from tensorflow.keras.preprocessing.sequence import pad_sequences
  4. from tensorflow.keras.models import Sequential
  5. from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
  6. from sklearn.model_selection import train_test_split
  7. messages = [
  8. ("Get a free iPhone now!", 1), # 1 for spam
  9. ("Meeting at 3pm tomorrow", 0), # 0 for ham
  10. ("Click here to claim your prize", 1),
  11. ("Don't forget to submit your assignment", 0),
  12.  
  13. ]
  14. X = [message[0] for message in messages]
  15. y = [message[1] for message in messages]
  16. max_words = 1000
  17. tokenizer = Tokenizer(num_words=max_words)
  18. tokenizer.fit_on_texts(X)
  19. X_sequences = tokenizer.texts_to_sequences(X)
  20. max_len = 50
  21. X_padded = pad_sequences(X_sequences, maxlen=max_len)
  22. X_train, X_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.2, random_state=42)
  23. embedding_dim = 50
  24. model = Sequential()
  25. model.add(Embedding(input_dim=max_words, output_dim=embedding_dim,
  26. input_length=max_len))
  27. model.add(SpatialDropout1D(0.2))
  28. model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
  29. model.add(Dense(1, activation='sigmoid'))
  30. model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
  31. epochs = 10
  32. batch_size = 64
  33. model.fit(X_train, np.array(y_train), epochs=epochs, batch_size=batch_size,
  34. validation_split=0.1)
  35. loss, accuracy = model.evaluate(X_test, np.array(y_test))
  36. print("Test Accuracy:", accuracy)
Run result: Success (0.02s, 25892 KB). stdin: empty. stdout (echoes the source listing):
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from sklearn.model_selection import train_test_split
# Toy labelled dataset: (text, label) pairs; label 1 = spam, 0 = ham.
messages = [
    ("Get a free iPhone now!", 1),
    ("Meeting at 3pm tomorrow", 0),
    ("Click here to claim your prize", 1),
    ("Don't forget to submit your assignment", 0),
]

# Hyperparameters hoisted out of the script body as named constants.
MAX_WORDS = 1000      # vocabulary size retained by the tokenizer
MAX_LEN = 50          # length sequences are padded/truncated to
EMBEDDING_DIM = 50    # dimensionality of the learned word embeddings


def _vectorize(texts, max_words=MAX_WORDS, max_len=MAX_LEN):
    """Fit a tokenizer on *texts* and return (padded int matrix, tokenizer).

    The tokenizer is returned as well so callers could vectorize unseen
    text with the same vocabulary.
    """
    tokenizer = Tokenizer(num_words=max_words)
    tokenizer.fit_on_texts(texts)
    sequences = tokenizer.texts_to_sequences(texts)
    return pad_sequences(sequences, maxlen=max_len), tokenizer


def _build_model(max_words=MAX_WORDS, max_len=MAX_LEN,
                 embedding_dim=EMBEDDING_DIM):
    """Build and compile the Embedding -> LSTM -> sigmoid binary classifier."""
    model = Sequential()
    model.add(Embedding(input_dim=max_words, output_dim=embedding_dim,
                        input_length=max_len))
    model.add(SpatialDropout1D(0.2))
    # NOTE(review): recurrent_dropout > 0 disables Keras' cuDNN LSTM fast
    # path — acceptable for a toy example, but slow on GPU at scale.
    model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model


def main():
    """Vectorize the toy dataset, train the model, and report test accuracy."""
    X = [text for text, _ in messages]
    # Fix: convert labels to a numpy array once, instead of wrapping the
    # Python list in np.array(...) separately at fit() and evaluate().
    y = np.array([label for _, label in messages])

    X_padded, _tokenizer = _vectorize(X)
    X_train, X_test, y_train, y_test = train_test_split(
        X_padded, y, test_size=0.2, random_state=42)

    model = _build_model()
    model.fit(X_train, y_train, epochs=10, batch_size=64,
              validation_split=0.1)

    loss, accuracy = model.evaluate(X_test, y_test)
    print("Test Accuracy:", accuracy)


if __name__ == "__main__":
    main()