Machine Learning with Python Projects - Neural Network SMS Text Classifier

I have been trying to adjust various things to get the message “sale today! to stop texts call 98912460324” detected as spam, but no matter how I tweak the code I can’t seem to make my model classify it as spam. Any suggestions?

Your code so far

  # %tensorflow_version only exists in Colab.
  !pip install tf-nightly
except Exception:
import tensorflow as tf
import pandas as pd
from tensorflow import keras
!pip install tensorflow-datasets
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt
from keras.preprocessing import sequence
from keras.layers import TextVectorization
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier


# Load the labelled SMS data. Both files are read as tab-separated with no
# header row; the two columns are named 'type' (label) and 'text' (message).

train_file_path = "train-data.tsv"
test_file_path = "valid-data.tsv"

train_data = pd.read_csv(train_file_path, sep='\t', header=None)
test_data = pd.read_csv(test_file_path, sep='\t', header=None)

train_data.columns = ['type', 'text']
test_data.columns = ['type', 'text']

# Pool both files into one frame; a fresh 80/20 split is drawn from it below.
df = pd.concat([train_data, test_data], ignore_index=True)

# Encode the labels numerically (ham -> 0, spam -> 1). Mapping the whole
# column at once avoids writing integers into a string column in place.
df['type'] = df['type'].map({'ham': 0, 'spam': 1})

X = df['text']
Y = df['type']

# Fixed random_state keeps the split reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# TF-IDF features: lowercased, English stop words removed, and terms that
# occur in fewer than 3 training messages dropped from the vocabulary.
feature_extraction = TfidfVectorizer(min_df=3, stop_words="english", lowercase=True)

# Fit the vocabulary on the training split only, then reuse it for the test split.
X_train_features = feature_extraction.fit_transform(X_train)
X_test_features = feature_extraction.transform(X_test)

Y_train = Y_train.astype('int')
Y_test = Y_test.astype('int')

# Build the classifier and train it on the TF-IDF features.
model = MultinomialNB()
model.fit(X_train_features, Y_train)

# function to predict messages based on model
# (should return list containing prediction and label, ex. [0.008318834938108921, 'ham'])
def predict_message(pred_text):
    """Classify a single SMS message as 'ham' or 'spam'.

    Uses the module-level ``feature_extraction`` vectorizer and ``model``
    classifier, both of which must already be fitted.

    Args:
        pred_text: The message text to classify.

    Returns:
        A two-element list ``[prediction, label]`` where ``prediction`` is
        the first value returned by ``model.predict`` and ``label`` is
        'spam' when that value is greater than 0.5, otherwise 'ham'.
    """
    # The vectorizer expects an iterable of documents, so wrap the one message.
    input_data_features = feature_extraction.transform([pred_text])
    prediction = model.predict(input_data_features)

    # Always produce a label: the 'ham' case belongs to the else branch, so
    # it neither overwrites a 'spam' result nor leaves the list empty.
    label = 'spam' if prediction[0] > 0.5 else 'ham'
    return [prediction[0], label]

# Try the classifier on the sample message that the test cell expects to be
# labelled 'spam'.
pred_text = "sale today! to stop texts call 98912460324"

# predict_message returns its result as a list; keep it for inspection.
prediction = predict_message(pred_text)

# Run this cell to test your function and model. Do not modify contents.
def test_predictions():
  """Check predict_message against the challenge's fixed sample messages.

  Calls predict_message on each sample and compares the label (second
  element of the returned list) with the expected answer, then prints
  whether every sample matched.
  """
  test_messages = ["how are you doing today",
                   "sale today! to stop texts call 98912460324",
                   "i dont want to go. can we try it a different day? available sat",
                   "our new mobile video service is live. just install on your phone to start watching.",
                   "you have won £1000 cash! call to claim your prize.",
                   "i'll bring it tomorrow. don't forget the milk.",
                   "wow, is your arm alright. that happened to me one time too"]

  test_answers = ["ham", "spam", "ham", "spam", "spam", "ham", "ham"]
  passed = True

  for msg, ans in zip(test_messages, test_answers):
    prediction = predict_message(msg)
    # Only the label is compared; the first element is not checked.
    if prediction[1] != ans:
      passed = False

  # Print exactly one outcome message.
  if passed:
    print("You passed the challenge. Great job!")
  else:
    print("You haven't passed yet. Keep trying.")


Your browser information:

User Agent is: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/ Safari/537.36

Challenge Information:

Machine Learning with Python Projects - Neural Network SMS Text Classifier

Did you choose to use Naive Bayes for exploration? The project is supposed to use a neural network.

This topic was automatically closed 182 days after the last reply. New replies are no longer allowed.