No matter what I do, I keep getting the same error message: ValueError: Labels dtype should be integer. Instead got <dtype: 'float32'>.
Your code so far
# Import libraries. You may or may not use all of these.
!pip install -q git+https://github.com/tensorflow/docs
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
try:
# %tensorflow_version only exists in Colab.
%tensorflow_version 2.x
except Exception:
pass
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling
from sklearn.model_selection import train_test_split
# Import data
!wget https://cdn.freecodecamp.org/project-data/health-costs/insurance.csv
# Load the insurance data and preview the last few rows.
dataset = pd.read_csv('insurance.csv')
dataset.tail()

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)

# Separate the target column from the features. ``pop`` removes 'expenses'
# from each frame in place and returns it as a Series.
y_train = train_data.pop('expenses')
y_eval = test_data.pop('expenses')

# Cast the labels to 32-bit integers (any fractional part is discarded).
y_train = y_train.astype('int32')
y_eval = y_eval.astype('int32')
# Columns holding string categories vs. columns holding plain numbers.
CATEGORICAL_COLUMNS = ['sex', 'smoker', 'region']
NUMERIC_COLUMNS = ['age', 'bmi', 'children']

# Collect one TensorFlow feature column per input column.
feature_columns = []
for feature_name in CATEGORICAL_COLUMNS:
    # Every distinct value seen in the training data becomes a vocabulary entry.
    vocabulary = train_data[feature_name].unique()
    feature_columns.append(
        tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary)
    )
for feature_name in NUMERIC_COLUMNS:
    feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32))
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
    """Return a zero-argument function that builds a ``tf.data.Dataset``.

    Args:
        data_df: Feature table; it is converted with ``dict(data_df)``, so it
            is presumably a pandas DataFrame keyed by column name.
        label_df: Labels aligned with ``data_df``; cast to ``tf.int32``.
        num_epochs: How many times the batched dataset is repeated.
        shuffle: Whether to shuffle the examples (buffer of 1000) before
            batching.
        batch_size: Number of examples per batch.

    Returns:
        A callable taking no arguments that returns the dataset.
    """
    def input_function():
        # Bind the cast result to a NEW name. Assigning to ``label_df`` here
        # would make it a local of ``input_function`` and the read on the
        # right-hand side would raise UnboundLocalError.
        labels = tf.cast(label_df, tf.int32)
        ds = tf.data.Dataset.from_tensor_slices((dict(data_df), labels))
        if shuffle:
            ds = ds.shuffle(1000)
        # Batch first, then repeat the batched sequence for each epoch.
        ds = ds.batch(batch_size).repeat(num_epochs)
        return ds

    return input_function
# 'expenses' is a continuous dollar amount, so this is a regression problem.
# A LinearClassifier with n_classes=len(set(y_train)) cannot work here: it
# requires integer class ids in [0, n_classes), which the expense values are
# not. Create a linear regressor from the feature columns built earlier.
linear_est = tf.estimator.LinearRegressor(
    feature_columns=feature_columns,
    optimizer=tf.keras.optimizers.legacy.Ftrl(
        learning_rate=0.1,
        l1_regularization_strength=0.001,
    ),
)

# Input functions for training and evaluation. Evaluation makes a single,
# unshuffled pass over the held-out data.
train_input_fn = make_input_fn(train_data, y_train)
eval_input_fn = make_input_fn(test_data, y_eval, num_epochs=1, shuffle=False)

linear_est.train(train_input_fn)  # train
result = linear_est.evaluate(eval_input_fn)  # get model metrics/stats by testing on the testing data

# A regressor reports no 'accuracy'; 'average_loss' is its mean squared error.
print(result['average_loss'])
print(result)  # the result variable is simply a dict of stats about our model
Your browser information:
User Agent is: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36
Challenge: Machine Learning with Python Projects - Linear Regression Health Costs Calculator
Link to the challenge: