Hey guys, I'm completely failing to get this Titanic survival model working (a simple logistic-regression-style classifier: sigmoid output trained with binary cross-entropy). Someone help me please. The datasets are:
dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv') # training data
dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')
I keep getting this error:
Input 0 of layer "my_first_dense" is incompatible with the layer: expected axis -1 of input shape to have value 16, but received input with shape (None, 1256)
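If I'm reading my own model right, the 16 at least makes sense: 2 normalized numeric columns plus 7 categorical features x 2 embedding dimensions each = 2 + 14 = 16. What I can't figure out is where a 1256-wide input could be coming from.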
This is my code:
import numpy as np
import pandas as pd
import tensorflow as tf
# Load data
dtrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')
deval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')
# Drop missing values before splitting labels
dtrain = dtrain.dropna()
deval = deval.dropna()
# Separate labels after dropping NaNs
correct_training_answers = np.array(dtrain.pop('survived'))
print(correct_training_answers.shape)
correct_eval_answers = np.array(deval.pop('survived'))
print(correct_training_answers)
# Define features
categorylikefeatures = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', 'embark_town', 'alone']
numericfeatures = ['age', 'fare']
# Create model inputs and encoded features
inputs = {}
encoded_features = []
# Normalize numeric features
for col in numericfeatures:
    dtrain[col] = dtrain[col].astype(np.float32)
    deval[col] = deval[col].astype(np.float32)
    inputs[col] = tf.keras.Input(shape=(1,), name=col)
    normalizer = tf.keras.layers.Normalization()
    normalizer.adapt(np.array(dtrain[col]))  # learn mean/variance from the training column
    encoded_features.append(normalizer(inputs[col]))
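    # Sanity check I added: each numeric branch should be a (None, 1) tensor here
    print(encoded_features[-1].shape)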
# Encode categorical features using embeddings
for col in categorylikefeatures:
    dtrain[col] = dtrain[col].astype(str)
    deval[col] = deval[col].astype(str)
    inputs[col] = tf.keras.Input(shape=(1,), name=col, dtype=tf.string)
    lookup = tf.keras.layers.StringLookup(vocabulary=np.unique(dtrain[col]))
    encoded = lookup(inputs[col])
    # Embedding: map each lookup index to a small dense vector
    embed_dim = 2
    embedding = tf.keras.layers.Embedding(input_dim=lookup.vocabulary_size(), output_dim=embed_dim)
    embedded = embedding(encoded)
    encoded_features.append(tf.keras.layers.Flatten()(embedded))
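    # Sanity check: Embedding gives (None, 1, 2) and Flatten squeezes it to (None, 2)
    print(encoded_features[-1].shape)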
# Combine all features
all_features = tf.keras.layers.concatenate(encoded_features)
print(all_features)
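# The line above shows shape=(None, 16) for me: 2 numerics + 7 embeddings x 2 dims,
# which matches the 16 the error message says my_first_dense expects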
x = tf.keras.layers.Dense(32, activation='relu', name='my_first_dense')(all_features)
output = tf.keras.layers.Dense(1, activation='sigmoid')(x)
model = tf.keras.Model(inputs=inputs, outputs=output)
print(inputs)
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Create tf.data datasets
train_ds = tf.data.Dataset.from_tensor_slices((dict(dtrain), correct_training_answers)).batch(32)
val_ds = tf.data.Dataset.from_tensor_slices((dict(deval), correct_eval_answers)).batch(32)
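# Debug print I added: every feature comes out with shape (None,) (one scalar per row),
# while my Inputs were declared as shape=(1,), i.e. (None, 1). I don't know whether
# Keras is supposed to reconcile that mismatch on its own.
print(train_ds.element_spec)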
# Train the model
model.fit(train_ds, epochs=10)
# Evaluate the model
result = model.evaluate(val_ds)
print(f"Evaluation result (loss, accuracy): {result}")