import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Import data
df = pd.read_csv('medical_examination.csv', header = 0)
print(df)
df1 =df
# Add 'overweight' column
#a['one']= df
#print(a)
df['overweight'] = ((df['weight']/(df['height']*0.01)**2)>25).astype(int)
#print(type(df['overweight']))
# Normalize data by making 0 always good and 1 always bad. If the value of 'cholesterol' or 'gluc' is 1, make the value 0. If the value is more than 1, make the value 1.
df['cholesterol'] = (df['cholesterol']>1).astype(int)
df['gluc'] = (df['gluc']>1).astype(int)
#print(df)
# Draw Categorical Plot
def draw_cat_plot():
# Create DataFrame for cat plot using `pd.melt` using just the values from 'cholesterol', 'gluc', 'smoke', 'alco', 'active', and 'overweight'.
df_cat = pd.melt(df, id_vars = ['cardio'], value_vars = ['cholesterol', 'gluc', 'smoke', 'alco', 'active', 'overweight'])
#print(df_cat)
# Group and reformat the data to split it by 'cardio'. Show the counts of each feature. You will have to rename one of the columns for the catplot to work correctly.
#df_cat = df_cat.DataFrame('count':df_cat.groupby['cardio','variable','value'])
#df_cat['count']=
#print(df_cat.dtypes)
# Draw the catplot with 'sns.catplot()'
df_cat = df_cat.groupby(['cardio', 'variable', 'value']).size().reset_index()
#print(df_cat)
df_cat = df_cat.rename(columns={0: 'count'})
#print(df_cat)
# Draw the catplot with 'sns.catplot()'
fig = sns.catplot(data=df_cat, kind="bar", x="variable", y="count", hue="value", col="cardio")
#print(fig)
# Get the figure for the output
#fig = sns.catplot(x='variable',col='cardio',hue='value',kind='count',data=df_cat)
# Do not modify the next two lines
fig.savefig('catplot.png')
return fig
# Draw Heat Map
def draw_heat_map():
# Clean the data
df_heat = df[
(df["ap_lo"] <= df["ap_hi"])
& (df["height"] >= df["height"].quantile(0.025))
& (df["height"] >= df["height"].quantile(0.975))
& (df["weight"] <= df["weight"].quantile(0.025))
& (df["weight"] >= df["weight"].quantile(0.975))
]
# Calculate the correlation matrix
corr = df_heat.corr()
# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr))
# Set up the matplotlib figure
fig, ax = plt.subplots(figsize=(8,6))
# Draw the heatmap with 'sns.heatmap()'
ax = sns.heatmap(corr, center=0, cmap='coolwarm', annot=True, vmax=1.0, vmin=-1.0,
fmt=".2f", square=True, linewidth =.5)
# Do not modify the next two lines
fig.savefig('heatmap.png')
return fig
Your browser information:
User Agent is: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36
Challenge Information:
Data Analysis with Python Projects - Medical Data Visualizer
gitpod /workspace/boilerplate-medical-data-visualizer (main) $ python main.py
id age sex height weight ap_hi ap_lo cholesterol gluc smoke alco active cardio
0 0 18393 2 168 62.0 110 80 1 1 0 0 1 0
1 1 20228 1 156 85.0 140 90 3 1 0 0 1 1
2 2 18857 1 165 64.0 130 70 3 1 0 0 0 1
3 3 17623 2 169 82.0 150 100 1 1 0 0 1 1
4 4 17474 1 156 56.0 100 60 1 1 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ...
69995 99993 19240 2 168 76.0 120 80 1 1 1 0 1 0
69996 99995 22601 1 158 126.0 140 90 2 2 0 0 1 1
69997 99996 19066 2 183 105.0 180 90 3 1 0 1 0 1
69998 99998 22431 1 163 72.0 135 80 1 2 0 0 0 1
69999 99999 20540 1 170 72.0 120 80 2 1 0 0 1 0
[70000 rows x 13 columns]
EE.[]
F
======================================================================
ERROR: test_bar_plot_number_of_bars (test_module.CatPlotTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/workspace/boilerplate-medical-data-visualizer/test_module.py", line 26, in test_bar_plot_number_of_bars
actual = len([rect for rect in self.ax.get_children() if isinstance(rect, mpl.patches.Rectangle)])
AttributeError: 'numpy.ndarray' object has no attribute 'get_children'
======================================================================
ERROR: test_line_plot_labels (test_module.CatPlotTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/workspace/boilerplate-medical-data-visualizer/test_module.py", line 13, in test_line_plot_labels
actual = self.ax.get_xlabel()
AttributeError: 'numpy.ndarray' object has no attribute 'get_xlabel'
======================================================================
FAIL: test_heat_map_values (test_module.HeatMapTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/workspace/boilerplate-medical-data-visualizer/test_module.py", line 55, in test_heat_map_values
self.assertEqual(actual, expected, "Expected different values in heat map.")
AssertionError: Lists differ: [] != ['0.0', '0.0', '-0.0', '0.0', '-0.1', '0.5[616 chars]0.1']
Second list contains 91 additional elements.
First extra element 0:
'0.0'
Diff is 941 characters long. Set self.maxDiff to None to see it. : Expected different values in heat map.
----------------------------------------------------------------------
Ran 4 tests in 2.139s
FAILED (failures=1, errors=2)