Data Analysis with Python Projects - Medical Data Visualizer

durga.kaka · March 18, 2024, 5:23pm

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Import data
df = pd.read_csv('medical_examination.csv', header = 0)
print(df)
df1 =df
# Add 'overweight' column
#a['one']= df
#print(a)
df['overweight'] = ((df['weight']/(df['height']*0.01)**2)>25).astype(int)
#print(type(df['overweight']))

# Normalize data by making 0 always good and 1 always bad. If the value of 'cholesterol' or 'gluc' is 1, make the value 0. If the value is more than 1, make the value 1.
df['cholesterol'] = (df['cholesterol']>1).astype(int)
df['gluc'] = (df['gluc']>1).astype(int)
#print(df)
# Draw Categorical Plot
def draw_cat_plot():
    # Create DataFrame for cat plot using `pd.melt` using just the values from 'cholesterol', 'gluc', 'smoke', 'alco', 'active', and 'overweight'.
    df_cat = pd.melt(df, id_vars = ['cardio'], value_vars = ['cholesterol', 'gluc', 'smoke', 'alco', 'active', 'overweight'])
    
    #print(df_cat)

    # Group and reformat the data to split it by 'cardio'. Show the counts of each feature. You will have to rename one of the columns for the catplot to work correctly.
    #df_cat = df_cat.DataFrame('count':df_cat.groupby['cardio','variable','value'])
    #df_cat['count']= 
    #print(df_cat.dtypes)
    # Draw the catplot with 'sns.catplot()'

    df_cat = df_cat.groupby(['cardio', 'variable', 'value']).size().reset_index()
    #print(df_cat)

    df_cat = df_cat.rename(columns={0: 'count'})
    #print(df_cat)
    # Draw the catplot with 'sns.catplot()'
    fig = sns.catplot(data=df_cat, kind="bar", x="variable", y="count", hue="value", col="cardio")
    #print(fig)
    # Get the figure for the output
    #fig =  sns.catplot(x='variable',col='cardio',hue='value',kind='count',data=df_cat)


    # Do not modify the next two lines
    fig.savefig('catplot.png')
    return fig


# Draw Heat Map
def draw_heat_map():
    # Clean the data
    df_heat = df[
        (df["ap_lo"] <= df["ap_hi"])
        & (df["height"] >= df["height"].quantile(0.025))
        & (df["height"] >= df["height"].quantile(0.975))
        & (df["weight"] <= df["weight"].quantile(0.025))
        & (df["weight"] >= df["weight"].quantile(0.975))
    ]

    # Calculate the correlation matrix
    corr = df_heat.corr()

    # Generate a mask for the upper triangle
    mask = np.triu(np.ones_like(corr))



    # Set up the matplotlib figure
    fig, ax = plt.subplots(figsize=(8,6))

    # Draw the heatmap with 'sns.heatmap()'
    ax = sns.heatmap(corr, center=0, cmap='coolwarm', annot=True, vmax=1.0, vmin=-1.0, 
                     fmt=".2f", square=True, linewidth =.5)

    # Do not modify the next two lines
    fig.savefig('heatmap.png')
    return fig

Your browser information:

User Agent is: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36

Challenge Information:

Data Analysis with Python Projects - Medical Data Visualizer



gitpod /workspace/boilerplate-medical-data-visualizer (main) $ python main.py
          id    age  sex  height  weight  ap_hi  ap_lo  cholesterol  gluc  smoke  alco  active  cardio
0          0  18393    2     168    62.0    110     80            1     1      0     0       1       0
1          1  20228    1     156    85.0    140     90            3     1      0     0       1       1
2          2  18857    1     165    64.0    130     70            3     1      0     0       0       1
3          3  17623    2     169    82.0    150    100            1     1      0     0       1       1
4          4  17474    1     156    56.0    100     60            1     1      0     0       0       0
...      ...    ...  ...     ...     ...    ...    ...          ...   ...    ...   ...     ...     ...
69995  99993  19240    2     168    76.0    120     80            1     1      1     0       1       0
69996  99995  22601    1     158   126.0    140     90            2     2      0     0       1       1
69997  99996  19066    2     183   105.0    180     90            3     1      0     1       0       1
69998  99998  22431    1     163    72.0    135     80            1     2      0     0       0       1
69999  99999  20540    1     170    72.0    120     80            2     1      0     0       1       0

[70000 rows x 13 columns]
EE.[]
F
======================================================================
ERROR: test_bar_plot_number_of_bars (test_module.CatPlotTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/workspace/boilerplate-medical-data-visualizer/test_module.py", line 26, in test_bar_plot_number_of_bars
    actual = len([rect for rect in self.ax.get_children() if isinstance(rect, mpl.patches.Rectangle)])
AttributeError: 'numpy.ndarray' object has no attribute 'get_children'

======================================================================
ERROR: test_line_plot_labels (test_module.CatPlotTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/workspace/boilerplate-medical-data-visualizer/test_module.py", line 13, in test_line_plot_labels
    actual = self.ax.get_xlabel()
AttributeError: 'numpy.ndarray' object has no attribute 'get_xlabel'

======================================================================
FAIL: test_heat_map_values (test_module.HeatMapTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/workspace/boilerplate-medical-data-visualizer/test_module.py", line 55, in test_heat_map_values
    self.assertEqual(actual, expected, "Expected different values in heat map.")
AssertionError: Lists differ: [] != ['0.0', '0.0', '-0.0', '0.0', '-0.1', '0.5[616 chars]0.1']

Second list contains 91 additional elements.
First extra element 0:
'0.0'

Diff is 941 characters long. Set self.maxDiff to None to see it. : Expected different values in heat map.

----------------------------------------------------------------------
Ran 4 tests in 2.139s

FAILED (failures=1, errors=2)

pkdvalis · March 18, 2024, 6:54pm

I’ve edited your code for readability. When you enter a code block into a forum post, please precede it with a separate line of three backticks and follow it with a separate line of three backticks to make it easier to read.

You can also use the “preformatted text” tool in the editor (</>) to add backticks around text.

See this post to find the backtick on your keyboard.
Note: Backticks (`) are not single quotes (').

system · September 17, 2024, 6:55am

This topic was automatically closed 182 days after the last reply. New replies are no longer allowed.

Topic		Replies	Views
Data Analysis with Python Projects - Medical Data Visualizer Python	1	365	April 21, 2023
Data Analysis with Python Projects - Medical Data Visualizer Python	2	563	April 21, 2023
Data Analysis with Python Projects - Medical Data Visualizer Python	2	236	June 16, 2024
Data Analysis with Python Projects - Medical Data Visualizer Python	9	1450	March 23, 2024
Medical Data Analyzer - Data Analysis with Python Python	2	692	June 2, 2023

Data Analysis with Python Projects - Medical Data Visualizer

Your browser information:

Challenge Information:

Related topics