Please, I’ll need your full assistance with the project. I don’t even understand how to load it. We can use Google Meet or Zoom. My WhatsApp number is +2348176125605 and my email is amanabolu@gmail.com.
import pandas as pd
def demographic_data_analyzer(csv_path='adult.csv'):
    """Analyze demographic data from the 1994 Census database.

    The data set is loaded with ``pandas.read_csv`` and must provide the
    columns ``race``, ``sex``, ``age``, ``education``, ``salary``,
    ``hours-per-week``, ``native-country`` and ``occupation``.

    Args:
        csv_path: Path (or any other source accepted by
            ``pandas.read_csv``) of the census CSV. Defaults to
            ``'adult.csv'``.

    Returns:
        A 10-tuple, in this order:

        1. race_counts -- Series with the number of people per race.
        2. average_age_men -- mean age of rows whose sex is ``'Male'``.
        3. percentage_bachelors -- % of people whose education is
           ``'Bachelors'``.
        4. percentage_high_income_advanced -- % of people with a
           Bachelors, Masters or Doctorate who earn ``'>50K'``.
        5. percentage_high_income_low -- % of people without such a
           degree who earn ``'>50K'``.
        6. min_hours -- smallest value of ``hours-per-week``.
        7. percentage_high_income_min -- % of people working
           ``min_hours`` who earn ``'>50K'``.
        8. highest_earning_country -- country with the largest share of
           ``'>50K'`` earners.
        9. highest_percentage -- that share, in percent.
        10. most_popular_occupation -- most frequent occupation among
            ``'>50K'`` earners from India, or ``None`` if there are none.

        All percentages are rounded to one decimal place.
    """
    df = pd.read_csv(csv_path)

    # Boolean masks shared by several of the questions below.
    high_income = df['salary'] == '>50K'
    advanced_education = df['education'].isin(
        ['Bachelors', 'Masters', 'Doctorate']
    )

    # How many people of each race are represented in this dataset?
    race_counts = df['race'].value_counts()

    # What is the average age of men?
    average_age_men = df.loc[df['sex'] == 'Male', 'age'].mean()

    # What is the percentage of people who have a Bachelor's degree?
    percentage_bachelors = _percentage(df['education'] == 'Bachelors')

    # What percentage of people with advanced education (Bachelors,
    # Masters, or Doctorate) make more than 50K?
    percentage_high_income_advanced = _percentage(
        high_income[advanced_education]
    )

    # What percentage of people without advanced education make more
    # than 50K?
    percentage_high_income_low = _percentage(high_income[~advanced_education])

    # What is the minimum number of hours a person works per week?
    min_hours = df['hours-per-week'].min()

    # What percentage of the people who work the minimum number of hours
    # per week have a salary of more than 50K?
    percentage_high_income_min = _percentage(
        high_income[df['hours-per-week'] == min_hours]
    )

    # What country has the highest percentage of people that earn >50K
    # and what is that percentage?  Averaging the >50K mask per country
    # yields only the high-income share, so a country where everybody
    # earns <=50K can no longer come out on top.
    high_income_share = high_income.groupby(df['native-country']).mean() * 100
    highest_earning_country = high_income_share.idxmax()
    highest_percentage = round(high_income_share.max(), 1)

    # Identify the most popular occupation for those who earn >50K in
    # India.
    india_occupations = df.loc[
        high_income & (df['native-country'] == 'India'), 'occupation'
    ].value_counts()
    # value_counts() is sorted by frequency, so the first label is the
    # most common one; guard against there being no matching rows.
    most_popular_occupation = (
        None if india_occupations.empty else india_occupations.index[0]
    )

    return (
        race_counts,
        average_age_men,
        percentage_bachelors,
        percentage_high_income_advanced,
        percentage_high_income_low,
        min_hours,
        percentage_high_income_min,
        highest_earning_country,
        highest_percentage,
        most_popular_occupation,
    )


def _percentage(mask):
    """Return the share of True values in a boolean Series as a percentage,
    rounded to one decimal place (NaN for an empty Series)."""
    return round(100 * mask.mean(), 1)
# Run the analysis only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    results = demographic_data_analyzer()
    print(results)