Hi people,
Despite being almost a copy-paste from the first part that is correctly executing, in the second keep getting the following: LinAlgError: SVD did not converge in Linear Least Squares
CSV file: https://github.com/datasets/sea-level-rise/blob/master/data/epa-sea-level.csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import linregress
df =pd.read_csv(r'')
#Use matplotlib to create a scatter plot using the Year column as the x-
#axis and the CSIRO Adjusted Sea Level column as the y-axis
#fig, ax = plt.subplots(figsize=(16, 9))
plt.scatter(df['Year'], df['CSIRO Adjusted Sea Level'])
#plt.show() #Unhide if you wanna plot separately
#Use the linregress function from scipy.stats to get the slope and y-
#intercept of the line of best fit. Plot the line of best fit over the
#top of the scatter plot. Make the line go through the year 2050 to
#predict the sea level rise in 2050.
#result = linregress(df["Year"], df["CSIRO Adjusted Sea Level"])
extra_years = {"Year": pd.Series(range(2014,2051))}
df1 = pd.concat([df,pd.DataFrame(extra_years)])
df1 = df1.drop(columns=['Lower Error Bound','Upper Error Bound','NOAA Adjusted Sea Level'])
df2 = df1.fillna(0)
mymodel = np.poly1d(np.polyfit(df["Year"], df['CSIRO Adjusted Sea Level'], 3)) #m,b = np.polyfit(x, y, 1)
#myline = np.linspace(1880, 2050)
df2 = plt.plot(df2['Year'], m*df1['Year'] + b, color='r') #linear regression
df3 = plt.plot(myline, mymodel(myline), color='g') #polinomial regression is more accurate in this case
plt.show() #Unhide if you wanna plot separately
#Plot a new line of best fit just using the data from year 2000 through
#the most recent year in the dataset. Make the line also go through the
#year 2050 to predict the sea level rise in 2050 if the rate of rise
#continues as it has since the year 2000.
df2000 = df1.copy()
df2000 = df2000[(df2000['Year'] >= 2000) & (df2000['Year'] <= 2050)]
plt.scatter(df2000['Year'], df2000['CSIRO Adjusted Sea Level'])
m,b = np.polyfit(df2000['Year'], df2000['CSIRO Adjusted Sea Level'], 1) #m,b = np.polyfit(x, y, 1)
#myline2000 = np.linspace(1880, 2050)
df3000 = plt.plot(df2000['Year'], m*df2000['Year'] + b, color='r') #linear regression
#df4000 = plt.plot(myline, mymodel2000(myline), color='g') #polinomial regression is more accurate in this case
plt.show()
Thanks