Python Importing Stock Data

Python Importing Stock Data
0

#1

Hey guys, this is a python question. Not sure why this error “TypeError: dates1() missing 3 required positional arguments: ‘start_date’, ‘end_date’, and ‘dates’” keeps occurring. I have tried to fix it, but it wont work. I want to avoid using global variables.

I changed the code to include parameters, as P1xt suggested. Now, this is the error that occurs NameError: name ‘start_date’ is not defined

    import pandas as pd
    import matplotlib.pyplot as plt

def dates1(start_date, end_date,dates):
    # Define date range
    start_date = '2016-01-12'
    end_date = '2016-03-22'
    dates = pd.date_range(start_date, end_date)
    # print(dates[0])
    return dates


def data_frame(df1, df, symbols, symbol, df_temp):
    # Create an empty dataframe

    df1 = pd.DataFrame(index=dates)
    # Read C data into temporary dataframe
    df = pd.read_csv("BX.csv", index_col="Date", parse_dates=True,
                     usecols=['Date', 'Adj Close'], na_values=['nan'])

    df = df.rename(columns={'Adj Close': 'BX'})  # Rename 'Adj Close' column to 'BX' to prevent confusion/error
    df1 = df1.join(df, how='inner')  # Join two dataframes
    symbols = ['GOOGL', 'IBM', 'C']  # Read in more stocks



    for symbol in symbols:
        df_temp = pd.read_csv("{}.csv".format(symbol), index_col='Date',
                              parse_dates=True, usecols=['Date', 'Adj Close'], na_values=['nan'])

        # rename to prevent clash
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df1 = df1.join(df_temp)
        return df1



def normalize_data(df1, df2):
    # global df2  # creating new variable df2, which will be the placeholder for normalized data.
    df2 = df1 / df1.ix[0, :]
    return df2


def plot_data(df2):
    ax = df2.plot(title="Stock Prices", fontsize=12)
    # ax = df1.plot(title="Stock Prices", fontsize=12)
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    plt.show()

    # slice by row via .ix[] selector


def slice():
    print(df1.ix['2016-01-01':'2016-01-31'])


# Use a list of labels to select multiple columns
def slice2():
    # print(df1['BX'])
    print(df1[['IBM', 'BX', 'GOOGL']])


def slice_row_column():
    print(df1.ix['2016-02-10':'2016-02-15', ['BX', 'GOOGL']]
          )



def main():
    dates1(start_date, end_date,dates)
    data_frame(df1, df, symbols, symbol, df_temp)
    # slice()
    # slice2()
    # slice_row_column()
    normalize_data(df1, df2)
    plot_data(df2)

main()
Here is my solution that works, but I have to use global variables. I want to avoid using global variables.

import pandas as pd
import matplotlib.pyplot as plt


class DataFrame:
    def dates1():
        # Define date range
        start_date = '2016-01-12'
        end_date = '2016-03-22'
        global dates
        dates = pd.date_range(start_date, end_date)
        # print(dates[0])


    def data_frame():
        # Create an empty dataframe
        global df1
        df1 = pd.DataFrame(index=dates)
        # Read C data into temporary dataframe
        df = pd.read_csv("BX.csv", index_col="Date", parse_dates=True,
                         usecols=['Date', 'Adj Close'], na_values=['nan'])

        df = df.rename(columns={'Adj Close': 'BX'})  # Rename 'Adj Close' column to 'BX' to prevent confusion/error
        df1 = df1.join(df, how='inner')  # Join two dataframes
        symbols = ['GOOGL', 'IBM', 'C']  # Read in more stocks

        for symbol in symbols:
            df_temp = pd.read_csv("{}.csv".format(symbol), index_col='Date',
                                  parse_dates=True, usecols=['Date', 'Adj Close'], na_values=['nan'])

            # rename to prevent clash
            df_temp = df_temp.rename(columns={'Adj Close': symbol})
            df1 = df1.join(df_temp)

    def normalize_data():
        global df2  # creating new variable df2, which will be the placeholder for normalized data.
        df2 = df1 / df1.ix[0, :]

    def plot_data():
        ax = df2.plot(title="Stock Prices", fontsize=12)
        # ax = df1.plot(title="Stock Prices", fontsize=12)
        ax.set_xlabel("Date")
        ax.set_ylabel("Price")
        plt.show()

        # slice by row via .ix[] selector

    def slice():
        print(df1.ix['2016-01-01':'2016-01-31'])

    # Use a list of labels to select multiple columns
    def slice2():
        # print(df1['BX'])
        print(df1[['IBM', 'BX', 'GOOGL']])

    def slice_row_column():
        print(df1.ix['2016-02-10':'2016-02-15', ['BX', 'GOOGL']]
              )

    dates1()
    data_frame()
    # slice()
    # slice2()
    # slice_row_column()
    normalize_data()
    plot_data()



DataFrame

#3

I tried that, and it produces NameError: name ‘start_date’ is not defined

  import pandas as pd
    import matplotlib.pyplot as plt

def dates1(start_date, end_date,dates):
    # Define date range
    start_date = '2016-01-12'
    end_date = '2016-03-22'
    dates = pd.date_range(start_date, end_date)
    # print(dates[0])
    return dates


def data_frame(df1, df, symbols, symbol, df_temp):
    # Create an empty dataframe

    df1 = pd.DataFrame(index=dates)
    # Read C data into temporary dataframe
    df = pd.read_csv("BX.csv", index_col="Date", parse_dates=True,
                     usecols=['Date', 'Adj Close'], na_values=['nan'])

    df = df.rename(columns={'Adj Close': 'BX'})  # Rename 'Adj Close' column to 'BX' to prevent confusion/error
    df1 = df1.join(df, how='inner')  # Join two dataframes
    symbols = ['GOOGL', 'IBM', 'C']  # Read in more stocks



    for symbol in symbols:
        df_temp = pd.read_csv("{}.csv".format(symbol), index_col='Date',
                              parse_dates=True, usecols=['Date', 'Adj Close'], na_values=['nan'])

        # rename to prevent clash
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df1 = df1.join(df_temp)
        return df1



def normalize_data(df1, df2):
    # global df2  # creating new variable df2, which will be the placeholder for normalized data.
    df2 = df1 / df1.ix[0, :]
    return df2


def plot_data(df2):
    ax = df2.plot(title="Stock Prices", fontsize=12)
    # ax = df1.plot(title="Stock Prices", fontsize=12)
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    plt.show()

    # slice by row via .ix[] selector


def slice():
    print(df1.ix['2016-01-01':'2016-01-31'])


# Use a list of labels to select multiple columns
def slice2():
    # print(df1['BX'])
    print(df1[['IBM', 'BX', 'GOOGL']])


def slice_row_column():
    print(df1.ix['2016-02-10':'2016-02-15', ['BX', 'GOOGL']]
          )



def main():
    dates1(start_date, end_date,dates)
    data_frame(df1, df, symbols, symbol, df_temp)
    # slice()
    # slice2()
    # slice_row_column()
    normalize_data(df1, df2)
    plot_data(df2)

main()

#5

So I commented the dates1 section out, and made dates a class variables instead of function variables- so that they could be accessed by other functions.

“You need to pass to functions the data they need to do their work, and capture the return value they send back in a variable so that you can then pass that variable to other functions.”

Does this mean every variable I declare? In previous experiments I have returned symbols, symbol, df etc., however, the result is erroneous.

import pandas as pd
import matplotlib.pyplot as plt

start_date = '2016-01-12'
end_date = '2016-03-22'
dates = pd.date_range(start_date, end_date)

# def dates1(start_date, end_date,dates):
#     # Define date range
#     start_date = '2016-01-12'
#     end_date = '2016-03-22'
#     dates = pd.date_range(start_date, end_date)
#     # print(dates[0])
#     return dates


def data_frame(df1, df, symbols, symbol, df_temp):
    # Create an empty dataframe

    df1 = pd.DataFrame(index=dates)
    # Read C data into temporary dataframe
    df = pd.read_csv("BX.csv", index_col="Date", parse_dates=True,
                     usecols=['Date', 'Adj Close'], na_values=['nan'])

    df = df.rename(columns={'Adj Close': 'BX'})  # Rename 'Adj Close' column to 'BX' to prevent confusion/error
    df1 = df1.join(df, how='inner')  # Join two dataframes
    symbols = ['GOOGL', 'IBM', 'C']  # Read in more stocks



    for symbol in symbols:
        df_temp = pd.read_csv("{}.csv".format(symbol), index_col='Date',
                              parse_dates=True, usecols=['Date', 'Adj Close'], na_values=['nan'])

        # rename to prevent clash
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df1 = df1.join(df_temp)
        return df1



def normalize_data(df1, df2):
    # global df2  # creating new variable df2, which will be the placeholder for normalized data.
    df2 = df1 / df1.ix[0, :]
    return df2


def plot_data(df2):
    ax = df2.plot(title="Stock Prices", fontsize=12)
    # ax = df1.plot(title="Stock Prices", fontsize=12)
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    plt.show()

    # slice by row via .ix[] selector


def slice():
    print(df1.ix['2016-01-01':'2016-01-31'])


# Use a list of labels to select multiple columns
def slice2():
    # print(df1['BX'])
    print(df1[['IBM', 'BX', 'GOOGL']])


def slice_row_column():
    print(df1.ix['2016-02-10':'2016-02-15', ['BX', 'GOOGL']]
          )



def main():
    # dates1(start_date, end_date,dates)
    data_frame(df1, df, symbols, symbol, df_temp)
    # slice()
    # slice2()
    # slice_row_column()
    normalize_data(df1, df2)
    plot_data(df2)

main()