Pull Financial Data for Free from Yahoo Finance using Python (Full Code)

This was a project I worked when I first learned to code. It’s not the most efficient code, but it does it’s job pulling all financial statements and dividend data for all stocks.

The project primarily use the yfinance python library. The below code will get Balance Sheet, Income Statement, Cash Flow, Dividend Data and Ex-Dividend Dates. If you wish to get stock prices, analyst estimates and much more, I would recommend reviewing the documentation here.

How the code works:

  • Get all symbols from Financial Modeling Prep. Note: As of 2020, FMP requires an API key to access their data. The API is free to use and you can sign up here
  • Loops through all the symbols to pull Financial Data from Yahoo Finance
  • Inserts all the data into a table into a MySQL database
  • Prints a message in the Python console that the code has ran

Before you run, you will need to:

Create the tables necessary in the file to store the file with the corresponding column. SQL code provided below.

-- CREATE BALANCE SHEET TABLE
CREATE TABLE IF NOT EXISTS data.y_balance_sheet (
    symbol varchar(5),
    date DATE,
    total_liability BIGINT,
    total_shareholder_equity BIGINT,
    otherCurrentLiab BIGINT,
    total_assets BIGINT,
    common_stock BIGINT,
    other_current_assets BIGINT,
    retained_earnings BIGINT,
    other_liability BIGINT,
    treasury_stock BIGINT,
    other_assets BIGINT,
    cash BIGINT,
    total_current_liabilities BIGINT,
    short_longterm_debt BIGINT,
    other_shareholder_equity BIGINT,
    property_plant_equip BIGINT,
    total_current_assets BIGINT,
    longterm_investments BIGINT,
    nettangible_assets BIGINT,
    shortterm_investments BIGINT,
    net_receivables BIGINT,
    longterm_debt BIGINT,
    inventory BIGINT,
    account_payable BIGINT,
    primary KEY (symbol, date)
)

-- CREATE INCOME STATEMENT TABLE
CREATE TABLE IF NOT EXISTS data.y_income_statement (
    symbol varchar(5),
    date DATE,
    research_development BIGINT,
    account_charges BIGINT,
    income_before_tax BIGINT,
    minority_interest BIGINT,
    net_income BIGINT,
    general_admin BIGINT,
    gross_profit BIGINT,
    ebit BIGINT,
    operating_income BIGINT,
    other_operating_expense BIGINT,
    interest_expense BIGINT,
    extraordinary_items BIGINT,
    non_recurring BIGINT,
    other_items BIGINT,
    income_tax_expense BIGINT,
    total_revenue BIGINT,
    total_operating_expense BIGINT,
    cost_of_revenue BIGINT,
    other_income_expense BIGINT,
    discontinued_ops BIGINT,
    net_income_continuing_ops BIGINT,
    net_income_common_shares BIGINT,
    primary KEY (symbol, date)
)

-- CREATE CASH FLOW TABLE
CREATE TABLE IF NOT EXISTS data.y_cash_flow (
    symbol varchar(5),
    date DATE,
    investments BIGINT,
    change_to_liabilities BIGINT,
    change_to_investing_activities BIGINT,
    net_borrowing BIGINT,
    change_to_financing_activities BIGINT,
    change_to_operating_activities BIGINT,
    issuance_of_stock BIGINT,
    net_income BIGINT,
    cash_in_cash BIGINT,
    repurchase_of_stock BIGINT,
    total_cash_operating_activities BIGINT,
    depreciation BIGINT,
    cashflow_from_investing_activities BIGINT,
    dividends_paid BIGINT,
    change_to_inventory BIGINT,
    change_to_account_receivables BIGINT,
    cashflow_from_financing_activities BIGINT,
    change_to_net_income BIGINT,
    capital_expenditures BIGINT,
    primary KEY (symbol, date)
)

--- CREATE DIVIDEND TABLE
CREATE TABLE IF NOT EXISTS data.y_dividend (
        symbol varchar(5),
        date DATE,
        amount decimal(20,2),
        primary KEY (symbol, date)
    )

--- CREATE EX-DIVIDEND DATE TABLE
    CREATE TABLE IF NOT EXISTS data.y_exdividend (
        symbol varchar(5),
        exdiv_date date,
        primary KEY (symbol, exdiv_date)
    )

Install the python libraries required: yfinance, mysql-connector-python

pip install yfinance
pip install mysql-connector-python

Add your MySQL connection details in the code below under MYSQL WITH MYSQL CONNECTOR.

Full Python Code

Function List:

UpdateYahoo.balance_sheet()

UpdateYahoo.income_statement()

UpdateYahoo.cash_flow()

UpdateYahoo.dividend()

UpdateYahoo.exdividend()

import json
from urllib.request import urlopen
from datetime import date, datetime, timedelta

import mysql.connector

user = 'demo'
password = 'demo'
host = '111.111.1.11'

""" MYSQL WITH MYSQLCONNECTOR """
proddb = mysql.connector.connect(
    host=host,
    user=user,
    password=password,
)

prodcursor = proddb.cursor()

# MODULE FROM FMP TO GET STOCK TICKERS
def get_jsonparsed_data(url):
    response = urlopen(url)
    data = response.read().decode("utf-8")
    return json.loads(data)

class UpdateYahoo:
    """
    *
    * METHODS TO UPDATE YAHOO FINANCIAL STATEMENTS
    * Include all methods to update income statement, balance sheet, cashflow
    *
    """
    @staticmethod
    def balance_sheet():
        start = datetime.today()
        print("Starting function to update Y balance sheet table...")

        for x in get_jsonparsed_data("https://financialmodelingprep.com/api/v3/company/stock/list").get('symbolsList'):
            symbol = x.get('symbol').upper()

            try:
                d = YahooFinancials(symbol).get_financial_stmts('quarterly', 'balance')
            except:
                continue

            if not d['balanceSheetHistoryQuarterly'][symbol]:
                continue

            list = []

            for a in d['balanceSheetHistoryQuarterly'][symbol]:

                for b in a.values():
                    total_liability = b.get('totalLiab')
                    total_shareholder_equity = b.get('totalStockholderEquity')
                    other_current_liab = b.get('otherCurrentLiab')
                    total_assets = b.get('totalAssets')
                    common_stock = b.get('commonStock')
                    other_current_assets = b.get('otherCurrentAssets')
                    retained_earnings = b.get('retainedEarnings')
                    other_liability = b.get('otherLiab')
                    treasury_stock = b.get('treasuryStock')
                    other_assets = b.get('otherAssets')
                    cash = b.get('cash')
                    total_current_liabilities = b.get('totalCurrentLiabilities')
                    short_longterm_debt = b.get('shortLongTermDebt')
                    other_shareholder_equity = b.get('otherStockholderEquity')
                    property_plant_equip = b.get('propertyPlantEquipment')
                    total_current_assets = b.get('totalCurrentAssets')
                    longterm_investments = b.get('longTermInvestments')
                    nettangible_assets = b.get('netTangibleAssets')
                    shortterm_investments = b.get('shortTermInvestments')
                    net_receivables = b.get('netReceivables')
                    longterm_debt = b.get('longTermDebt')
                    inventory = b.get('inventory')
                    account_payable = b.get('accountsPayable')

                for c in a:
                    date = c

                list.append((symbol, date, total_liability, total_shareholder_equity, other_current_liab, total_assets, common_stock, other_current_assets,
                retained_earnings, other_liability, treasury_stock, other_assets, cash, total_current_liabilities,
                short_longterm_debt, other_shareholder_equity, property_plant_equip, total_current_assets,
                longterm_investments, nettangible_assets, shortterm_investments, net_receivables, longterm_debt,
                inventory, account_payable))

            prodcursor.executemany("""insert ignore into data.y_balance_sheet (symbol, date, total_liability, total_shareholder_equity, other_current_liab, total_assets, common_stock, other_current_assets,
                        retained_earnings, other_liability, treasury_stock, other_assets, cash, total_current_liabilities,
                        short_longterm_debt, other_shareholder_equity, property_plant_equip, total_current_assets,
                        longterm_investments, nettangible_assets, shortterm_investments, net_receivables, longterm_debt,
                        inventory, account_payable) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""",
                                   list)
            proddb.commit()

        end = datetime.today()
        dur_min = round((end - start).total_seconds() / 60, 2)
        print("Complete: Update Y balance sheet function took " + str(dur_min) + " minutes")

    @staticmethod
    def income_statement():
        start = datetime.today()
        print("Starting function to update Y income statement table...")

        for x in get_jsonparsed_data("https://financialmodelingprep.com/api/v3/company/stock/list").get('symbolsList'):
            symbol = x.get('symbol').upper()

            try:
                d = YahooFinancials(symbol).get_financial_stmts('quarterly', 'income')
            except:
                continue

            if not d['incomeStatementHistoryQuarterly'][symbol]:
                continue

            list = []

            for a in d['incomeStatementHistoryQuarterly'][symbol]:

                for b in a.values():
                    research_development = b.get('researchDevelopment')
                    account_charges = b.get('effectOfAccountingCharges')
                    income_before_tax = b.get('incomeBeforeTax')
                    minority_interest = b.get('minorityInterest')
                    net_income = b.get('netIncome')
                    general_admin = b.get('sellingGeneralAdministrative')
                    gross_profit = b.get('grossProfit')
                    ebit = b.get('ebit')
                    operating_income = b.get('operatingIncome')
                    other_operating_expense = b.get('otherOperatingExpenses')
                    interest_expense = b.get('interestExpense')
                    extraordinary_items = b.get('extraordinaryItems')
                    non_recurring = b.get('nonRecurring')
                    other_items = b.get('otherItems')
                    income_tax_expense = b.get('incomeTaxExpense')
                    total_revenue = b.get('totalRevenue')
                    total_operating_expense = b.get('totalOperatingExpenses')
                    cost_of_revenue = b.get('costOfRevenue')
                    other_income_expense = b.get('totalOtherIncomeExpenseNet')
                    discontinued_ops = b.get('discontinuedOperations')
                    net_income_continuing_ops = b.get('netIncomeFromContinuingOps')
                    net_income_common_shares = b.get('netIncomeApplicableToCommonShares')

                for c in a:
                    date = c

                list.append((symbol, date, research_development, account_charges, income_before_tax, minority_interest,
                net_income, general_admin, gross_profit, ebit, operating_income, other_operating_expense,
                interest_expense, extraordinary_items, non_recurring, other_items, income_tax_expense, total_revenue,
                total_operating_expense, cost_of_revenue, other_income_expense, discontinued_ops,
                net_income_continuing_ops, net_income_common_shares))

            prodcursor.executemany("""insert ignore into data.y_income_statement (symbol, date, research_development, account_charges, income_before_tax, minority_interest,
            net_income, general_admin, gross_profit, ebit, operating_income, other_operating_expense,
            interest_expense, extraordinary_items, non_recurring, other_items, income_tax_expense, total_revenue,
            total_operating_expense, cost_of_revenue, other_income_expense, discontinued_ops,
            net_income_continuing_ops, net_income_common_shares) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""",
            list)
            proddb.commit()

        end = datetime.today()
        dur_min = round((end - start).total_seconds() / 60, 2)
        print("Complete: Update Y balance sheet function took " + str(dur_min) + " minutes")


    @staticmethod
    def cash_flow():
        start = datetime.today()
        print("Starting function to update Y cash flow table...")

        for x in get_jsonparsed_data("https://financialmodelingprep.com/api/v3/company/stock/list").get('symbolsList'):
            symbol = x.get('symbol').upper()

            try:
                d = YahooFinancials(symbol).get_financial_stmts('quarterly', 'cash')
            except:
                continue

            if not d['cashflowStatementHistoryQuarterly'][symbol]:
                continue

            list = []

            for a in d['cashflowStatementHistoryQuarterly'][symbol]:
                for b in a.values():
                    investments = b.get('investments')
                    change_to_liabilities = b.get('changeToLiabilities')
                    change_to_investing_activities = b.get('totalCashflowsFromInvestingActivities')
                    net_borrowing = b.get('netBorrowings')
                    change_to_financing_activities = b.get('totalCashFromFinancingActivities')
                    change_to_operating_activities = b.get('changeToOperatingActivities')
                    issuance_of_stock = b.get('issuanceOfStock')
                    net_income = b.get('netIncome')
                    cash_in_cash = b.get('changeInCash')
                    repurchase_of_stock = b.get('repurchaseOfStock')
                    total_cash_operating_activities = b.get('totalCashFromOperatingActivities')
                    depreciation = b.get('depreciation')
                    cashflow_from_investing_activities = b.get('otherCashflowsFromInvestingActivities')
                    dividends_paid = b.get('dividendsPaid')
                    change_to_inventory = b.get('changeToInventory')
                    change_to_account_receivables = b.get('changeToAccountReceivables')
                    cashflow_from_financing_activities = b.get('otherCashflowsFromFinancingActivities')
                    change_to_net_income = b.get('changeToNetincome')
                    capital_expenditures = b.get('capitalExpenditures')

                for c in a:
                    date = c

                list.append((symbol, date, investments, change_to_liabilities, change_to_investing_activities,
                    net_borrowing, change_to_financing_activities, change_to_operating_activities, issuance_of_stock,
                    net_income, cash_in_cash, repurchase_of_stock, total_cash_operating_activities, depreciation,
                    cashflow_from_investing_activities, dividends_paid, change_to_inventory, change_to_account_receivables,
                    cashflow_from_financing_activities, change_to_net_income, capital_expenditures))

            prodcursor.executemany("""insert ignore into data.y_cash_flow (symbol, date, investments, change_to_liabilities, change_to_investing_activities,
                net_borrowing, change_to_financing_activities, change_to_operating_activities, issuance_of_stock,
                net_income, cash_in_cash, repurchase_of_stock, total_cash_operating_activities, depreciation,
                cashflow_from_investing_activities, dividends_paid, change_to_inventory, change_to_account_receivables,
                cashflow_from_financing_activities, change_to_net_income, capital_expenditures) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""",
                list)
            proddb.commit()

        end = datetime.today()
        dur_min = round((end - start).total_seconds() / 60, 2)
        print("Complete: Update Y cash flow function took " + str(dur_min) + " minutes")

    """
    *
    * METHODS TO UPDATE YAHOO DIVIDEND DATA
    * Include all methods to update dividend payout and dividend exdates
    *
    """
    @staticmethod
    def dividend(input = "update"):
        start = datetime.today()
        print("Starting function to update Y dividend table...")

        if input == 'new':
            s = '1990-01-01'
        elif input == 'update':
            s = str(date.today() - timedelta(days=1))
        else:
            s = str(date.today() - timedelta(days=1))

        for x in get_jsonparsed_data("https://financialmodelingprep.com/api/v3/company/stock/list").get('symbolsList'):
            symbol = x.get('symbol').upper()
            e = str(date.today())

            try:
                d = YahooFinancials(symbol).get_daily_dividend_data(s, e)
            except:
                continue

            if not d[symbol]:
                continue

            list = []

            for b in d[symbol]:
                date_input = b.get('formatted_date')
                amount = b.get('amount')

                list.append((symbol, date_input, amount))

            prodcursor.executemany("""insert ignore into data.y_dividend (symbol, date, amount) values (%s,%s,%s)""", list)
            proddb.commit()

        end = datetime.today()
        dur_min = round((end - start).total_seconds() / 60, 2)
        print("Complete: Update Y dividend function took " + str(dur_min) + " minutes")

    @staticmethod
    def exdividend():
        start = datetime.today()
        print("Starting function to update Y exdividend table...")

        ticker = get_jsonparsed_data("https://financialmodelingprep.com/api/v3/company/stock/list")

        prodcursor.execute("delete from data.y_exdividend")
        proddb.commit()

        for x in ticker.get('symbolsList'):
            symbol = x.get('symbol').upper()
            list = []

            try:
                exdiv_date = YahooFinancials(symbol).get_exdividend_date()
            except:
                continue

            if not exdiv_date or exdiv_date == '-':
                continue

            list.append((symbol, exdiv_date))

            prodcursor.executemany("""
                insert ignore into data.y_exdividend (symbol, exdiv_date) 
                values (%s,%s)""", list)
            proddb.commit()

        end = datetime.today()
        dur_min = round((end - start).total_seconds() / 60, 2)
        print("Complete: Update Y exdividend function took " + str(dur_min) + " minutes")

Related