Friday, October 23, 2020

Python Study Notes: how to load stock data, manipulate data, find patterns for profit?

#================================================

from pandas_datareader import data as pdr
#run the upgrade if see error: pandas_datareader, ImportError: cannot import name 'urlencode'
#pip install pandas-datareader --upgrade

#import fix_yahoo_finance as yf
#yf.pdr_override() # -- that's all it takes :-)

import matplotlib.pyplot as plt
#from matplotlib.finance import  candlestick2_ohlc
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import minmax_scale
from sklearn.preprocessing import quantile_transform

import numpy as np
#from pandas.io.data import DataReader
from pandas_datareader import DataReader
from datetime import timedelta
from stockstats import StockDataFrame
from datetime import datetime
import time
import matplotlib.pyplot as plt
import pandas as pd
import random
#import tabula

from pandasql import sqldf
pysql = lambda q: sqldf(q, globals())
#pysql = lambda q: sqldf(q, locals())

pd.set_option('display.max_rows', None) 
pd.set_option('display.max_columns', None)  
pd.set_option('display.max_colwidth', 150)
pd.set_option('display.width', 150)
#pd.reset_option('display.max_colwidth')

import datetime
date0=datetime.date.today()
date1=date0+pd.to_timedelta(1,unit='D')
date2=date0-pd.to_timedelta(11999,unit='D')

z0=pd.DataFrame(index=range(1))
curr0=pd.DataFrame(index=range(1))
summ_year=pd.DataFrame(index=range(1))
daily0=pd.DataFrame(index=range(1))
#symb='VXX'
#symb='^VIX'

#here is the code to see the change of VIX  
for symb in ('SPY','VXX','^VIX'):
    panel_data = pdr.get_data_yahoo(symb, start=date2, end=date1)
    time.sleep(1.5)
    panel_data.head()
    panel_data.tail()
    panel_data.shape
    list(panel_data)
    df0=pd.DataFrame(panel_data)
    num0=len(df0)
    list(df0.columns)
    df0['date2'] = df0.index
    df0.columns = ['high','low','open', 'close','volume','adj_close','date2'] 
    df0['ind']=np.where(df0.date2.isin(['2016-09-26','2012-10-03','2008-09-26','2004-09-30','2000-10-03','1996-10-07']),1,0)
    df0['on']=np.where(df0.date2.isin(['2016-11-09','2012-11-06','2008-11-05','2004-11-03','2000-11-08','1996-11-06']),1,0)
    df0['ind'].value_counts()
    df0['end']=np.where(df0.ind==1,df0.date2+pd.to_timedelta(50,unit='D'),df0.date2)
    df0['symb']=symb
    df1=df0[(df0.ind==1)][['symb','end','date2','close']]
    df2=pd.merge(df0,df1,left_on='symb',right_on='symb',how='inner')
    df2.head(20)
    df3=df2[((df2.end_x>=df2.end_y-pd.to_timedelta(50,unit='D')) & (df2.end_x

Here is the output you can see from SPY performance after the 1st presidential debate:

Here is the output you can see from VIX performance after the 1st presidential debate:
for symb in ('SPY','^VIX'): panel_data = pdr.get_data_yahoo(symb, start=date2, end=date1) time.sleep(1.5) panel_data.head() panel_data.tail() panel_data.shape list(panel_data) df0=pd.DataFrame(panel_data) num0=len(df0) list(df0.columns) df0['date2'] = df0.index df0.columns = ['high','low','open', 'close','volume','adj_close','date2'] ################################################## #file=r'C:\Users\skuang\Google_Drive\Python\data\LABU.csv' #test = pd.read_csv(file) #test.shape #test.tail() #test.columns=['date2','open','high','low','close','adj_close','volume'] #test['symbol']='LABU' #test.dtypes #df0=test[['date2','open','low','high','close','volume','symbol']][test['symbol']=='LABU'] #df0.dtypes import datetime df0['symb']=symb df0['weekday']=pd.to_datetime(df0['date2']).dt.weekday+1 df0['year']=pd.to_datetime(df0['date2']).dt.year df0['L9'] = df0['low'].rolling(window=9).min() df0['H9'] = df0['high'].rolling(window=9).max() df0['RSV'] = 100*((df0['close'] - df0['L9']) / (df0['H9'] - df0['L9']) ) df0['k'] = df0['RSV'].rolling(window=3).mean() df0['d'] = df0['k'].rolling(window=3).mean() #df0['close10'] = df0['close'].rolling(window=8).mean() df0['close22'] = df0['close'].rolling(window=22).mean() df0['close60'] = df0['close'].rolling(window=60).mean() df0['close250'] = df0['close'].rolling(window=250).mean() df0['change_22']=df0.close/df0.close22-1 df0['change_60']=df0.close/df0.close60-1 df0['change_250']=df0.close/df0.close250-1 # Converting date to pandas datetime format #df0['Date'] = pd.to_datetime(df['Date']) # Getting week number df0['week'] = df0['date2'].dt.week df0['gap_250']=round(df0.change_250/0.05,0)*0.05 df0['daily_change']=df0.close/df0.close.shift(1)-1 df0['daily_change1']=round(df0['daily_change'],2) df0['ret_mon_1']=df0.close.shift(-22)/df0.close-1 df0['ret_mon_2']=df0.close.shift(-45)/df0.close-1 df0['ret_mon_3']=df0.close.shift(-66)/df0.close-1 df0['ret_mon_6']=df0.close.shift(-130)/df0.close-1 df0['ret_mon_9']=df0.close.shift(-200)/df0.close-1 df0['ret_mon_12']=df0.close.shift(-255)/df0.close-1 df0['ret_mon_24']=df0.close.shift(-560)/df0.close-1 df0['ret_mon_36']=df0.close.shift(-765)/df0.close-1 df0['ret_mon_48']=df0.close.shift(-1020)/df0.close-1 df0['ret_mon_60']=df0.close.shift(-1270)/df0.close-1 df0[['date2','daily_change','daily_change1','close']].tail() df0.columns #check the return distribution after 22/45/66/130,250,500, 750,1000,1250 business days r0=df0.copy() r0[(r0.weekday==5)].shape ret_mean=r0[(r0.weekday==5)].groupby(['symb','gap_250'],as_index=False).agg({ "week": "count", "ret_mon_3": [("avg",'mean')],"ret_mon_6": [("avg",'mean')],"ret_mon_12": [("avg",'mean')], "ret_mon_24": [("avg",'mean')], "ret_mon_36": [("avg",'mean')], "ret_mon_48": [("avg",'mean')], "ret_mon_60": [("avg",'mean')] }) ret_mean.columns = ['_'.join(col) for col in ret_mean.columns] list(ret_mean.columns) ret_mean z0=z0.append(ret_mean) curr0=curr0.append(df0[( (df0.year>=2020) & (df0.weekday==5) )][['symb','date2','year','close', 'close250', 'change_22', 'change_60', 'change_250','gap_250']].tail(1)) df1 = StockDataFrame.retype(df0) macd=pd.DataFrame(df1.get('macd')) macd.head() daily0=daily0.append(df1[['symb','date2','week', 'weekday','year','open','low','high','close','daily_change1','k','d','macd','macds', 'macdh' ]]) # daily0=daily0.append(df0[ (df0.weekday==5) ][['symb','date2','week', 'weekday','year','close', 'close250', 'change_22', 'change_60', 'change_250','gap_250']]) #t1=df0[(df0.weekday==5) & (df0.week % 2 == 0 ) ][['date2','symb','close', 'week', 'weekday', 'year', 'close60', 'close250', 'change_250']] #t1['upper250']=np.where(t1.change_250< -0.099, -t1.change_250*10,1) t1=df0[['symb','date2','week', 'weekday','year','close', 'gap_250','daily_change','daily_change1']] t1['flag']=np.where(t1['daily_change']<-0.01, 1,0) t1['input']=np.where(t1['daily_change']<-0.01,abs(t1['daily_change1'])*50000,0) t1.tail(10) t1a=t1.groupby(by=['year'],as_index=False).agg({"input":"sum"}).sort_values('year') t1a #t1['input']=1000*t1['upper250'] # t1['input']=1000 t1['share']=t1.input/t1.close #rollnum=26*5 rollnum=250*15 t1['shares'] = t1['share'].rolling(min_periods=1,window=rollnum).sum() t1['cost'] = t1['input'].rolling(min_periods=1,window=rollnum).sum() t1['shares_year_early']=t1['shares'].shift(250) t1['value_now']=t1['shares'] *t1.close t1['value_change']=t1['value_now']-t1.cost t1['pcnt_gain']=t1['value_change']/t1['cost'] t1['cummu_return_per_year']=np.power(t1['pcnt_gain']+1,1/15)-1 t1['value_b4']=(t1['value_now'].shift(250)) t1['gl_peryear']=(t1['value_now']-t1['value_now'].shift(250)).fillna(1).astype(int).apply(lambda x: f'{x:,}') t1['value_now']=t1['value_now'].fillna(1).astype(int).apply(lambda x: f'{x:,}') t1['value_b4']=t1['value_b4'].fillna(1).astype(int).apply(lambda x: f'{x:,}') t1['shares']=t1['shares'].fillna(1).astype(int).apply(lambda x: f'{x:,}') t1['value_change']=t1['value_change'].fillna(1).astype(int).apply(lambda x: f'{x:,}') t1.tail() t1[(t1.week==2) & (t1.weekday==5)].tail(30)[['symb','year','shares','cost', 'value_now','value_change','pcnt_gain','cummu_return_per_year']] #t1[(t1.week==2)].tail(17).pcnt_gain.describe() t1[(t1.week==14)].tail(30)[['symb','year','shares','cost', 'value_now','value_change','pcnt_gain','value_b4', 'gl_peryear']] summ_year=summ_year.append(t1[(t1.week==2) &(t1.cost>50000)].tail(30)[['symb','year','shares','cost', 'value_now','value_change','pcnt_gain','value_b4', 'gl_peryear']]) print('size for symbol is ',symb, ' : ',ret_mean.shape) z0.head() z0.tail() daily0.shape #================================================

No comments:

Post a Comment

Python Study notes: example of using H2O/AutoML

here is the instruction to install H2O in python: Use H2O directly from Python 1. Prerequisite: Python 2.7.x, 3.5.x, or 3.6.x 2. Insta...