Tuesday, March 24, 2020

Python study notes 10: Plot all kinds of graphs in Python

How do we plot a simple histogram?
How do we plot with a second y-axis?
How do we draw a scatter plot by group in Python?
How do we plot a heat map by US state or county in Python?


Question: How do we plot a simple histogram, and how do we plot with a second y-axis?
#===========================================================
#simple line plot with a legend
# Every curve gets an explicit label: plt.legend() only lists artists that
# carry a label, so without them the legend box comes out empty.
import matplotlib.pyplot as plt
plt.figure(figsize=(10,8))
plt.plot(data1.gap_250_, data1.ret_mon_12_avg12, '*-', color='green', label='ret_mon_12_avg12')
plt.plot(data1.gap_250_, data1.ret_mon_24_avg24, 'v-', color='green', label='ret_mon_24_avg24')
plt.plot(data1.gap_250_, data1.ret_mon_36_avg36, '^-', color='red', label='ret_mon_36_avg36')
plt.plot(data1.gap_250_, data1.ret_mon_48_avg48, 's-', color='yellow', label='ret_mon_48_avg48')
plt.plot(data1.gap_250_, data1.ret_mon_60_avg60, 'o-', color='blue', label='ret_mon_60_avg60')

legend = plt.legend(loc='upper center', shadow=True, fontsize='x-large')
# Put a nicer background color on the legend.
legend.get_frame().set_facecolor('C0')

plt.show()
#==============

#simply plot by histogram
# numpy/pandas are needed by the density example below; matplotlib.pyplot is
# expected to be imported as plt already.
import numpy as np
import pandas as pd

plt.figure(figsize=(10, 7))
series1.hist()
plt.show()

#or use kind='bar' to draw one bar per entry of a row
#(a bar chart of the raw values, not a binned histogram)
plt.figure(figsize=(10, 7))
df.iloc[5].plot(kind='bar')
plt.show()

#use kde plot for density plot:
ser = pd.Series(np.random.randn(1000))
ser.plot.kde()
plt.show()

#two histograms side by side on one figure
f4, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 5))
series1.hist(ax=ax1)
ax1.set_title('title1')
series2.hist(ax=ax2)
ax2.set_title('title2')
# tight_layout runs after the axes are populated so that the titles and tick
# labels are taken into account when the spacing is computed.
f4.tight_layout()
plt.show()
#===========================================================

Plot with a second y-axis (two y scales sharing one x-axis):
#===========================================================
# Two series drawn against the same x values, each on its own y-axis.
import matplotlib.pyplot as plt
figure, ax1 = plt.subplots(figsize=(20,10))
ax2 = ax1.twinx()  # second axes sharing ax1's x-axis, with its y-axis on the right
ax1.plot(df3.row1, df3.close_std, 'g-')
ax2.plot(df3.row1, df3.close_change, 'b-')
# Colour each y-label like its line so the two scales can be told apart.
ax1.set_ylabel('close_std', color='g')
ax2.set_ylabel('close_change', color='b')
plt.show()
#===========================================================

Question: How do we draw a scatter plot by group in Python?
#===========================================================
##simply try the plot overlay first
# Each call adds one more series to the same axes, with its own colour and
# marker. var_y1..var_y4 are placeholder column names: substitute your own.
import matplotlib.pyplot as plt
plt.figure(figsize=(10,8))
plt.scatter(data1.var_x, data1.var_y1, color='green', marker='.')
plt.scatter(data1.var_x, data1.var_y2, color='red', marker='d')
plt.scatter(data1.var_x, data1.var_y3, color='blue', marker='o')
plt.scatter(data1.var_x, data1.var_y4, color='k', marker='*')
plt.show()
#=========================================================== 
#Method 1: use matplotlib to scatter plot by group
import matplotlib.pyplot as plt
groups = data_input.groupby('plot_by_this_var_group')
fig, ax = plt.subplots()  #==fig is the parent figure, ax the single axes used below

# Other layouts, for reference only. They stay commented out because each
# plt.subplots call opens a new (empty) figure and rebinds fig:
# fig, [[ax1, ax2], [ax3, ax4]] = plt.subplots(nrows=2, ncols=2, figsize=(20, 10))  #== 2*2 plots
# fig, [ax1, ax2, ax3, ax4] = plt.subplots(nrows=1, ncols=4)  #== 1 row * 4 columns plots

ax.margins(0.05) # Optional, just adds 5% padding to the autoscaling
for name, group in groups:
    # linestyle='' leaves only the markers, so each group shows up as a
    # scatter series with its own legend entry.
    ax.plot(group.x_axis_name, group.y_axis_name, marker='.',
            linestyle='', ms=12, label=name)
ax.legend()
plt.show()
 
#===========================================================    
#Method 2: use seaborn package: a graphic library built on top of Matplotlib
import seaborn as sns
import matplotlib.pyplot as plt
# fit_reg=False turns the regression line off, leaving a plain scatter plot
# coloured by the hue column. legend=False suppresses seaborn's own legend so
# that plt.legend below can place one inside the axes.
sns.lmplot(x="x_axis_name", y="y_axis_name",
           palette=["red", "blue", "black", "orange", "purple"],
           data=data_input, fit_reg=False, hue='plot_by_this_var_group',
           legend=False)
plt.legend(loc='lower right')
plt.show()
#===========================================================

Simple histogram plot to see the data distribution:
#===========================================================
# Quick look at how the values of train.category are distributed.
# density=True normalises the bars so that the total area is 1.
hist_style = dict(bins=25, density=True, alpha=0.6, color='g')
plt.figure(figsize=(6,4))
plt.hist(train.category, **hist_style)
plt.title("The distribution of the train: categorial variable")
plt.show()
#===========================================================

We can also fit a normal distribution to one-dimensional data:
#===========================================================
# Fit a normal distribution to one column and draw it over the histogram.
# dataname / 'varname' are placeholders for your data frame and column name.
import numpy as np  # needed for np.linspace below
from scipy.stats import norm
import matplotlib.pyplot as plt

data = dataname
var = 'varname'

plt.figure(figsize=(10,8))
# Fit a normal distribution to the data:
mu, std = norm.fit(data[var])
# Plot the histogram (density=True so it is on the same scale as the PDF).
plt.hist(data[var], bins=25, density=True, alpha=0.6, color='g')
# Plot the PDF across the x-range the histogram ended up with.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, mu, std)
plt.plot(x, p, 'r', linewidth=2)
title = f"var : Avg = {mu:.2f},  std = {std:.2f}"
plt.title(title)

plt.show()
#===========================================================

Question: How do we plot candlestick for stock data in python?
#===========================================================
# Draw candlestick charts for ten randomly chosen windows of one symbol.
import random
from datetime import datetime, timedelta

# NOTE(review): pd, plt, test, Symbol and candlestick2_ohlc are not defined in
# this snippet and must already be in scope. candlestick2_ohlc presumably comes
# from the mpl_finance package -- confirm for your environment.

# `datetime` has to be imported for .today(). The variable keeps the name
# `date` because later snippets refer to it.
date = datetime.today()
pdffile = r'C:\Users\directory\test1.pdf'

for x in range(10):
    # Window start: a random number of days (30..4001) before today;
    # the window then runs for a further 59 days.
    ran_num = random.randint(30, 4001)
    start = date - timedelta(ran_num)
    end = start + timedelta(59)

    trade_dates = pd.to_datetime(test['date1'])
    quotes = test[(trade_dates >= start) & (test['symbol'] == Symbol)
                  & (trade_dates <= end)]

    fig, ax = plt.subplots()
    candlestick2_ohlc(ax, quotes['open'], quotes['high'], quotes['low'],
                      quotes['close'], colorup='g', colordown='r', width=.8)
    # Save before show() so the file is written from the fully drawn figure.
    # Every iteration writes to the same path, so only the last chart
    # survives in the file.
    fig.savefig(pdffile, bbox_inches='tight')
    plt.show()
#===========================================================

What if we want to put multiple plots on separate pages of a single PDF file? We have to use the PdfPages class from matplotlib.backends.backend_pdf:
#===========================================================
# Write several figures into one multi-page PDF with PdfPages.
# NOTE(review): pd, plt, pred, Symbol and candlestick2_ohlc are not defined in
# this snippet and must already be in scope.
import random
from datetime import datetime, timedelta
import matplotlib.backends.backend_pdf

pdffile = r'C:\Users\...\Python\data\test1.pdf'


def _plot_indicator_panels(frame, title):
    """Draw the 2*2 indicator figure for one slice of quotes and return it.

    Panels: k/d lines (top left, carries `title`), MACD lines (top right),
    k_d difference (bottom left), candlesticks plus the close10/close22/close60
    lines (bottom right). Everything is plotted against ``frame.index``.
    """
    fig, [[ax1, ax2], [ax3, ax4]] = plt.subplots(nrows=2, ncols=2, figsize=(20, 16))
    idx = frame.index

    ax1.plot(idx, frame.k, color='green', marker='d', linestyle='-')
    ax1.plot(idx, frame.d, color='red', marker='.', linestyle='dashed')
    ax1.plot(idx, frame.k_20, color='c', linestyle='--')
    ax1.plot(idx, frame.k_80, color='c', linestyle='--')
    ax1.set_ylabel('K D', color='g')
    # Axes.title is a Text object, not a function; set_title is the setter.
    ax1.set_title(title)

    ax2.set_ylabel('MACD', color='b')
    ax2.plot(idx, frame.macd, color='green', marker='d', linestyle='-')
    ax2.plot(idx, frame.macds, color='red', marker='.', linestyle='dashed')
    ax2.plot(idx, frame.macdh, color='c', linestyle='--')
    ax2.plot(idx, frame.ref_0, color='c', linestyle='--')

    ax3.plot(idx, frame.k_d, color='black', linestyle='--')
    ax3.plot(idx, frame.ref_0, color='c', linestyle='--')
    ax3.set_ylabel('K_D diff', color='black')

    candlestick2_ohlc(ax4, frame['open'], frame['high'], frame['low'],
                      frame['close'], colorup='g', colordown='r', width=.8)
    ax4.plot(idx, frame.close10, color='r', marker='.', linestyle='-')
    ax4.plot(idx, frame.close22, color='k', marker='.', linestyle='-')
    ax4.plot(idx, frame.close60, color='b', marker='.', linestyle='-')
    return fig


today = datetime.today()
# These do not change between iterations, so compute them once.
trade_dates = pd.to_datetime(pred['date1'])
is_symbol = pred['symbol'] == Symbol

# The with-block closes (and thereby finalises) the PDF even if plotting fails.
with matplotlib.backends.backend_pdf.PdfPages(pdffile) as pdf:
    for x in range(2):
        ran_num = random.randint(30, 4001)
        day_start = today - timedelta(ran_num)
        day_end = day_start + timedelta(59)
        day_end1 = day_start + timedelta(69)  # same start, 10 days longer

        # reset_index gives a 0..n-1 index to plot against.
        quotes = pred[(trade_dates >= day_start) & (trade_dates <= day_end)
                      & is_symbol].reset_index(drop=True)
        quotes1 = pred[(trade_dates >= day_start) & (trade_dates <= day_end1)
                       & is_symbol].reset_index(drop=True)

        # One figure per window, so the longer window is not drawn on top of
        # the shorter one.
        fig1 = _plot_indicator_panels(
            quotes, 'This is Randome Test {num} '.format(num=x+1))
        fig3 = _plot_indicator_panels(
            quotes1, 'This is Randome Validataion {num} '.format(num=x+1))

        # Each savefig call appends one page to the PDF.
        for fig in (fig1, fig3):
            pdf.savefig(fig)
        plt.show()
        for fig in (fig1, fig3):
            plt.close(fig)  # release the figure once it is on disk
#===========================================================

How do we plot a heat map by US state or county in Python?
First, install the packages from the command line:
conda install -c anaconda plotly
conda install -c plotly plotly-geo
conda install -c plotly plotly-orca

#===========================================================
#here is an example plot by state in US:
import gzip,  pickle #use gzip to compress the file.
# Python 3's pickle already uses the C implementation when it is available, so
# the private _pickle module is not needed here. The alias is kept only because
# other snippets refer to cPickle.
import _pickle as cPickle

import plotly.graph_objects as go
import plotly.io as pio

# Load data frame and tidy it.
import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv')
print(df.shape)
print(df.head())

# Round-trip the frame through a gzip-compressed pickle file.
file=r'/Users/*****/onedrive/data_dictionary/state_data'
with gzip.open(file, 'wb') as f_model_pkl:
    pickle.dump(df, f_model_pkl)

# Read it back inside a with-block so the file handle is closed again.
# Only unpickle files you wrote yourself: pickle can run arbitrary code on load.
with gzip.open(file, 'rb') as f_model_pkl:
    test1 = pickle.load(f_model_pkl)
print(test1.head())

fig = go.Figure(data=go.Choropleth(
    locations=df['code'], # Spatial coordinates
    z = df['total exports'].astype(float), # Data to be color-coded
    locationmode = 'USA-states', # set of locations match entries in `locations`
    colorscale = 'Reds',
    colorbar_title = "Millions USD",
))

fig.update_layout(
    title_text = '2011 US Agriculture Exports by State',
    geo_scope='usa', # limit map scope to USA
)

fig.show(renderer="jpg")
#usually we just use fig.show(); if nothing shows up, name a renderer explicitly.

#to see all different kind of renderers
print(pio.renderers)
# The default renderer lives on pio.renderers.default; assigning to
# pio.renderer only creates an unused attribute and changes nothing.
pio.renderers.default = 'notebook'
#===========================================================

Here is another example, plotting a map by US county:
#===========================================================
# County-level choropleth: GeoJSON boundaries plus one value per FIPS code.
from urllib.request import urlopen
import gzip
import json
import pickle

with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)
#counties is the dict here gives the boundaries.

# Cache the downloaded boundaries as a gzip-compressed pickle file.
file=r'/Users/****/onedrive/data_dictionary/county_data'
with gzip.open(file, 'wb') as f_model_pkl:
    pickle.dump(counties, f_model_pkl)

# Read the cache back inside a with-block so the file handle is closed again.
# Only unpickle files you wrote yourself: pickle can run arbitrary code on load.
with gzip.open(file, 'rb') as f_model_pkl:
    counties = pickle.load(f_model_pkl)

import pandas as pd
# Read fips as text so the codes are not parsed as integers, which would drop
# any leading zeros.
df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv",
                 dtype={"fips": str})
print(df.head())
#df has two columns: fips, unemp
import plotly.express as px

fig = px.choropleth(df, geojson=counties, locations='fips', color='unemp',
                    color_continuous_scale="Viridis",
                    range_color=(0, 12),
                    scope="usa",
                    labels={'unemp':'unemployment rate'}
                    )
# Remove the margins around the map.
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show(renderer="jpg")
#===========================================================


No comments:

Post a Comment

GCP Study Notes 4: GCP Big Data and Machine Learning Fundamentals (coursera notes)

In this lab, you will: Create Cloud SQL instance Create database tables by importing .sql files from Cloud Storage Populate the tables b...