Source code for plot_utils.time_series

# -*- coding: utf-8 -*-

import numpy as np
import pandas as pd
import datetime as dt
import matplotlib as mpl
import matplotlib.pyplot as plt
from distutils.version import LooseVersion

# Explicitly register matplotlib converters:
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

from . import helper as hlp
from . import colors_and_lines as cl

#%%============================================================================
def plot_timeseries(
        time_series, date_fmt=None, fig=None, ax=None, figsize=(10,3), dpi=100,
        xlabel='Time', ylabel=None, label=None, color=None, lw=2, ls=None,
        marker=None, fontsize=12, xgrid_on=True, ygrid_on=True, title=None,
        zorder=None, alpha=1.0, month_grid_width=None,
):
    '''
    Plot time series (i.e., values a function of dates).

    You can plot multiple time series by supplying a multi-column pandas
    Dataframe, but you cannot use custom line specifications (colors, width,
    and styles) for each time series. It is recommended to use
    :func:`~plot_multiple_timeseries` instead.

    Parameters
    ----------
    time_series : pandas.Series or pandas.DataFrame
        A pandas Series, with index being date; or a pandas DataFrame, with
        index being date, and each column being a different time series.
    date_fmt : str
        Date format specifier, e.g., '%Y-%m' or '%d/%m/%y'.
    fig : matplotlib.figure.Figure or ``None``
        Figure object. If None, a new figure will be created.
    ax : matplotlib.axes._subplots.AxesSubplot or ``None``
        Axes object. If None, a new axes will be created.
    figsize: (float, float)
        Figure size in inches, as a tuple of two numbers. The figure
        size of ``fig`` (if not ``None``) will override this parameter.
    dpi : float
        Figure resolution. The dpi of ``fig`` (if not ``None``) will override
        this parameter.
    xlabel : str
        Label of X axis. Usually "Time" or "Date".
    ylabel : str
        Label of Y axis. Usually the meaning of the data, e.g., "Gas price [$]".
    label : str
        Label of data, for plotting legends.
    color : list<float> or str
        Color of line. If None, let Python decide for itself.
    lw : float
        Line width, passed to ``ax.plot()``.
    ls : str
        Line style, passed to ``ax.plot()``.
    marker : str
        Marker style, passed to ``ax.plot()``.
    fontsize : float
        Font size applied to every text object found in the figure.
    xgrid_on : bool
        Whether or not to show vertical grid lines (default: ``True``).
    ygrid_on : bool
        Whether or not to show horizontal grid lines (default: ``True``).
    title : str
        Figure title (optional).
    zorder : float
        Set the zorder for lines. Higher zorder are drawn on top. ``None``
        (the default) leaves the choice to matplotlib; ``0`` is honored.
    alpha : float
        Opacity of the line.
    month_grid_width : float
        The on-figure "horizontal width" that each time interval occupies.
        If ``None``, it is derived from the axes size and the time span of
        the data. The value is forwarded to the internal X axis formatter.
        Do not change this unless you really know what you are doing.

    Returns
    -------
    fig : matplotlib.figure.Figure
        The figure object being created or being passed into this function.
    ax : matplotlib.axes._subplots.AxesSubplot
        The axes object being created or being passed into this function.

    Raises
    ------
    TypeError
        If ``time_series`` is neither a pandas Series nor a pandas DataFrame.

    See also
    --------
    :func:`~plot_multiple_timeseries` :
        Plot multiple time series, with the ability to specify different line
        specifications for each line.
    '''
    if not isinstance(time_series, (pd.Series, pd.DataFrame)):
        raise TypeError('`time_series` must be a pandas Series or DataFrame.')

    fig, ax = hlp._process_fig_ax_objects(fig, ax, figsize, dpi)
    ax_size = hlp._get_ax_size(fig, ax)

    ts = time_series.copy()  # shorten the name + avoid changing input
    ts.index = _as_date(ts.index, date_fmt)  # batch-convert index to Timestamp

    plot_kwargs = dict(
        color=color, lw=lw, ls=ls, marker=marker, label=label, alpha=alpha,
    )
    # Only forward zorder when the caller chose one. Test against None (not
    # truthiness) so that an explicit zorder=0 is respected.
    if zorder is not None:
        plot_kwargs['zorder'] = zorder
    ax.plot(ts.index, ts, **plot_kwargs)

    ax.set_label(label)  # set label for legends using argument 'label'
    if xlabel:
        ax.set_xlabel(xlabel)
    if ylabel:
        ax.set_ylabel(ylabel)

    if month_grid_width is None:  # width of each month in inches
        n_months = _calc_month_interval(ts.index)
        # A zero-length time span (e.g., identical dates) would otherwise
        # divide by zero; fall back to the full axes width in that case.
        if n_months:
            month_grid_width = float(ax_size[0]) / n_months
        else:
            month_grid_width = float(ax_size[0])
    ax = _format_xlabel(ax, month_grid_width)

    if ygrid_on:
        ax.yaxis.grid(ls=':', color=[0.75]*3)
    if xgrid_on:
        ax.xaxis.grid(False, 'major')
        ax.xaxis.grid(xgrid_on, 'minor', ls=':', color=[0.75]*3)
    ax.set_axisbelow(True)

    if title is not None:
        ax.set_title(title)

    # Apply a uniform font size to every text object in the figure
    for o in fig.findobj(mpl.text.Text):
        o.set_fontsize(fontsize)

    return fig, ax
#%%============================================================================
def plot_multiple_timeseries(
        multiple_time_series, show_legend=True, fig=None, ax=None,
        figsize=(10,3), dpi=100, ncol_legend=5, **kwargs,
):
    '''
    Plot multiple time series.

    Note that setting keyword arguments such as ``color`` or ``linestyle``
    will force all time series to have the same color or line style. So we
    recommend letting this function generate distinguishable line
    specifications (color/linestyle/linewidth combinations) by itself.
    (Although the more time series, the less the distinguishability. 240
    time series or less is recommended.)

    Parameters
    ----------
    multiple_time_series : pandas.DataFrame or pandas.Series
        If it is a pandas DataFrame, its index is the date, and each column is
        a different time series.
        If it is a pandas Series, it will be internally converted into a
        1-column pandas DataFrame.
    show_legend : bool
        If ``True`` (default), every column is drawn with its own line
        specification and a legend is added above the axes. If ``False``,
        the data are handed to :func:`~plot_timeseries` in a single call and
        no legend is drawn.
    fig : matplotlib.figure.Figure or ``None``
        Figure object. If None, a new figure will be created.
    ax : matplotlib.axes._subplots.AxesSubplot or ``None``
        Axes object. If None, a new axes will be created.
    figsize: (float, float)
        Figure size in inches, as a tuple of two numbers. The figure
        size of ``fig`` (if not ``None``) will override this parameter.
    dpi : float
        Figure resolution. The dpi of ``fig`` (if not ``None``) will override
        this parameter.
    ncol_legend : int
        Number of columns of the legend.
    **kwargs :
        Other keyword arguments to be passed to :func:`~plot_timeseries()`,
        such as color, marker, fontsize, alpha, etc. A ``zorder`` given here
        overrides the automatically computed one.

    Returns
    -------
    fig : matplotlib.figure.Figure
        The figure object being created or being passed into this function.
    ax : matplotlib.axes._subplots.AxesSubplot
        The axes object being created or being passed into this function.

    Raises
    ------
    TypeError
        If ``multiple_time_series`` is neither a pandas Series nor a pandas
        DataFrame.

    See also
    --------
    :func:`~plot_timeseries` :
        Plot a single set of time series.
    '''
    if not isinstance(multiple_time_series, (pd.Series, pd.DataFrame)):
        raise TypeError(
            '`multiple_time_series` must be a pandas Series or DataFrame.'
        )

    fig, ax = hlp._process_fig_ax_objects(fig, ax, figsize, dpi)

    if not show_legend:  # if no need to show legends, just pass everything
        # Pass by keyword: the second positional parameter of
        # plot_timeseries() is `date_fmt`, not `fig`.
        fig, ax = plot_timeseries(
            multiple_time_series, fig=fig, ax=ax, figsize=figsize, dpi=dpi,
            **kwargs,
        )
    else:
        if isinstance(multiple_time_series, pd.Series):
            multiple_time_series = pd.DataFrame(multiple_time_series, copy=True)
        nr_timeseries = multiple_time_series.shape[1]

        if nr_timeseries <= 40:
            # 10 colors x 4 linestyles = 40, so use lw=2
            linespecs = cl.get_linespecs(range_linewidth=[2])
        elif nr_timeseries <= 120:  # need multiple line widths
            linespecs = cl.get_linespecs(range_linewidth=[1,3,5])
        elif nr_timeseries <= 240:
            linespecs = cl.get_linespecs(
                color_scheme='tab20', range_linewidth=[1,3,5],
            )
        else:
            linespecs = cl.get_linespecs(
                color_scheme='tab20',  # use more line widths
                range_linewidth=range(1, (nr_timeseries - 1) // 240 + 5, 2),
            )

        # NOTE(review): assumes get_linespecs() returns a sized sequence of
        # dicts (it is indexed and .copy()'d below) -- confirm.
        nr_linespecs = len(linespecs)
        for j in range(nr_timeseries):
            # Wrap around the available line specs so that having more time
            # series than specs re-uses specs instead of raising IndexError.
            tmp_dict = linespecs[j % nr_linespecs].copy()
            tmp_dict.update(kwargs)  # kwargs overwrites existing keys

            # A user-supplied zorder wins; otherwise derive one from the
            # line width (thinner lines above thicker lines, and "+1" to put
            # all lines above grid lines). Stays None if no usable 'lw'.
            zorder = tmp_dict.pop('zorder', None)
            if zorder is None and tmp_dict.get('lw'):
                zorder = 1 + 1.0/tmp_dict['lw']

            plot_timeseries(
                multiple_time_series.iloc[:,j],
                fig=fig,
                ax=ax,
                zorder=zorder,
                label=multiple_time_series.columns[j],
                **tmp_dict,
            )

        # Leave more room between the axes and the legend if there is a title
        if 'title' not in kwargs:
            bbox_anchor_loc = (0., 1.02, 1., .102)
        else:
            bbox_anchor_loc = (0., 1.08, 1., .102)
        ax.legend(
            bbox_to_anchor=bbox_anchor_loc,
            loc='lower center',
            ncol=ncol_legend,
        )

    ax.set_axisbelow(True)
    return fig, ax
#%%============================================================================
def fill_timeseries(
        time_series, upper_bound, lower_bound, date_fmt=None, fig=None,
        ax=None, figsize=(10,3), dpi=100, xlabel='Time', ylabel=None,
        line_label=None, shade_label=None, color='orange', lw=3, ls='-',
        fontsize=12, title=None, xgrid_on=True, ygrid_on=True,
):
    '''
    Plot time series as a line and then plot the upper and lower bounds as
    shaded areas.

    Parameters
    ----------
    time_series : pandas.Series
        A pandas Series, with index being date.
    upper_bound : pandas.Series
        Upper bounds of the time series, must have the same length as
        ``time_series``.
    lower_bound : pandas.Series
        Lower bounds of the time series, must have the same length as
        ``time_series``.
    date_fmt : str
        Date format specifier, e.g., '%Y-%m' or '%d/%m/%y'.
    fig : matplotlib.figure.Figure or ``None``
        Figure object. If None, a new figure will be created.
    ax : matplotlib.axes._subplots.AxesSubplot or ``None``
        Axes object. If None, a new axes will be created.
    figsize: (float, float)
        Figure size in inches, as a tuple of two numbers. The figure
        size of ``fig`` (if not ``None``) will override this parameter.
    dpi : float
        Figure resolution. The dpi of ``fig`` (if not ``None``) will override
        this parameter.
    xlabel : str
        Label of X axis. Usually "Time" or "Date".
    ylabel : str
        Label of Y axis. Usually the meaning of the data
        (e.g., "Gas price [$]").
    line_label : str
        Label of the line, for plotting legends.
    shade_label : str
        Label of the shade, for plotting legends.
    color : str or list or tuple
        Color of line. If None, let Python decide for itself.
    lw : scalar
        Line width of the line that represents time_series.
    ls : str
        Line style of the line that represents time_series.
    fontsize : scalar
        Font size of the texts in the figure.
    title : str
        Figure title.
    xgrid_on : bool
        Whether or not to show vertical grid lines (default: ``True``).
    ygrid_on : bool
        Whether or not to show horizontal grid lines (default: ``True``).

    Returns
    -------
    fig : matplotlib.figure.Figure
        The figure object being created or being passed into this function.
    ax : matplotlib.axes._subplots.AxesSubplot
        The axes object being created or being passed into this function.

    Raises
    ------
    TypeError
        If ``time_series`` is not a pandas Series.
    ValueError
        If ``upper_bound`` or ``lower_bound`` does not have the same length
        as ``time_series``.
    '''
    if not isinstance(time_series, pd.Series):
        raise TypeError(
            '`time_series` must be a pandas Series with index being dates.'
        )

    # Check the documented length requirement up front, before any figure is
    # created, so that a mismatch yields a clear message.
    n_points = len(time_series)
    if len(upper_bound) != n_points or len(lower_bound) != n_points:
        raise ValueError(
            '`upper_bound` and `lower_bound` must have the same length '
            'as `time_series`.'
        )

    fig, ax = hlp._process_fig_ax_objects(fig, ax, figsize, dpi)

    ts = time_series.copy()  # shorten the name + avoid changing time_series
    ts.index = _as_date(ts.index, date_fmt)  # batch-convert index to Timestamp
    lb = lower_bound.copy()
    ub = upper_bound.copy()

    ax.fill_between(
        ts.index, lb, ub,
        color=color,
        facecolor=color,
        linewidth=0.01,
        alpha=0.25,
        interpolate=True,
        label=shade_label,
    )
    ax.plot(ts.index, ts, color=color, lw=lw, ls=ls, label=line_label)

    if xlabel:
        ax.set_xlabel(xlabel)
    if ylabel:
        ax.set_ylabel(ylabel)

    # Width of each month in inches. Measure the actual axes (as
    # plot_timeseries() does) rather than trusting `figsize`, which is
    # overridden whenever an existing `fig` is passed in.
    ax_size = hlp._get_ax_size(fig, ax)
    n_months = _calc_month_interval(ts.index)
    # A zero-length time span would otherwise divide by zero.
    if n_months:
        month_grid_width = float(ax_size[0]) / n_months
    else:
        month_grid_width = float(ax_size[0])
    ax = _format_xlabel(ax, month_grid_width)

    if ygrid_on:
        ax.yaxis.grid(ygrid_on, ls=':', color=[0.75]*3)
    if xgrid_on:
        ax.xaxis.grid(False, 'major')
        ax.xaxis.grid(xgrid_on, 'minor', ls=':', color=[0.75]*3)
    ax.set_axisbelow(True)

    if title is not None:
        ax.set_title(title)

    # Apply a uniform font size to every text object in the figure
    for o in fig.findobj(mpl.text.Text):
        o.set_fontsize(fontsize)

    return fig, ax
#%%============================================================================
def _calc_month_interval(date_array):
    '''
    Calculate how many months are there between the first element and the
    last element of the given date_array (a "month" being 30 days).

    Parameters
    ----------
    date_array : iterable of date-like objects
        Elements must support subtraction yielding an object with a ``.days``
        attribute (e.g., ``datetime.date`` or ``pandas.Timestamp``).

    Returns
    -------
    delta_months : float or int
        A float in [0, 1) if the span is shorter than 30 days; otherwise the
        whole number of 30-day periods. The result is never negative, even
        if ``date_array`` is in descending order.
    '''
    dates = list(date_array)  # materialize once; works for Index/Series/list
    delta_days = abs((dates[-1] - dates[0]).days)
    if delta_days < 30:  # within one month
        return delta_days / 30.0  # return a float between 0 and 1
    return delta_days // 30

#%%============================================================================
def _format_xlabel(ax, *args):
    '''
    Install an automatic date locator and a concise date formatter on the
    X axis of ``ax``, and also show Y tick labels on the right edge.

    Parameters
    ----------
    ax : matplotlib axes object
        The axes to be formatted (modified in place).
    *args :
        Accepted for backward compatibility with existing callers (which
        pass a month grid width); currently ignored.

    Returns
    -------
    ax : matplotlib axes object
        The same axes object that was passed in.
    '''
    locator = mpl.dates.AutoDateLocator()
    formatter = mpl.dates.ConciseDateFormatter(locator)
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(formatter)
    ax.tick_params(labelright=True)  # also show y axis on right edge of figure
    return ax

#%%============================================================================
def _element_as_timestamp(item, date_fmt=None):
    '''
    Convert one element of a date list into a pandas Timestamp.
    '''
    # dt.date also covers dt.datetime and pd.Timestamp (its subclasses).
    # Convert these directly so that the time of day is preserved and
    # `date_fmt` (meant for parsing text) is not applied to them.
    if isinstance(item, (dt.date, np.datetime64)):
        return pd.Timestamp(item)

    if isinstance(item, str):
        # Digit-only strings are normalized through int(), e.g. '20150101'
        date_ = str(int(item)) if item.isdigit() else item
    elif isinstance(item, (int, float, np.integer, np.floating)):
        date_ = str(int(item))  # e.g. 201405.0 -> '201405'
    else:
        raise TypeError('Invalid data type in `raw_date`.')

    return pd.to_datetime(date_, format=date_fmt)

def _as_date(raw_date, date_fmt=None):
    '''
    Convert raw_date to datetime array.

    It can handle:
    (A) A list of str, int, or float, such as:
        [1] ['20150101', '20150201', '20160101']
        [2] ['2015-01-01', '2015-02-01', '2016-01-01']
        [3] [201405, 201406, 201407]
        [4] [201405.0, 201406.0, 201407.0]
    (B) A list of just a single element, such as:
        [1] [201405]
        [2] ['2014-05-25']
        [3] [201412.0]
    (C) A single element of: str, int, float, such as:
        [1] 201310
        [2] 201210.0
    (D) A pandas Series, pandas Index, or numpy array, of length 1 or
        length larger than 1
    (E) A list of Python datetime objects (or pandas Timestamps); their
        time-of-day information is preserved

    Parameters
    ----------
    raw_date : (see above for acceptable formats)
        The raw date information to be processed
    date_fmt : str
        The format of each individual date entry, e.g., '%Y-%m-%d' or
        '%m/%d/%y'. To be passed directly to pd.to_datetime()
        (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.to_datetime.html)
        It is only applied to str/int/float entries, not to entries that
        are already date or datetime objects.

    Returns
    -------
    date_list :
        A variable with the same structure (list or scalar-like) as
        raw_date, whose contents are pandas Timestamp objects. An empty
        input yields ``None``; a single ``datetime.date`` object is returned
        unchanged.

    Raises
    ------
    TypeError
        If ``raw_date`` (or one of its elements) has an unsupported type.

    Reference
    ---------
    https://docs.python.org/2/library/datetime.html#strftime-strptime-behavior
    '''
    if isinstance(raw_date, pd.Timestamp):  # already a pandas Timestamp obj
        return raw_date  # return raw_date as is

    # ----------- Convert to list for array-like pandas/numpy objects -------
    if isinstance(raw_date, (pd.Series, np.ndarray, pd.Index)):
        raw_date = list(raw_date)

    # ---------- Element-wise conversion -----------------------------------
    if isinstance(raw_date, list):
        if len(raw_date) == 0:  # empty list
            return None  # return an empty object
        return [_element_as_timestamp(item, date_fmt) for item in raw_date]

    # ---------- Scalar inputs ----------------------------------------------
    if isinstance(raw_date, (dt.datetime, np.datetime64)):
        return pd.Timestamp(raw_date)
    if isinstance(raw_date, dt.date):  # a plain datetime.date object
        return raw_date  # no need for conversion
    if isinstance(raw_date, str):  # a single string, such as '2015-04'
        return pd.to_datetime(raw_date, format=date_fmt)
    if isinstance(raw_date, hlp._scalar_like):
        return pd.to_datetime(str(int(raw_date)), format=date_fmt)

    raise TypeError(
        'Input data type of `raw_date` not recognized. '
        'type(raw_date) is: %s' % type(raw_date)
    )

#%%============================================================================
def _str2date(date_):
    '''
    Convert date_ into a datetime.date object. date_ must be a string (not
    a list of strings).

    Currently accepted date formats:
    (1) Aug-2014
    (2) August 2014
    (3) 201407
    (4) 2016-07
    (5) 2015-02-21
    (6) May-12

    If the format carries no day, the first day of the month is used.

    Raises
    ------
    ValueError
        If the format of ``date_`` is not recognized.

    Note: This subroutine is no longer being used.
    '''
    day = 1  # default for formats that carry no day information

    if ('-' in date_) and (len(date_) == 8):  # for date style 'Aug-2014'
        month, year = date_.split('-')  # split string by character
        month = dt.datetime.strptime(month, '%b').month  # from 'Mar' to '3'
        year = int(year)
    elif ' ' in date_:  # for date style 'August 2014'
        month, year = date_.split(' ')  # split string by character
        month = dt.datetime.strptime(month, '%B').month  # from 'March' to '3'
        year = int(year)
    elif (len(date_) == 6) and date_.isdigit():  # for cases like '201205'
        year = int(date_[:4])  # first four characters
        month = int(date_[4:])  # remaining characters
    elif (len(date_) == 7) and (date_[4] == '-') and not date_.isdigit():
        # such as '2015-03' [NOT 100% ROBUST!]
        year, month = (int(part) for part in date_.split('-'))
    elif (len(date_) == 10) and not date_.isdigit():
        # such as '2012-02-01' [NOT 100% ROBUST!!]
        year, month, day = (int(part) for part in date_.split('-'))
    elif (len(date_) == 6) and (date_[3] == '-') and (date_[:3].isalpha()) \
            and (date_[4:].isdigit()):  # such as 'May-12'
        month, year = date_.split('-')
        month = dt.datetime.strptime(month, '%b').month  # from 'Mar' to '3'
        year = int(year) + 2000  # from '13' to '2013'
    else:
        raise ValueError(
            'Date format not recognized. '
            'User supplied %s, which is not recognized.' % date_
        )

    return dt.date(year, month, day)