# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection
from . import helper as hlp
#%%============================================================================
def plot_ranking(
        ranking, fig=None, ax=None, figsize='auto', dpi=100,
        barh=True, top_n=None, score_ax_label=None, name_ax_label=None,
        invert_name_ax=False, grid_on=True,
):
    '''
    Plot rankings as a bar plot (in descending order), such as::

                ^
                |
        dolphin |||||||||||||||||||||||||||||||
                |
            cat |||||||||||||||||||||||||
                |
         rabbit ||||||||||||||||
                |
            dog |||||||||||||
                |
               -|------------------------------------> Age of pet
                0  1  2  3  4  5  6  7  8  9  10  11

    Parameters
    ----------
    ranking : dict or pandas.Series
        The ranking information, for example:
            {'rabbit': 5, 'cat': 8, 'dog': 4, 'dolphin': 10}
        It does not need to be sorted externally.
    fig : matplotlib.figure.Figure or ``None``
        Figure object. If None, a new figure will be created.
    ax : matplotlib.axes._subplots.AxesSubplot or ``None``
        Axes object. If None, a new axes will be created.
    figsize : (float, float) or ``'auto'``
        Figure size in inches, as a tuple of two numbers. If ``'auto'``,
        0.26 inch is reserved for each displayed category along the "category
        name" axis, and 5 inches are used for the score axis. The figure
        size of ``fig`` (if not ``None``) will override this parameter.
    dpi : float
        Figure resolution. The dpi of ``fig`` (if not ``None``) will override
        this parameter.
    barh : bool
        Whether or not to show the bars as horizontal (otherwise, vertical).
    top_n : int or ``None``
        If ``None`` (or 0), show all categories. ``top_n`` > 0 means showing
        the highest ``top_n`` categories. ``top_n`` < 0 means showing the
        lowest |``top_n``| categories. If |``top_n``| exceeds the number of
        categories, all categories are shown.
    score_ax_label : str
        Label of the score axis (e.g., "Age of pet").
    name_ax_label : str
        Label of the "category name" axis (e.g., "Pet name").
    invert_name_ax : bool
        Whether to invert the "category name" axis. For example, if
        ``invert_name_ax`` is ``False``, then higher values are shown on the
        top if ``barh`` is ``True``.
    grid_on : bool
        Whether or not to show grids on the plot.

    Returns
    -------
    fig : matplotlib.figure.Figure
        The figure object being created or being passed into this function.
    ax : matplotlib.axes._subplots.AxesSubplot
        The axes object being created or being passed into this function.

    Raises
    ------
    TypeError
        If ``ranking`` is neither a dict nor a pandas Series.
    ValueError
        If ``top_n`` is neither an integer nor ``None``.
    '''
    if not isinstance(ranking, (dict, pd.Series)):
        raise TypeError('`ranking` must be a Python dict or pandas Series.')
    if top_n is not None and not isinstance(top_n, (int, np.integer)):
        raise ValueError('`top_n` must be an integer or None.')

    if isinstance(ranking, dict):
        ranking = pd.Series(ranking)

    n_available = len(ranking)
    show_lowest = top_n is not None and top_n < 0
    if top_n is None or top_n == 0:
        n_shown = n_available
    else:
        # Never claim more categories than actually exist, so that the
        # automatic figure size matches what is really drawn.
        n_shown = min(abs(int(top_n)), n_available)

    if isinstance(figsize, str) and figsize == 'auto':
        # 0.26 inch per category; keep a non-zero extent for empty input
        bar_span = max(n_shown, 1) * 0.26
        figsize = (5, bar_span) if barh else (bar_span, 5)

    fig, ax = hlp._process_fig_ax_objects(fig, ax, figsize, dpi)

    if barh:
        xlabel, ylabel = score_ax_label, name_ax_label
        # Horizontal bars are drawn bottom-up, so the entry that should end
        # up at the top of the plot has to be the last one in the Series.
        ordered = ranking.sort_values(ascending=not show_lowest)
        selected = ordered.iloc[n_available - n_shown:]
        ax = selected.plot(kind='barh', ax=ax)
    else:
        xlabel, ylabel = name_ax_label, score_ax_label
        # Vertical bars are drawn left-to-right, so the leading entry is first.
        ordered = ranking.sort_values(ascending=show_lowest)
        selected = ordered.iloc[:n_shown]
        ax = selected.plot(kind='bar', ax=ax)

    if invert_name_ax:
        # Use the same truthiness test as above so that the inverted axis is
        # always the "category name" axis of the orientation actually drawn.
        if barh:
            ax.invert_yaxis()
        else:
            ax.invert_xaxis()

    if xlabel:
        ax.set_xlabel(xlabel)
    if ylabel:
        ax.set_ylabel(ylabel)

    if grid_on:
        ax.grid(ls=':')
        ax.set_axisbelow(True)

    return fig, ax
#%%============================================================================
def plot_with_error_bounds(
        x, y, upper_bound, lower_bound,
        fig=None, ax=None, figsize=None, dpi=100,
        line_color=(0.4, 0.4, 0.4), shade_color=(0.7, 0.7, 0.7),
        shade_alpha=0.5, linewidth=2.0, legend_loc='best',
        line_label='Data', shade_label=r'$\mathregular{\pm}$STD',
        logx=False, logy=False, grid_on=True,
):
    r'''
    Plot a graph with one line and its upper and lower bounds, with areas between
    bounds shaded. The effect is similar to this illustration below::

      y ^            ...                         _____________________
        |         ...   ..........              |                     |
        |         .   ______     .              |  ---  Mean value    |
        |      ...   /      \    ..             |  ...  Error bounds  |
        |   ...  ___/        \    ...           |_____________________|
        |  .    /    ...      \    ........
        | .  __/   ...  ....   \________  .
        |  /    ....       ...          \  .
        | /  ....            .....       \_
        | ...                    ..........
       -|---------------------------------------> x

    Parameters
    ----------
    x : list, numpy.ndarray, or pandas.Series
        X data points to be plotted as a line.
    y : list, numpy.ndarray, or pandas.Series
        Y data points to be plotted as a line.
    upper_bound : list, numpy.ndarray, or pandas.Series
        Upper bound of the Y values.
    lower_bound : list, numpy.ndarray, or pandas.Series
        Lower bound of the Y values.
    fig : matplotlib.figure.Figure or ``None``
        Figure object. If None, a new figure will be created.
    ax : matplotlib.axes._subplots.AxesSubplot or ``None``
        Axes object. If None, a new axes will be created.
    figsize : (float, float)
        Figure size in inches, as a tuple of two numbers. The figure
        size of ``fig`` (if not ``None``) will override this parameter.
    dpi : float
        Figure resolution. The dpi of ``fig`` (if not ``None``) will override
        this parameter.
    line_color : str, list, or tuple
        Color of the line.
    shade_color : str, list, or tuple
        Color of the underlying shades.
    shade_alpha : float
        Opacity of the shades.
    linewidth : float
        Width of the line.
    legend_loc : int, str
        Location of the legend, to be passed directly to ``ax.legend()``.
    line_label : str
        Label of the line, to be used in the legend.
    shade_label : str
        Label of the shades, to be used in the legend.
    logx : bool
        Whether or not to show the X axis in log scale.
    logy : bool
        Whether or not to show the Y axis in log scale.
    grid_on : bool
        Whether or not to show grids on the plot.

    Returns
    -------
    fig : matplotlib.figure.Figure
        The figure object being created or being passed into this function.
    ax : matplotlib.axes._subplots.AxesSubplot
        The axes object being created or being passed into this function.

    Raises
    ------
    TypeError
        If ``x`` or ``y`` is not array-like.
    hlp.LengthError
        If ``x`` and ``y`` have different lengths.
    '''
    if not isinstance(x, hlp._array_like) or not isinstance(y, hlp._array_like):
        raise TypeError('`x` and `y` must be arrays.')
    if len(x) != len(y):
        raise hlp.LengthError('`x` and `y` must have the same length.')

    fig, ax = hlp._process_fig_ax_objects(fig, ax, figsize, dpi)

    shade_handle = ax.fill_between(
        x, lower_bound, upper_bound,
        color=shade_color, facecolor=shade_color,
        linewidth=0.01, alpha=shade_alpha, interpolate=True,
        label=shade_label,
    )
    line_handle, = ax.plot(
        x, y, color=line_color, linewidth=linewidth, label=line_label,
    )

    if logx:
        ax.set_xscale('log')
    if logy:
        ax.set_yscale('log')

    if grid_on:
        ax.grid(ls=':', lw=0.5)
        ax.set_axisbelow(True)

    # Attach the legend to the axes we drew on; ``plt.legend`` would use
    # whichever axes happens to be "current", which may be a different one
    # when the caller passes in their own ``ax``.
    ax.legend(handles=[line_handle, shade_handle], loc=legend_loc)

    return fig, ax
#%%============================================================================
def visualize_cv_scores(
        fig=None, ax=None, dpi=100, n_folds=5,
        cv_scores=None, box_height=0.6, box_width=0.9,
        gap_frac=0.05, metric_name='AUC', avg_cv_score=None,
        no_holdout_set=False, holdout_score=None, fontsize=9,
        flip_yaxis=True,
):
    '''
    Visualize K-fold cross-validation scores as well as hold-out set performance
    in an intuitive way.

    Parameters
    ----------
    fig : matplotlib.figure.Figure or ``None``
        Figure object. If None, a new figure will be created.
    ax : matplotlib.axes._subplots.AxesSubplot or ``None``
        Axes object. If None, a new axes will be created.
    dpi : float
        Figure resolution. The dpi of ``fig`` (if not ``None``) will override
        this parameter.
    n_folds : int
        Number of CV folds. Must be at least 1.
    cv_scores : list<float> or ``None``
        The validation score of each fold. If ``None``, no scores will be shown
        on the small boxes ("train"/"eval." labels are shown instead). If
        given, its length must equal ``n_folds``.
    box_height : float
        The height of the small box. The requested figure height is
        1.5 * ``n_folds`` * ``box_height``.
    box_width : float
        The width of the small box. The requested figure width is
        1.5 * ``n_folds`` * ``box_width``.
    gap_frac : float
        How much gap should there be between each small box. Must be within
        [0, 1].
    metric_name : str
        The name of the metric to be shown in the figure.
    avg_cv_score : float or ``None``
        The average cross-validation score. If ``None`` (recommended), it will
        be calculated by numpy.mean(cv_scores).
    no_holdout_set : bool
        If ``False``, the hold-out data set will be visualized alongside the
        training data set. This parameter supersedes ``holdout_score``.
    holdout_score : float or ``None``
        The performance on the hold-out data set. If ``no_holdout_set`` is
        ``True``, this parameter has no effect.
    fontsize : float
        The font size of all the texts.
    flip_yaxis : bool
        If ``True``, everything will be flipped upside down. This parameter is
        for diagnosis and debugging purposes only. It is recommended to leave
        it as ``True``.

    Returns
    -------
    fig : matplotlib.figure.Figure
        The figure object being created or being passed into this function.
    ax : matplotlib.axes._subplots.AxesSubplot
        The axes object being created or being passed into this function.

    Raises
    ------
    ValueError
        If ``gap_frac`` is outside [0, 1], or ``n_folds`` is smaller than 1.
    hlp.LengthError
        If ``cv_scores`` is given and its length differs from ``n_folds``.
    '''
    hlp.assert_type(n_folds, int, name='n_folds')
    hlp.assert_type(cv_scores, (type(None), list), name='cv_scores')
    if cv_scores is not None:
        hlp.assert_element_type(cv_scores, hlp._scalar_like, name='cv_scores')
    hlp.assert_type(avg_cv_score, (type(None), hlp._scalar_like), name='avg_cv_score')
    hlp.assert_type(gap_frac, hlp._scalar_like, name='gap_frac')
    if gap_frac < 0 or gap_frac > 1:
        raise ValueError('`gap_frac` must be within [0, 1].')
    hlp.assert_type(metric_name, str, name='metric_name')
    hlp.assert_type(holdout_score, (type(None), hlp._scalar_like), name='holdout_score')

    if n_folds < 1:
        raise ValueError('`n_folds` must be a positive integer.')
    # Explicit exception instead of `assert`, which is stripped under `-O`.
    if cv_scores is not None and len(cv_scores) != n_folds:
        raise hlp.LengthError('Length of `cv_scores` must be equal to `n_folds`.')

    GRAY_COLOR_ALPHA = 0.25
    OTHER_COLOR_ALPHA = 0.5

    total_width = n_folds * box_width
    total_height = n_folds * box_height
    figsize = (total_width * 1.5, total_height * 1.5)

    # NOTE(review): presumably unreachable if `hlp.assert_type` rejects
    # non-str values above; kept as a fallback -- confirm against `helper`.
    if metric_name is None:
        metric_name = 'score'

    fig, ax = hlp._process_fig_ax_objects(fig, ax, figsize, dpi)

    # One row per fold; the j-th box of the j-th row is the evaluation fold.
    for j in range(n_folds):
        if cv_scores is not None:
            # Only the evaluation box carries text: the fold's score. The
            # first row additionally spells out the metric name.
            row_text = [''] * n_folds
            if j == 0:
                row_text[j] = '%s\n= %.4g' % (metric_name, cv_scores[j])
            else:
                row_text[j] = '%.4g' % cv_scores[j]
        else:
            row_text = ['eval.' if k == j else 'train' for k in range(n_folds)]

        ax = _plot_one_row_of_rectangles(
            ax, n_boxes=n_folds,
            southwest_corner=(0, j * box_height),
            box_height=box_height, fontsize=fontsize,
            box_width=box_width, gap_frac=gap_frac,
            show_which_box_as_test=j, text=row_text,
        )

    # Row of "Fold 1", "Fold 2", ... labels
    fold_labels = ['Fold %d' % (k + 1) for k in range(n_folds)]
    ax = _plot_one_row_of_rectangles(
        ax, n_boxes=n_folds,
        southwest_corner=(0, n_folds * box_height),
        box_height=box_height, box_width=box_width,
        gap_frac=gap_frac, fontsize=fontsize,
        show_which_box_as_test=-1,
        train_set_color='gray', alpha=GRAY_COLOR_ALPHA,
        text=fold_labels,
    )

    # One wide box spanning all the folds
    ax = _plot_one_row_of_rectangles(
        ax, n_boxes=1,
        southwest_corner=(0, -1.5 * box_height),
        box_height=box_height,
        box_width=total_width,
        gap_frac=0.0, text=['Training data'],
        fontsize=fontsize,
        train_set_color='#6baed6',
        alpha=OTHER_COLOR_ALPHA,
    )

    if not no_holdout_set:
        if holdout_score is not None:
            holdout_txt = 'Hold-out data\n%s = %.4g' % (metric_name, holdout_score)
        else:
            holdout_txt = 'Hold-out data'

        holdout_box_gap_frac = 0.01
        r1 = 1 + holdout_box_gap_frac
        holdout_box_width = total_width * 0.5

        # Hold-out box, placed to the right of the "Training data" box
        ax = _plot_one_row_of_rectangles(
            ax, n_boxes=1,
            southwest_corner=(total_width * r1, -1.5 * box_height),
            box_width=holdout_box_width,
            box_height=box_height,
            gap_frac=0.0, text=[holdout_txt],
            fontsize=fontsize,
            train_set_color='yellow',
            alpha=OTHER_COLOR_ALPHA,
        )
        # "All data" box, spanning both the training and the hold-out boxes
        ax = _plot_one_row_of_rectangles(
            ax, n_boxes=1,
            southwest_corner=(0, -2.7 * box_height),
            box_height=box_height,
            box_width=total_width * r1 + holdout_box_width,
            gap_frac=0.0, text=['All data'],
            fontsize=fontsize,
            train_set_color='gray',
            alpha=GRAY_COLOR_ALPHA,
        )

    if avg_cv_score is not None or cv_scores is not None:
        if avg_cv_score is None:
            avg_cv_score = np.mean(cv_scores)
        # too few folds: display text in two lines
        char = '\n' if n_folds <= 4 else ' '
        avg_score_txt = 'Mean %s%s= %.4g' % (metric_name, char, avg_cv_score)
    else:
        avg_score_txt = 'Take average'

    _plot_bracket(
        ax, n_folds, total_width, total_height, avg_score_txt, fontsize=fontsize,
    )

    if flip_yaxis:
        ax.invert_yaxis()

    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    ax.axis('off')

    return fig, ax
#------------------------------------------------------------------------------
def _plot_one_row_of_rectangles(
        ax, n_boxes=5, southwest_corner=(0, 0),
        box_height=0.6, box_width=0.9, gap_frac=0.05,
        show_which_box_as_test=-1,
        train_set_color='green', test_set_color='orange',
        alpha=0.3, text=None, fontsize=None,
):
    '''
    Draw a single horizontal row of boxes onto ``ax``, optionally with text.

    Parameters
    ----------
    ax :
        Figure axes object.
    n_boxes : int
        How many boxes the row contains.
    southwest_corner : (float, float)
        The (x, y) coordinate of the south-west corner of the first box of
        the row, before the gap is applied.
    box_height : float
        Height of each box (gap included).
    box_width : float
        Width of each box (gap included).
    gap_frac : float
        Fraction of the box width/height to be left blank as gap.
    show_which_box_as_test : int
        The 0-based index of the box to be colored as the "test" box. With
        -1, every box is colored as a "train" box.
    train_set_color : str or tuple<float>
        Face color of the "train" boxes. Can be a color name or rgb.
    test_set_color : str or tuple<float>
        Face color of the "test" box. Can be a color name or rgb.
    alpha : float
        Opacity of the boxes.
    text : list<str> or ``None``
        One text per box. If ``None``, no text is drawn.
    fontsize : float
        The font size of the texts.

    Returns
    -------
    ax :
        Figure axes object.
    '''
    def _build_box(index):
        # Shift the i-th box to the right, then shrink it to leave a gap.
        origin_x, origin_y = southwest_corner
        left, bottom, shrunk_width, shrunk_height = __add_gap_to_coord(
            origin_x + index * box_width, origin_y, box_width, box_height,
            gap_frac=gap_frac,
        )
        return Rectangle((left, bottom), shrunk_width, shrunk_height)

    boxes = [_build_box(i) for i in range(n_boxes)]
    test_boxes = [
        box for i, box in enumerate(boxes) if i == show_which_box_as_test
    ]
    train_boxes = [
        box for i, box in enumerate(boxes) if not i == show_which_box_as_test
    ]

    box_edge_width = 0.7
    train_collection = PatchCollection(
        train_boxes, edgecolor='k', lw=box_edge_width,
        facecolor=train_set_color, alpha=alpha,
    )
    test_collection = PatchCollection(
        test_boxes, edgecolor='k', lw=box_edge_width,
        facecolor=test_set_color, alpha=alpha,
    )
    ax.add_collection(train_collection)
    ax.add_collection(test_collection)

    if text is None:
        return ax

    __add_text(
        ax, text, n_boxes=n_boxes, southwest_corner=southwest_corner,
        box_height=box_height, box_width=box_width, fontsize=fontsize,
    )
    return ax
#------------------------------------------------------------------------------
def __add_gap_to_coord(x0, y0, width, height, gap_frac=0.05):
    '''
    Shrink a box by ``gap_frac`` in each dimension, keeping it centered.

    Parameters
    ----------
    x0, y0 : float
        South-west corner of the original box.
    width, height : float
        Size of the original box.
    gap_frac : float
        Fraction of the width/height to be removed.

    Returns
    -------
    (x1, y1, new_width, new_height) : tuple<float>
        South-west corner and size of the shrunk box.
    '''
    # Half of the removed extent goes on each side of the box
    x_margin = width * gap_frac / 2.0
    y_margin = height * gap_frac / 2.0
    remaining = 1 - gap_frac
    return x0 + x_margin, y0 + y_margin, width * remaining, height * remaining
#------------------------------------------------------------------------------
def __add_text(
        ax, text, n_boxes=5, southwest_corner=(0, 0),
        box_height=0.6, box_width=0.9, fontsize=10,
):
    '''
    Write one text at the center of each box of a row.

    Parameters
    ----------
    ax :
        Figure axes object.
    text : list<str>
        The texts to write; there must be exactly ``n_boxes`` of them.
    n_boxes : int
        Number of boxes on the row.
    southwest_corner : (float, float)
        South-west corner of the first box of the row.
    box_height : float
        Height of each box.
    box_width : float
        Width of each box.
    fontsize : float
        The font size of the texts.

    Returns
    -------
    ax :
        Figure axes object.
    '''
    assert(len(text) == n_boxes)
    centers_x, centers_y = ___get_mid_points(
        n_boxes=n_boxes, southwest_corner=southwest_corner,
        box_height=box_height, box_width=box_width,
    )
    for idx, (center_x, center_y) in enumerate(zip(centers_x, centers_y)):
        ax.text(
            center_x, center_y, text[idx],
            ha='center', va='center', fontsize=fontsize,
        )
    return ax
#------------------------------------------------------------------------------
def ___get_mid_points(n_boxes=5, southwest_corner=(0, 0), box_height=0.6, box_width=0.9):
    '''
    Compute the center coordinates of every box on a row.

    Parameters
    ----------
    n_boxes : int
        Number of boxes on the row.
    southwest_corner : (float, float)
        South-west corner of the first box of the row.
    box_height : float
        Height of each box.
    box_width : float
        Width of each box.

    Returns
    -------
    x_mid : list<float>
        X coordinates of the box centers, from left to right.
    y_mid : list<float>
        Y coordinates of the box centers (all boxes share the same value).
    '''
    x0, y0 = southwest_corner
    x_mid = [x0 + box_width / 2.0 + i * box_width for i in range(n_boxes)]
    y_mid = [y0 + box_height / 2.0 for _ in range(n_boxes)]
    return x_mid, y_mid
#------------------------------------------------------------------------------
def _plot_bracket(
        ax, n_boxes, total_width, total_height, text, gap_frac=0.02,
        c='gray', lw=1.0, fontsize=10,
):
    '''
    Draw a bracket just right of ``total_width`` and put a text beside it.

    The bracket consists of four line segments: two short slanted ends (at
    y = 0 and y = ``total_height``), a vertical line joining them, and a
    short horizontal stem at mid-height that points to the text.

    Parameters
    ----------
    ax :
        Figure axes object.
    n_boxes : int
        Not used by the current implementation.
    total_width : float
        X extent of the area that the bracket is attached to.
    total_height : float
        Y extent that the bracket spans.
    text : str
        The text shown next to the bracket (rotated by 270 degrees).
    gap_frac : float
        Gap between the area and the bracket, as a fraction of
        ``total_width``. The length of the short segments is twice this gap.
    c : str or tuple<float>
        Color of the bracket lines.
    lw : float
        Line width of the bracket lines.
    fontsize : float
        The font size of the text.
    '''
    tick_len = total_width * gap_frac * 2

    x_outer = total_width * (1 + gap_frac)
    x_spine = x_outer + tick_len
    x_stem_end = x_spine + tick_len
    x_text = x_stem_end + tick_len / 2.0  # where to put text

    y_start = 0
    y_end = total_height
    y_start_inner = y_start + tick_len
    y_end_inner = y_end - tick_len
    y_middle = (y_start + y_end) / 2.0

    segments = (
        ([x_outer, x_spine], [y_start, y_start_inner]),        # slanted end
        ([x_outer, x_spine], [y_end, y_end_inner]),            # slanted end
        ([x_spine, x_spine], [y_start_inner, y_end_inner]),    # vertical line
        ([x_spine, x_stem_end], [y_middle, y_middle]),         # stem
    )
    for xs, ys in segments:
        ax.plot(xs, ys, c=c, lw=lw)

    ax.text(
        x_text, y_middle, text,
        ha='left', va='center', fontsize=fontsize, rotation=270,
    )