Source code for mlens.visualization.var_analysis

"""ML-ENSEMBLE

:author: Sebastian Flennerhag
:copyright: 2017-2018
:licence: MIT

Explained variance plots.
"""

from __future__ import division, print_function

import numpy as np

import warnings

try:
    from pandas import DataFrame, Series
except ImportError:
    DataFrame = Series = None
    warnings.warn("Pandas not installed. Visualization module may not work "
                  "as intended.", ImportWarning)

try:
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D
    from sklearn.decomposition import KernelPCA
    from matplotlib.colors import ListedColormap
    from seaborn import color_palette
except:
    warnings.warn("Matplotlib and Seaborn not installed. Cannot load "
                  "visualization module.", ImportWarning)


def pca_comp_plot(X, y=None, figsize=(10, 8),
                  title='Principal Components Comparison',
                  title_font_size=14, show=True, **kwargs):
    """Function for comparing PCA analysis.

    Function compares across 2 and 3 dimensions and linear and rbf kernels.
    Four subplots are drawn on one figure in a 2x2 grid: the top row holds
    the 2-dimensional projections, the bottom row the 3-dimensional ones.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        input matrix to be used for prediction.

    y : array-like of shape = [n_samples, ] or None (default = None)
        training labels to be used for color highlighting.

    figsize : tuple (default = (10, 8))
        Size of figure.

    title : str
        figure title if shown. Only applied when ``show`` is ``True``.

    title_font_size : int
        title font size.

    show : bool (default = True)
        whether to print figure :obj:`matplotlib.pyplot.show`.

    **kwargs : optional
        optional arguments to pass to
        :class:`mlens.visualization.pca_plot`.

    Returns
    -------
    ax : list
        list of the four axis objects, ordered
        (2d linear, 2d rbf, 3d linear, 3d rbf).

    See Also
    --------
    :class:`mlens.visualization.pca_plot`
    """
    kernels = ['linear', 'rbf']
    fig = plt.figure(figsize=figsize)
    axes = []

    # 221/222 is the top row of the 2x2 grid, 223/224 the bottom row.
    for dim, frame in [(2, 221), (3, 223)]:
        # Integer comparison must use equality, not identity. The 3d
        # projection is decided per dimension rather than by loop order.
        subplot_kwarg = {'projection': '3d'} if dim == 3 else {}

        for offset, kernel in enumerate(kernels):
            subplot = fig.add_subplot(frame + offset, **subplot_kwarg)

            # Keyword arguments: recent scikit-learn releases only accept
            # ``n_components`` positionally for KernelPCA.
            estimator = KernelPCA(n_components=dim, kernel=kernel)
            subplot = pca_plot(X, estimator, y, ax=subplot, show=False,
                               **kwargs)
            subplot.set_title('%s kernel, %i dims' % (kernel, dim))

            # Whiten background of 3d axes
            if dim == 3:
                subplot.set_facecolor((1, 1, 1))

            axes.append(subplot)

    if show:
        fig.suptitle(title, fontsize=title_font_size)
        plt.show()

    return axes
def pca_plot(X, estimator, y=None, cmap=None, figsize=(10, 8),
             title='Principal Components Analysis', title_font_size=14,
             show=True, ax=None, **kwargs):
    """Function to plot a PCA analysis of 1, 2, or 3 dims.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        matrix to perform PCA analysis on.

    estimator : instance
        PCA estimator. Assumes a Scikit-learn API: must expose
        ``fit_transform`` and an ``n_components`` attribute equal to
        1, 2 or 3.

    y : array-like of shape = [n_samples, ] or None (default = None)
        training labels to be used for color highlighting.

    cmap : object, optional
        cmap object to pass to :obj:`matplotlib.pyplot.scatter`. If not
        given, a colormap is built from the seaborn ``'husl'`` palette.

    figsize : tuple (default = (10, 8))
        Size of figure. Only used when ``ax`` is not supplied.

    title : str
        figure title if shown.

    title_font_size : int
        title font size.

    show : bool (default = True)
        whether to print figure :obj:`matplotlib.pyplot.show`.

    ax : object, optional
        axis to attach plot to. For three components this must be a 3d
        axis.

    **kwargs : optional
        arguments to pass to :obj:`matplotlib.pyplot.scatter`.

    Returns
    -------
    ax :
        the axis the plot was attached to (the supplied ``ax``, or the
        newly created one).

    Raises
    ------
    ValueError
        if ``estimator.n_components`` is not one of 1, 2 or 3.
    """
    # DataFrame and Series are None when pandas is not installed;
    # isinstance would raise TypeError on a tuple of None, so guard first.
    if DataFrame is not None and isinstance(X, (DataFrame, Series)):
        Z = X.values
    else:
        Z = X
    Z = estimator.fit_transform(Z)

    n_components = estimator.n_components
    # Validate before any figure is created so that a bad estimator does
    # not leave an empty figure behind.
    if n_components not in (1, 2, 3):
        raise ValueError("'n_components' is too large to visualize. "
                         "Set to one of [1, 2, 3].")

    # prep figure if no axis supplied; create exactly one axis with the
    # right projection instead of stacking a 3d axis on top of a 2d one.
    if ax is None:
        fig = plt.figure(figsize=figsize)
        if n_components == 3:
            ax = fig.add_subplot(111, projection='3d')
        else:
            ax = fig.add_subplot(111)

    if cmap is None:
        cmap = ListedColormap(color_palette('husl'))

    if n_components == 1:
        # scatter needs two coordinates: draw the single component along
        # the x-axis on a zero baseline.
        component = np.ravel(Z)
        ax.scatter(component, np.zeros_like(component), c=y, cmap=cmap,
                   **kwargs)
    elif n_components == 2:
        ax.scatter(Z[:, 0], Z[:, 1], c=y, cmap=cmap, **kwargs)
    else:
        ax.scatter(Z[:, 0], Z[:, 1], Z[:, 2], c=y, cmap=cmap, **kwargs)

    if show:
        # Title the axis that was drawn on, which is not necessarily
        # pyplot's current axis when ``ax`` was supplied by the caller.
        ax.set_title(title, fontsize=title_font_size)
        plt.show()

    return ax
def exp_var_plot(X, estimator, figsize=(10, 8), buffer=0.01, set_labels=True,
                 title='Explained variance ratio', title_font_size=14,
                 show=True, ax=None, **kwargs):
    """Function to plot the explained variance using PCA.

    The cumulative explained variance ratio is drawn as a step function
    over the number of components.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        input matrix to be used for prediction.

    estimator : instance
        PCA estimator instance, assumes a Scikit-learn API
        (``set_params``, ``fit`` and ``explained_variance_ratio_``).
        Note that the estimator is modified in place: its
        ``n_components`` parameter is set to ``None`` and it is fitted
        on ``X``.

    figsize : tuple (default = (10, 8))
        Size of figure. Only used when ``ax`` is not supplied.

    buffer : float (default = 0.01)
        For creating a buffer around the edges of the graph. The buffer
        added is calculated as ``num_components`` * ``buffer``,
        where ``num_components`` determine the length of the x-axis.

    set_labels : bool
        whether to set axis labels.

    title : str
        figure title if shown.

    title_font_size : int
        title font size.

    show : bool (default = True)
        whether to print figure using :obj:`matplotlib.pyplot.show`.

    ax : object, optional
        axis to attach plot to.

    **kwargs : optional
        optional arguments passed to the :obj:`matplotlib.pyplot.step`
        function.

    Returns
    -------
    ax :
        the axis the plot was attached to (the supplied ``ax``, or the
        newly created one).
    """
    # Fit with n_components=None so the estimator does not truncate the
    # number of components it reports.
    estimator.set_params(n_components=None)
    ind_var_exp = estimator.fit(X).explained_variance_ratio_
    cum_var_exp = np.cumsum(ind_var_exp)

    n_comp = len(ind_var_exp)
    x = list(range(1, n_comp + 1))

    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    ax.step(x, cum_var_exp, **kwargs)

    # Pad the axis limits so the curve does not sit on the frame; the
    # y-axis gets a third of the x-axis padding.
    buffer_ = buffer * n_comp
    ax.set_ylim(0, 1 + buffer_ / 3)
    ax.set_xlim(1 - buffer_, n_comp + buffer_)
    ax.set_xticks(x)

    if set_labels:
        ax.set_xlabel('Number of Components')
        ax.set_ylabel('Explained variance ratio')

    if show:
        # Title the axis that was drawn on, which is not necessarily
        # pyplot's current axis when ``ax`` was supplied by the caller.
        ax.set_title(title, fontsize=title_font_size)
        plt.show()

    return ax