Source code for mlens.parallel.handles

"""ML-Ensemble

:author: Sebastian Flennerhag
:copyright: 2017-2018
:license: MIT

Handles for mlens.parallel.
"""
from .base import BaseEstimator
from .learner import Learner, Transformer
from ._base_functions import mold_objects, transform
from ..utils import format_name, check_instances
from ..utils.formatting import _check_instances
from ..externals.sklearn.base import clone, BaseEstimator as _BaseEstimator

GLOBAL_GROUP_NAMES = list()
GLOBAL_PIPELINE_NAMES = list()


[docs]class Pipeline(_BaseEstimator): """Transformer pipeline Pipeline class for wrapping a preprocessing pipeline of transformers. .. versionadded: 0.2.0 Parameters ---------- pipeline : list, instance A :class:`~mlens.parallel.Transformer` instance or a list of :class:`~mlens.parallel.Transformer` instances. Accepted input formats:: option_1 = transformer_1 option_2 = [transformer_1, transformer_2] option_3 = [("tr-1", transformer_1), ("tr-2", transformer_2)] option_4 = [transformer_1, ("tr-2", transformer_2)] name : str, optional name of pipeline. return_y : bool, default = False If True, both X and y will be returned in a :func:`~mlens.parallel.handles.Pipeline.transform` call. """ def __init__(self, pipeline, name=None, return_y=False): self.name = format_name(name, 'pipeline', GLOBAL_PIPELINE_NAMES) self.pipeline = _check_instances(pipeline) self.return_y = return_y self._pipeline = None def _run(self, fit, process, X, y=None): """Run job on pipeline.""" out = self._check_empty(process, X, y) if out is not False: return out if fit: self._pipeline = [(tr_name, clone(tr)) for tr_name, tr in self.pipeline] for tr_name, tr in self._pipeline: if fit: tr.fit(X, y) if len(self._pipeline) > 1 or process: X, y = transform(tr, X, y) if process: if self.return_y: return X, y return X return self def _check_empty(self, process, X, y=None): """Check if empty pipeline and return vacuously""" # TODO: remove ability to set None pipelines: need to modify Evalutor if self.pipeline: return False if not process: return self if self.return_y: return X, y return X
[docs] def fit(self, X, y=None): """Fit pipeline. Note that the :class:`Pipeline` accepts both X and y arguments, and can return both X and y, depending on the transformers. The pipeline itself does no checks on the input. Parameters ---------- X : array-like of shape [n_samples, n_features] Input data y : array-like of shape [n_samples, ] Targets Returns ------- self : instance Fitted pipeline """ return self._run(True, False, X, y)
[docs] def transform(self, X, y=None): """Transform pipeline. Note that the :class:`Pipeline` accepts both X and y arguments, and can return both X and y, depending on the transformers. Pipeline itself does not checks the input. Parameters ---------- X : array-like of shape [n_samples, n_features] Input data y : array-like of shape [n_samples, ] Targets Returns ------- X_processed : array-like of shape [n_samples, n_preprocessed_features] Preprocessed input data y : array-like of shape [n_samples, ], optional Original or preprocessed targets, depending on the transformers. """ return self._run(False, True, X, y)
[docs] def fit_transform(self, X, y=None): """Fit and transform pipeline. Note that the :class:`Pipeline` accepts both X and y arguments, and can return both X and y, depending on the transformers. The pipeline itself does no checks on the input. Parameters ---------- X : array-like of shape [n_samples, n_features] Input data y : array-like of shape [n_samples, ] Targets Returns ------- X_processed : array-like of shape [n_samples, n_preprocessed_features] Preprocessed input data y : array-like of shape [n_samples, ], optional Preprocessed targets """ return self._run(True, True, X, y)
[docs] def get_params(self, deep=True): out = super(Pipeline, self).get_params(deep) if not deep: return out if self.pipeline: for tr_name, tr in self.pipeline: for k, v in tr.get_params(deep=True).items(): out['%s__%s' % (tr_name, k)] = v out[tr_name] = tr return out
[docs]class Group(BaseEstimator): """A handle for learners and transformers that share a common indexer. Lightweight class for pairing a set of independent learners with a set of transformers that all share the same cross-validation strategy. A :class:`Group` instance is an acceptable caller to :class:`~mlens.parallel.ParallelProcessing`. .. versionadded:: 0.2.0 .. note:: All instances will share *the same* indexer. If instances have a different indexer, that indexer will be replaced. .. seealso:: To run a :class:`Group` instance, see :func:`~mlens.parallel.wrapper.run`. To handle several groups, use the :class:`~mlens.parallel.layer.Layer` class. Parameters ---------- indexer : inst, optional A :obj:`~mlens.index` indexer to build learner and transformers on. If not passed, the first indexer of the learners will be enforced on all instances. learners : list, inst, optional :class:`~mlens.parallel.learner.Learner` instance(s) attached to indexer. Note that :class:`Group` overrides previous ``indexer`` parameter settings. transformers : list, inst, optional :class:`~mlens.parallel.learner.Transformer` instance(s) attached to indexer. Note that :class:`Group` overrides previous ``indexer`` parameter settings. name : str, optional name of group **kwargs : optional Optional keyword arguments to the :class:`~mlens.parallel.base.BaseParallel` backend. """ def __init__(self, indexer=None, learners=None, transformers=None, name=None, **kwargs): name = format_name(name, 'group', GLOBAL_GROUP_NAMES) super(Group, self).__init__(name=name, **kwargs) learners, transformers = mold_objects(learners, transformers) if not indexer: indexer = learners[0].indexer # Enforce common indexer self.indexer = indexer for o in learners + transformers: o.set_indexer(self.indexer) self.learners = learners self.transformers = transformers self.__static__.extend(['indexer', 'learners', 'transformers']) def __iter__(self): # We update optional backend kwargs that might have been passed # to ensure these are passed to the instances backend_kwargs = { param: getattr(self, param) for param in ['dtype', 'verbose', 'raise_on_exception'] if hasattr(self, param) } for tr in self.transformers: tr.set_params(**backend_kwargs) yield tr for lr in self.learners: lr.set_params(**backend_kwargs) yield lr @property def __fitted__(self): """Fitted status""" if not self._check_static_params(): return False return all([o.__fitted__ for o in self.learners + self.transformers])
[docs] def get_params(self, deep=True): out = super(Group, self).get_params(deep) if not deep: return out for item in self: for k, v in item.get_params(deep=deep).items(): out['%s__%s' % (item.name, k)] = v out[item.name] = item return out
[docs]def make_group(indexer, estimators, preprocessing, learner_kwargs=None, transformer_kwargs=None, name=None): """Creating a :class:`Group` from a set learners and transformers Utility function for creating mapping a set of estimators and preprocessing pipelines to a :class:`Group` of :class:`~mlens.parallel.learner.Learner` and :class:`~mlens.parallel.learner.Transformer` instances. Parameters ---------- indexer : instance or None, default = None Indexer instance to use. See :obj:`~mlens.index` for details. estimators : dict of lists or list of estimators. If ``preprocessing`` is ``None`` or ``list``, ``estimators`` should be a ``list``. The list can either contain estimator instances, named tuples of estimator instances, or a combination of both. :: option_1 = [estimator_1, estimator_2] option_2 = [("est-1", estimator_1), ("est-2", estimator_2)] option_3 = [estimator_1, ("est-2", estimator_2)] If different preprocessing pipelines are desired, a dictionary that maps estimators to preprocessing pipelines must be passed. The names of the estimator dictionary must correspond to the names of the estimator dictionary. :: preprocessing_cases = {"case-1": [trans_1, trans_2]. "case-2": [alt_trans_1, alt_trans_2]} estimators = {"case-1": [est_a, est_b]. "case-2": [est_c, est_d]} The lists for each dictionary entry can be any of ``option_1``, ``option_2`` and ``option_3``. preprocessing : dict of lists or list, optional, default = None preprocessing pipelines for given layer. If the same preprocessing applies to all estimators, ``preprocessing`` should be a list of transformer instances. The list can contain the instances directly, named tuples of transformers, or a combination of both. :: option_1 = [transformer_1, transformer_2] option_2 = [("trans-1", transformer_1), ("trans-2", transformer_2)] option_3 = [transformer_1, ("trans-2", transformer_2)] If different preprocessing pipelines are desired, a dictionary that maps preprocessing pipelines must be passed. The names of the preprocessing dictionary must correspond to the names of the estimator dictionary. :: preprocessing_cases = {"case-1": [trans_1, trans_2]. "case-2": [alt_trans_1, alt_trans_2]} estimators = {"case-1": [est_a, est_b]. "case-2": [est_c, est_d]} The lists for each dictionary entry can be any of ``option_1``, ``option_2`` and ``option_3``. transformer_kwargs : dict, optional Keyword arguments to pass to the :class:`~mlens.parallel.learner.Transformer` instances. learner_kwargs : dict, optional Keyword arguments to pass to the :class:`~mlens.parallel.learner.Learner` instances. name : str, optional Name of group. Should be unique. """ preprocessing, estimators = check_instances(estimators, preprocessing) if learner_kwargs is None: learner_kwargs = {} if transformer_kwargs is None: transformer_kwargs = {} transformers = [Transformer(estimator=Pipeline(tr, return_y=True), name=case_name, **transformer_kwargs) for case_name, tr in preprocessing] learners = [Learner(estimator=est, preprocess=case_name, name=learner_name, **learner_kwargs) for case_name, learner_name, est in estimators] group = Group(indexer=indexer, learners=learners, transformers=transformers, name=name) return group