"""ML-ENSEMBLE
:author: Sebastian Flennerhag
:copyright: 2017-2018
:licence: MIT
"""
from __future__ import division, print_function
from ..externals.sklearn.base import BaseEstimator, TransformerMixin
class Subset(BaseEstimator, TransformerMixin):
    """Select a subset of features.

    The ``Subset`` class acts as a transformer that reduces the feature set
    to a subset specified by the user.

    Parameters
    ----------
    subset : list
        list of columns indexes to select subset with. Indexes can
        either be of type ``str`` if data accepts slicing on a list of
        strings, otherwise the list should be of type ``int``.

    Attributes
    ----------
    is_df_ : bool
        set by ``fit``; ``True`` when the class name of the fitted data is
        ``'DataFrame'`` or ``'Series'``.

    use_loc_ : bool
        set by ``fit`` only when ``subset`` is not ``None``; ``True`` when
        at least one entry of ``subset`` is a ``str``.
    """

    def __init__(self, subset=None):
        self.subset = subset

    def fit(self, X, y=None):
        """Learn what format the data is stored in.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The data whose type will be inferred.

        y : array-like, optional
            pass-through for Scikit-learn pipeline compatibility.

        Returns
        -------
        self : Subset
            the fitted instance.
        """
        # Inspect the class name instead of using isinstance, so this
        # check does not reference pandas directly.
        self.is_df_ = X.__class__.__name__ in ['DataFrame', 'Series']

        if self.subset is not None:
            # A single string index is enough to flag the whole subset.
            # NOTE(review): presumably this switches the selection to
            # label-based indexing; the consumer is not visible here.
            self.use_loc_ = any(isinstance(idx, str) for idx in self.subset)
        return self
class Shift(BaseEstimator, TransformerMixin):
    r"""Lag operator.

    Shift an input array :math:`X` with :math:`s` steps, i.e. for some time
    series :math:`\mathbf{X} = (X_t, X_{t-1}, ..., X_{0})`,

    .. math::

        L^{s} \mathbf{X} = (X_{t-s}, X_{t-1-s}, ..., X_{s - s})

    Parameters
    ----------
    s : int
        number of lags to generate

    Examples
    --------
    >>> import numpy as np
    >>> from mlens.preprocessing import Shift
    >>> X = np.arange(10)
    >>> L = Shift(2)
    >>> Z = L.fit_transform(X)
    >>> print("X : {}".format(X[2:]))
    X : [2 3 4 5 6 7 8 9]
    >>> print("Z : {}".format(Z))
    Z : [0 1 2 3 4 5 6 7]
    """
    # NOTE(review): the example above calls ``fit_transform``, which needs a
    # ``transform`` method; none is defined in the lines reviewed here, so
    # confirm it exists elsewhere in the class.

    def __init__(self, s):
        self.s = s

    def fit(self, X, y=None):
        """Pass through for compatibility.

        Nothing is learned: both arguments are ignored.

        Parameters
        ----------
        X : object
            ignored.

        y : object, optional
            ignored.

        Returns
        -------
        self : Shift
            the instance itself, unchanged.
        """
        return self