Source code for mincq.voter

#-*- coding:utf-8 -*-
__author__ = "Jean-Francis Roy"

import numpy as np


[docs]class Voter(object):
    """ Base class for a voter (function X -> [-1, 1]), where X is an array of samples
    """
    def __init__(self):
        pass

[docs]    def vote(self, X):
        """ Returns the output of the voter, on a sample list X

        Parameters
        ----------
        X : ndarray, shape=(n_samples, n_features)
            Input data to classify

        Returns
        -------
        votes : ndarray, shape=(n_samples,)
            The result the the voter function, for each sample
        """
        raise NotImplementedError("Voter.vote: Not implemented.")


[docs]class BinaryKernelVoter(Voter):
    """ A Binary Kernel Voter, which outputs the value of a kernel function whose first example is fixed a priori.
    The sign of the output depends on the label (-1 or 1) of the sample on which the kernel voter is based

    Parameters
    ----------
    x : ndarray, shape=(n_features,)
        The base sample's description vector

    y : int, -1 or 1
        The label of the base sample. Determines if the voter thinks "negative" or "positive"

    kernel_function : function
        The kernel function takes two samples and returns a similarity value. If the kernel has parameters, they should
        be set using kwargs parameter

    kwargs : keyword arguments (optional)
        Additional parameters for the kernel function
    """

    def __init__(self, x, y, kernel_function, **kwargs):
        assert(y in {-1, 1})
        super(BinaryKernelVoter, self).__init__()
        self._x = x
        self._y = y
        self._kernel_function = kernel_function
        self._kernel_kwargs = kwargs

    def vote(self, X):
        base_point_array = np.array([self._x])
        votes = self._y * self._kernel_function(base_point_array, X, **self._kernel_kwargs)
        votes = np.squeeze(np.asarray(votes))

        return votes


[docs]class DecisionStumpVoter(Voter):
    """
    Generic Attribute Threshold Binary Classifier

    Parameters
    ----------
    attribute_index : int
        The attribute to consider for the classification

    threshold : float
        The threshold value for classification rule

    direction : int (-1 or 1)
        Used to reverse classification decision
    """
    def __init__(self, attribute_index, threshold, direction=1):
        super(DecisionStumpVoter, self).__init__()
        self.attribute_index = attribute_index
        self.threshold = threshold
        self.direction = direction

    def vote(self, points):
        return [((point[self.attribute_index] > self.threshold) * 2 - 1) * self.direction for point in points]


[docs]class VotersGenerator(object):
    """ Base class to create a set of voters using training samples
    """

[docs]    def generate(self, X, y=None, self_complemented=False):
        """ Generates the voters using samples.

        Parameters
        ----------
        X : ndarray, shape=(n_samples, n_features)
            Input data on which to base the voters

        y : ndarray, shape=(n_samples,), optional
            Input labels, usually determines the decision polarity of each voter

        self_complemented : bool
            Determines if complement voters should be generated or not

        Returns
        -------
        voters : ndarray
            An array of voters
        """
        raise NotImplementedError("VotersGenerator.generate: not implemented")


[docs]class StumpsVotersGenerator(VotersGenerator):
    """ Decision Stumps Voters generator.

    Parameters
    ----------
    n_stumps_per_attribute : int, (default=10)
        Determines how many decision stumps will be created for each attribute.
    """
    def __init__(self, n_stumps_per_attribute=10):
        self._n_stumps_per_attribute = n_stumps_per_attribute

    def _find_extremums(self, X, i):
        mini = np.Infinity
        maxi = -np.Infinity
        for x in X:
            if x[i] < mini:
                mini = x[i]
            if x[i] > maxi:
                maxi = x[i]
        return mini, maxi

    def generate(self, X, y=None, self_complemented=False):
        voters = []
        if len(X) != 0:
            for i in range(len(X[0])):
                t = self._find_extremums(X, i)
                inter = (t[1] - t[0]) / (self._n_stumps_per_attribute + 1)

                if inter != 0:
                    # If inter is zero, the attribute is useless as it has a constant value. We do not add stumps for
                    # this attribute.
                    for x in range(self._n_stumps_per_attribute):
                        voters.append(DecisionStumpVoter(i, t[0] + inter * (x + 1), 1))

                        if self_complemented:
                            voters.append(DecisionStumpVoter(i, t[0] + inter * (x + 1), -1))

        return np.array(voters)


[docs]class KernelVotersGenerator(VotersGenerator):
    """ Utility function to create binary kernel voters for each (x, y) sample.

    Parameters
    ----------
    kernel_function : function
        The kernel function takes two samples and returns a similarity value. If the kernel has parameters, they should
        be set using kwargs parameter

    kwargs : keyword arguments (optional)
        Additional parameters for the kernel function
    """

    def __init__(self, kernel_function, **kwargs):
        self._kernel_function = kernel_function
        self._kernel_kwargs = kwargs

    def generate(self, X, y=None, self_complemented=False):
        if y is None:
            y = 1

        voters = []
        for point, label in zip(X, y):
            voters.append(BinaryKernelVoter(point, label, self._kernel_function, **self._kernel_kwargs))

        if self_complemented:
            for point, label in zip(X, y):
                voters.append(BinaryKernelVoter(point, -1 * label, self._kernel_function, **self._kernel_kwargs))

        return np.array(voters)