# -*- coding: utf-8 -*-
""" MinCq learning algorithm
Related papers:
[1] From PAC-Bayes Bounds to Quadratic Programs for Majority Votes (Laviolette et al., 2011)
[2] Risk Bounds for the Majority Vote: From a PAC-Bayesian Analysis to a Learning Algorithm (Germain et al., 2014)
http://graal.ift.ulaval.ca/majorityvote/
"""
__author__ = 'Jean-Francis Roy'
import logging
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, polynomial_kernel
from qp import QP
from majority_vote import MajorityVote
from voter import StumpsVotersGenerator, KernelVotersGenerator
class MinCqLearner(BaseEstimator, ClassifierMixin):
"""
MinCq algorithm learner. See [1, 2]
Parameters
----------
    mu : float
        The fixed value of the first moment of the margin. Must be in (0, 1].
    voters_type : string
        Specifies the type of voters.
        It must be one of 'kernel', 'stumps'.
n_stumps_per_attribute : int, optional (default=10)
Specifies the amount of decision stumps per attribute.
It is only significant with 'stumps' voters_type.
kernel : string, optional (default='rbf')
Specifies the kernel type to be used in the algorithm.
It must be one of 'linear', 'poly', 'rbf'.
degree : int, optional (default=3)
Degree of the polynomial kernel function ('poly').
Ignored by all other kernels.
gamma : float, optional (default=0.0)
Kernel coefficient for 'rbf' and 'poly'.
If gamma is 0.0 then 1/n_features will be used instead.
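
    Examples
    --------
    A minimal usage sketch (``X_train``, ``y_train`` and ``X_test`` are placeholder
    ndarrays, not defined here, with labels in {-1, 1})::

        learner = MinCqLearner(mu=0.001, voters_type='stumps')
        learner.fit(X_train, y_train)
        predictions = learner.predict(X_test)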
"""
def __init__(self, mu, voters_type, n_stumps_per_attribute=10, kernel='rbf', degree=3, gamma=0.0):
        assert 0 < mu <= 1, "MinCqLearner: mu parameter must be in (0, 1]"
self.mu = mu
self.voters_type = voters_type
self.n_stumps_per_attribute = n_stumps_per_attribute
self.kernel = kernel
self.degree = degree
self.gamma = gamma
self.majority_vote = None
self.qp = None
    def fit(self, X, y):
""" Learn a majority vote weights using MinCq.
Parameters
----------
X : ndarray, shape=(n_samples, n_features)
Training data
        y : ndarray, shape=(n_samples,)
            Training labels

        Returns
        -------
        self : MinCqLearner
        """
# Preparation of the majority vote, using a voter generator that depends on class attributes
voters_generator = None
assert self.voters_type in ['stumps', 'kernel'], "MinCqLearner: voters_type must be 'stumps' or 'kernel'"
if self.voters_type == 'stumps':
            assert self.n_stumps_per_attribute > 1, "MinCqLearner: n_stumps_per_attribute must be greater than 1"
voters_generator = StumpsVotersGenerator(self.n_stumps_per_attribute)
elif self.voters_type == 'kernel':
assert self.kernel in ['linear', 'poly', 'rbf'], "MinCqLearner: kernel must be 'linear', 'poly' or 'rbf'"
gamma = self.gamma
if gamma == 0.0:
gamma = 1.0 / np.shape(X)[1]
if self.kernel == 'linear':
voters_generator = KernelVotersGenerator(linear_kernel)
elif self.kernel == 'poly':
voters_generator = KernelVotersGenerator(polynomial_kernel, degree=self.degree, gamma=gamma)
elif self.kernel == 'rbf':
voters_generator = KernelVotersGenerator(rbf_kernel, gamma=gamma)
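        # Note: KernelVotersGenerator is typically expected to build one voter per training
        # example, h_i(x) = k(x_i, x); this is an assumption about the companion voter
        # module and is not verified here.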
voters = voters_generator.generate(X, y)
logging.info("MinCq training started...")
logging.info("Training dataset shape: {}".format(str(np.shape(X))))
logging.info("Number of voters: {}".format(len(voters)))
self.majority_vote = MajorityVote(voters)
n_base_voters = len(self.majority_vote.weights)
# Preparation and resolution of the quadratic program
logging.info("Preparing QP...")
self._prepare_qp(X, y)
try:
logging.info("Solving QP...")
solver_weights = self.qp.solve()
# Conversion of the weights of the n first voters to weights on the implicit 2n voters.
# See Section 7.1 of [2] for an explanation.
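            # Concretely, each q_i in [0, 1/n] is the weight of voter h_i, and its implicit
            # complement -h_i receives the remaining 1/n - q_i, so the effective signed weight
            # on h_i is q_i - (1/n - q_i) = 2 * q_i - 1/n.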
self.majority_vote.weights = np.array([2 * q - 1.0 / n_base_voters for q in solver_weights])
        except Exception as e:
            logging.error("{}: Error while solving the quadratic program: {}.".format(str(self), str(e)))
            self.majority_vote = None
return self
    def predict(self, X):
""" Using previously learned majority vote weights, predict the labels of new data points.
Parameters
----------
X : ndarray, shape=(n_samples, n_features)
Samples to predict
Returns
-------
predictions : ndarray, shape=(n_samples,)
The predicted labels
"""
logging.info("Predicting...")
if self.majority_vote is None:
logging.error("{}: Error while predicting: MinCq has not been fit or fitting has failed. Will output invalid labels".format(str(self)))
return np.zeros((len(X),))
return self.majority_vote.vote(X)
    def predict_proba(self, X):
""" Using previously learned majority vote weights, predict the labels of new data points with a confidence
level. The confidence level is the margin of the majority vote.
Parameters
----------
X : ndarray, shape=(n_samples, n_features)
Samples to predict
Returns
-------
        probabilities : ndarray, shape=(n_samples, 2)
            The predicted class probabilities (column 0: class -1, column 1: class +1)
"""
        if self.majority_vote is None:
            logging.error("{}: Error while predicting: MinCq has not been fit or fitting has failed. Will output invalid probabilities".format(str(self)))
            return np.zeros((np.shape(X)[0], 2))
        probabilities = np.zeros((np.shape(X)[0], 2))
# The margin is between -1 and 1, we rescale it to be between 0 and 1.
margins = self.majority_vote.margin(X)
margins += 1
margins /= 2
        # Then, the confidence for class +1 is set to the margin, and the confidence for class -1 is set to 1 - margin.
probabilities[:, 1] = margins
probabilities[:, 0] = 1 - margins
return probabilities
def _prepare_qp(self, X, y):
""" Prepare MinCq's quadratic program. See Program 1 of [2] for more details on its content.
Parameters
----------
X : ndarray, shape=(n_samples, n_features)
Training data
        y : ndarray, shape=(n_samples,)
Training labels
"""
self.qp = QP()
        n_voters = len(self.majority_vote.voters)
classification_matrix = self.majority_vote.classification_matrix(X)
# Objective function
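        # The quadratic term is the n x n matrix M with M_ij = mean_x(h_i(x) * h_j(x)),
        # i.e. the average pairwise agreement of voters i and j on the training sample.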
        c_matrix = np.zeros((n_voters, n_voters))
        for i in range(n_voters):
            for j in range(n_voters):
                c_matrix[i][j] = np.mean(np.multiply(classification_matrix[i], classification_matrix[j]))
        self.qp.quadratic_func = 2.0 * np.matrix(c_matrix)
        self.qp.linear_func = np.matrix(-1.0 * np.mean(self.qp.quadratic_func / 2.0, axis=1)).T
# First moment of the margin fixed to mu.
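        # With signed weights w_i = 2 * q_i - 1/n, fixing the first moment
        # sum_i w_i * mean(y * h_i(x)) = mu is equivalent to the equality constraint
        # sum_i q_i * mean(y * h_i(x)) = (mu + mean_i mean(y * h_i(x))) / 2 used below.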
        first_moments = np.mean(np.multiply(np.asarray(classification_matrix), y), axis=1)
        self.qp.add_equality_constraints(first_moments, 0.5 * (self.mu + np.mean(first_moments)))
# Lower and upper bounds on the variables
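        # Each q_i must lie in [0, 1/n] so that (q_i, 1/n - q_i) defines a valid
        # distribution over the self-complemented voter pair {h_i, -h_i} (see [2]).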
self.qp.add_lower_bound(0.0)
        self.qp.add_upper_bound(1.0 / n_voters)
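

# A minimal smoke-test sketch, not part of the original module: it assumes scikit-learn
# is installed and that the local qp, majority_vote and voter modules are importable.
if __name__ == '__main__':
    from sklearn.datasets import make_classification

    X, y = make_classification(n_samples=100, n_features=4, random_state=42)
    y = 2 * y - 1  # MinCq expects labels in {-1, 1}

    learner = MinCqLearner(mu=0.001, voters_type='stumps')
    learner.fit(X, y)
    print("Training accuracy: {:.3f}".format(np.mean(learner.predict(X) == y)))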