Source code for mincq.mincq_learner

# -*- coding: utf-8 -*-
""" MinCq learning algorithm

Related papers:
[1] From PAC-Bayes Bounds to Quadratic Programs for Majority Votes (Laviolette et al., 2011)
[2] Risk Bounds for the Majority Vote: From a PAC-Bayesian Analysis to a Learning Algorithm (Germain et al., 2014)

http://graal.ift.ulaval.ca/majorityvote/
"""
__author__ = 'Jean-Francis Roy'

import logging
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, polynomial_kernel
from qp import QP
from majority_vote import MajorityVote
from voter import StumpsVotersGenerator, KernelVotersGenerator



class MinCqLearner(BaseEstimator, ClassifierMixin):
    """ MinCq algorithm learner. See [1, 2].

    Parameters
    ----------
    mu : float
        The fixed value of the first moment of the margin.

    voters_type : string, optional (default='kernel')
        Specifies the type of voters. It must be one of 'kernel', 'stumps'.

    n_stumps_per_attribute : int, optional (default=10)
        Specifies the number of decision stumps per attribute. It is only significant with the 'stumps' voters_type.

    kernel : string, optional (default='rbf')
        Specifies the kernel type to be used in the algorithm. It must be one of 'linear', 'poly', 'rbf'.

    degree : int, optional (default=3)
        Degree of the polynomial kernel function ('poly'). Ignored by all other kernels.

    gamma : float, optional (default=0.0)
        Kernel coefficient for 'rbf' and 'poly'. If gamma is 0.0, 1/n_features is used instead.
    """
    def __init__(self, mu, voters_type, n_stumps_per_attribute=10, kernel='rbf', degree=3, gamma=0.0):
        assert 0 < mu <= 1, "MinCqLearner: mu parameter must be in (0, 1]"
        self.mu = mu
        self.voters_type = voters_type
        self.n_stumps_per_attribute = n_stumps_per_attribute
        self.kernel = kernel
        self.degree = degree
        self.gamma = gamma

        self.majority_vote = None
        self.qp = None
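
    # A minimal usage sketch (editorial addition, not part of the original module):
    # it assumes the package is importable as mincq.mincq_learner and that labels
    # take values in {-1, 1}, which the margin computations below rely on. The
    # dataset and the value of mu are illustrative only.
    #
    #     import numpy as np
    #     from mincq.mincq_learner import MinCqLearner
    #
    #     rng = np.random.RandomState(42)
    #     X = rng.randn(200, 5)
    #     y = np.sign(X[:, 0] + 0.1 * rng.randn(200))  # toy labels in {-1, 1}
    #
    #     learner = MinCqLearner(mu=0.05, voters_type='stumps', n_stumps_per_attribute=10)
    #     learner.fit(X, y)
    #     train_error = np.mean(learner.predict(X) != y)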

    def fit(self, X, y):
        """ Learn the weights of a majority vote using MinCq.

        Parameters
        ----------
        X : ndarray, shape=(n_samples, n_features)
            Training data

        y : ndarray, shape=(n_samples,)
            Training labels

        Returns
        -------
        self : MinCqLearner
        """
        # Preparation of the majority vote, using a voter generator that depends on class attributes.
        voters_generator = None
        assert self.voters_type in ['stumps', 'kernel'], "MinCqLearner: voters_type must be 'stumps' or 'kernel'"

        if self.voters_type == 'stumps':
            assert self.n_stumps_per_attribute >= 1, "MinCqLearner: n_stumps_per_attribute must be positive"
            voters_generator = StumpsVotersGenerator(self.n_stumps_per_attribute)

        elif self.voters_type == 'kernel':
            assert self.kernel in ['linear', 'poly', 'rbf'], "MinCqLearner: kernel must be 'linear', 'poly' or 'rbf'"

            gamma = self.gamma
            if gamma == 0.0:
                gamma = 1.0 / np.shape(X)[1]

            if self.kernel == 'linear':
                voters_generator = KernelVotersGenerator(linear_kernel)
            elif self.kernel == 'poly':
                voters_generator = KernelVotersGenerator(polynomial_kernel, degree=self.degree, gamma=gamma)
            elif self.kernel == 'rbf':
                voters_generator = KernelVotersGenerator(rbf_kernel, gamma=gamma)

        voters = voters_generator.generate(X, y)

        logging.info("MinCq training started...")
        logging.info("Training dataset shape: {}".format(np.shape(X)))
        logging.info("Number of voters: {}".format(len(voters)))

        self.majority_vote = MajorityVote(voters)
        n_base_voters = len(self.majority_vote.weights)

        # Preparation and resolution of the quadratic program.
        logging.info("Preparing QP...")
        self._prepare_qp(X, y)

        try:
            logging.info("Solving QP...")
            solver_weights = self.qp.solve()

            # Conversion of the weights of the n first voters to weights on the implicit 2n voters.
            # See Section 7.1 of [2] for an explanation.
            self.majority_vote.weights = np.array([2 * q - 1.0 / n_base_voters for q in solver_weights])

        except Exception as e:
            logging.error("{}: Error while solving the quadratic program: {}.".format(str(self), str(e)))
            self.majority_vote = None

        return self
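
    # A small numeric illustration of the weight conversion in fit() (an editorial
    # sketch following Section 7.1 of [2]): the QP optimizes q_i in [0, 1/n] for
    # the n base voters, while the implicit 2n-voter distribution puts mass q_i on
    # voter h_i and 1/n - q_i on its complement -h_i. The signed weight of h_i is
    # therefore q_i - (1/n - q_i) = 2 * q_i - 1/n, the expression used above.
    #
    #     n = 4
    #     q = [0.25, 0.25, 0.0, 0.10]          # solver output, each in [0, 1/n] = [0, 0.25]
    #     w = [2 * qi - 1.0 / n for qi in q]   # -> [0.25, 0.25, -0.25, -0.05]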

    def predict(self, X):
        """ Using previously learned majority vote weights, predict the labels of new data points.

        Parameters
        ----------
        X : ndarray, shape=(n_samples, n_features)
            Samples to predict

        Returns
        -------
        predictions : ndarray, shape=(n_samples,)
            The predicted labels
        """
        logging.info("Predicting...")
        if self.majority_vote is None:
            logging.error("{}: Error while predicting: MinCq has not been fit, or fitting has failed. Will output invalid labels.".format(str(self)))
            return np.zeros((len(X),))

        return self.majority_vote.vote(X)

    def predict_proba(self, X):
        """ Using previously learned majority vote weights, predict the labels of new data points with a
        confidence level. The confidence level is the margin of the majority vote.

        Parameters
        ----------
        X : ndarray, shape=(n_samples, n_features)
            Samples to predict

        Returns
        -------
        probabilities : ndarray, shape=(n_samples, 2)
            The confidence of classes -1 and +1 for each sample
        """
        probabilities = np.zeros((np.shape(X)[0], 2))

        # The margin is between -1 and 1; we rescale it to be between 0 and 1.
        margins = self.majority_vote.margin(X)
        margins += 1
        margins /= 2

        # Then, the confidence for class +1 is set to the rescaled margin, and the confidence for class -1
        # is set to 1 - margin.
        probabilities[:, 1] = margins
        probabilities[:, 0] = 1 - margins
        return probabilities
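
    # A quick check of the rescaling above (editorial sketch): a margin of -1 maps
    # to the confidence pair (1.0, 0.0), a margin of 0 to (0.5, 0.5), and a margin
    # of +1 to (0.0, 1.0), so picking the column with the largest confidence agrees
    # with the sign of the margin used by predict():
    #
    #     margins = np.array([-1.0, 0.0, 1.0])
    #     scaled = (margins + 1) / 2            # -> array([0. , 0.5, 1. ])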

    def _prepare_qp(self, X, y):
        """ Prepare MinCq's quadratic program. See Program 1 of [2] for more details on its content.

        Parameters
        ----------
        X : ndarray, shape=(n_samples, n_features)
            Training data

        y : ndarray, shape=(n_samples,)
            Training labels
        """
        self.qp = QP()

        n_voters = len(self.majority_vote.voters)
        n_examples = np.shape(X)[0]
        classification_matrix = np.asarray(self.majority_vote.classification_matrix(X))

        # Objective function: c_matrix[i][j] is the mean, over the training examples, of the product of the
        # outputs of voters i and j.
        c_matrix = np.dot(classification_matrix, classification_matrix.T) / n_examples
        self.qp.quadratic_func = 2.0 * np.matrix(c_matrix)
        self.qp.linear_func = np.matrix(-1.0 * np.mean(self.qp.quadratic_func / 2.0, axis=1)).T

        # First moment of the margin fixed to mu: voter_margins[i] is the mean margin of voter i on the
        # training data.
        voter_margins = np.mean(np.multiply(y, classification_matrix), axis=1)
        self.qp.add_equality_constraints(voter_margins, 0.5 * (self.mu + np.mean(voter_margins)))

        # Lower and upper bounds on the variables.
        self.qp.add_lower_bound(0.0)
        self.qp.add_upper_bound(1.0 / n_voters)
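
# For reference, the program assembled by _prepare_qp has the following shape (an
# editorial summary inferred from the code above, assuming the QP helper follows
# the common convention of minimizing (1/2) x^T P x + f^T x; see Program 1 of [2]
# for the authoritative formulation). With M the (n_voters, n_examples)
# classification matrix, C = M M^T / n_examples and m_i the mean margin of voter i:
#
#     minimize    q^T C q + f^T q,    where f_i = -mean_j C[i, j]
#     subject to  sum_i m_i q_i = (mu + mean(m)) / 2
#                 0 <= q_i <= 1 / n_voters    for every voter i
#
# The majority vote weights are then recovered from q via the 2n-voter conversion
# performed in fit().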