Source code for mincq.voter

#-*- coding:utf-8 -*-
__author__ = "Jean-Francis Roy"

import numpy as np


class Voter(object):
    """
    Abstract voter: a function X -> [-1, 1], where X is an array of samples.

    Concrete voters override :meth:`vote`; the base implementation only
    raises.
    """

    def __init__(self):
        """Nothing to set up: the base voter keeps no state."""

    def vote(self, X):
        """
        Compute the output of the voter on a sample list X.

        Parameters
        ----------
        X : ndarray, shape=(n_samples, n_features)
            Input data to classify

        Returns
        -------
        votes : ndarray, shape=(n_samples,)
            The result of the voter function, for each sample

        Raises
        ------
        NotImplementedError
            Always, since the base class defines no voting rule.
        """
        message = "Voter.vote: Not implemented."
        raise NotImplementedError(message)
class BinaryKernelVoter(Voter):
    """
    A Binary Kernel Voter, which outputs the value of a kernel function whose
    first example is fixed a priori. The sign of the output depends on the
    label (-1 or 1) of the sample on which the kernel voter is based.

    Parameters
    ----------
    x : ndarray, shape=(n_features,)
        The base sample's description vector

    y : int, -1 or 1
        The label of the base sample. Determines if the voter thinks
        "negative" or "positive"

    kernel_function : function
        The kernel function takes two samples and returns a similarity value.
        If the kernel has parameters, they should be set using kwargs
        parameter

    kwargs : keyword arguments (optional)
        Additional parameters for the kernel function
    """

    def __init__(self, x, y, kernel_function, **kwargs):
        # Kept as an assertion so the exception type seen by existing callers
        # does not change. Note that it is skipped when Python runs with -O.
        assert(y in {-1, 1})
        super(BinaryKernelVoter, self).__init__()

        self._x = x
        self._y = y
        self._kernel_function = kernel_function
        self._kernel_kwargs = kwargs

    def vote(self, X):
        """
        Evaluate the kernel between the base sample and every sample of X,
        multiplied by the base sample's label.

        Parameters
        ----------
        X : ndarray, shape=(n_samples, n_features)
            Input data to classify

        Returns
        -------
        votes : ndarray
            The signed kernel values with singleton axes removed, always at
            least 1-D, so a single input sample yields shape (1,) rather
            than a 0-d array.
        """
        # The base sample gets a leading axis so that it is passed to the
        # kernel as a one-row array alongside X.
        base_point_array = np.array([self._x])
        votes = self._y * self._kernel_function(base_point_array, X, **self._kernel_kwargs)

        # np.asarray normalises whatever array-like the kernel returned;
        # squeeze then drops singleton axes. When X holds a single sample the
        # squeeze alone would leave a 0-d array, so atleast_1d restores the
        # per-sample axis. Results for several samples are unchanged.
        votes = np.atleast_1d(np.squeeze(np.asarray(votes)))
        return votes
class DecisionStumpVoter(Voter):
    """
    Generic Attribute Threshold Binary Classifier

    Parameters
    ----------
    attribute_index : int
        The attribute to consider for the classification

    threshold : float
        The threshold value for classification rule

    direction : int (-1 or 1)
        Used to reverse classification decision
    """

    def __init__(self, attribute_index, threshold, direction=1):
        super(DecisionStumpVoter, self).__init__()

        self.direction = direction
        self.threshold = threshold
        self.attribute_index = attribute_index

    def vote(self, points):
        """
        Classify each point by comparing one attribute to the threshold.

        Parameters
        ----------
        points : iterable of indexable samples
            Each sample is indexed with ``attribute_index``.

        Returns
        -------
        votes : list
            One entry per point: ``direction`` when the attribute is strictly
            greater than the threshold, ``-direction`` otherwise.
        """
        votes = []
        for point in points:
            is_above = point[self.attribute_index] > self.threshold
            # Map the boolean comparison onto {-1, +1}, then apply polarity.
            votes.append((is_above * 2 - 1) * self.direction)
        return votes
class VotersGenerator(object):
    """
    Abstract factory that builds a set of voters from training samples.
    """

    def generate(self, X, y=None, self_complemented=False):
        """
        Build the voters from the given samples.

        Parameters
        ----------
        X : ndarray, shape=(n_samples, n_features)
            Input data on which to base the voters

        y : ndarray, shape=(n_samples,), optional
            Input labels, usually determines the decision polarity of each
            voter

        self_complemented : bool
            Determines if complement voters should be generated or not

        Returns
        -------
        voters : ndarray
            An array of voters

        Raises
        ------
        NotImplementedError
            Always, since the base class defines no generation strategy.
        """
        message = "VotersGenerator.generate: not implemented"
        raise NotImplementedError(message)
class StumpsVotersGenerator(VotersGenerator):
    """
    Decision Stumps Voters generator.

    Parameters
    ----------
    n_stumps_per_attribute : int, (default=10)
        Determines how many decision stumps will be created for each
        attribute.
    """

    def __init__(self, n_stumps_per_attribute=10):
        self._n_stumps_per_attribute = n_stumps_per_attribute

    def _find_extremums(self, X, i):
        """
        Return the (minimum, maximum) of attribute ``i`` over the samples
        of X.

        For an empty X the result is ``(inf, -inf)``.
        """
        # np.inf is used instead of the np.Infinity alias, which was removed
        # in NumPy 2.0 and raises AttributeError there.
        mini = np.inf
        maxi = -np.inf

        for x in X:
            if x[i] < mini:
                mini = x[i]
            if x[i] > maxi:
                maxi = x[i]

        return mini, maxi

    def generate(self, X, y=None, self_complemented=False):
        """
        Create evenly spaced decision stumps for every non-constant attribute.

        Parameters
        ----------
        X : ndarray, shape=(n_samples, n_features)
            Input data on which to base the voters

        y : ignored
            Accepted for interface compatibility; not used by this generator.

        self_complemented : bool
            When true, each stump is followed by its complement, i.e. the
            same threshold with direction -1.

        Returns
        -------
        voters : ndarray
            An array of DecisionStumpVoter; empty when X is empty or every
            attribute is constant.
        """
        voters = []
        if len(X) != 0:
            for i in range(len(X[0])):
                mini, maxi = self._find_extremums(X, i)
                inter = (maxi - mini) / (self._n_stumps_per_attribute + 1)

                if inter != 0:
                    # If inter is zero, the attribute is useless as it has a
                    # constant value. We do not add stumps for this attribute.
                    for k in range(self._n_stumps_per_attribute):
                        # Thresholds lie strictly between the two extremes.
                        threshold = mini + inter * (k + 1)
                        voters.append(DecisionStumpVoter(i, threshold, 1))

                        if self_complemented:
                            voters.append(DecisionStumpVoter(i, threshold, -1))

        return np.array(voters)
class KernelVotersGenerator(VotersGenerator):
    """
    Utility function to create binary kernel voters for each (x, y) sample.

    Parameters
    ----------
    kernel_function : function
        The kernel function takes two samples and returns a similarity value.
        If the kernel has parameters, they should be set using kwargs
        parameter

    kwargs : keyword arguments (optional)
        Additional parameters for the kernel function
    """

    def __init__(self, kernel_function, **kwargs):
        self._kernel_function = kernel_function
        self._kernel_kwargs = kwargs

    def generate(self, X, y=None, self_complemented=False):
        """
        Create one BinaryKernelVoter per (sample, label) pair.

        Parameters
        ----------
        X : ndarray, shape=(n_samples, n_features)
            Input data on which to base the voters

        y : ndarray, shape=(n_samples,), optional
            Label (-1 or 1) of each sample. When omitted, every sample is
            given the label 1.

        self_complemented : bool
            When true, a second run of voters with negated labels is
            appended after the first.

        Returns
        -------
        voters : ndarray
            An array of BinaryKernelVoter
        """
        if y is None:
            # One positive label per sample. A bare scalar cannot be zipped
            # with X and used to raise TypeError here.
            y = [1] * len(X)

        voters = [
            BinaryKernelVoter(point, label, self._kernel_function, **self._kernel_kwargs)
            for point, label in zip(X, y)
        ]

        if self_complemented:
            voters.extend(
                BinaryKernelVoter(point, -1 * label, self._kernel_function, **self._kernel_kwargs)
                for point, label in zip(X, y)
            )

        return np.array(voters)