Fixel Algorithms

Adaptive Boosting#

Notebook by:

Revision History#

Version

Date

User

Content / Changes

1.0.000

11/04/2024

Royi Avital

First version

Open In Colab

# Import Packages

# General Tools
import numpy as np
import scipy as sp
import pandas as pd

# Machine Learning
from sklearn.datasets import fetch_openml
from sklearn.ensemble import AdaBoostClassifier

# Miscellaneous
import math
import os
from platform import python_version
import random
import timeit

# Typing
from typing import Callable, Dict, List, Optional, Self, Set, Tuple, Union

# Visualization
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Jupyter
from IPython import get_ipython
from IPython.display import Image
from IPython.display import display
from ipywidgets import Dropdown, FloatSlider, interact, IntSlider, Layout, SelectionSlider
from ipywidgets import interact

Notations#

  • (?) Question to answer interactively.

  • (!) Simple task to add code for the notebook.

  • (@) Optional / Extra self practice.

  • (#) Note / Useful resource / Food for thought.

Code Notations:

someVar    = 2; #<! Notation for a variable
vVector    = np.random.rand(4) #<! Notation for 1D array
mMatrix    = np.random.rand(4, 3) #<! Notation for 2D array
tTensor    = np.random.rand(4, 3, 2, 3) #<! Notation for nD array (Tensor)
tuTuple    = (1, 2, 3) #<! Notation for a tuple
lList      = [1, 2, 3] #<! Notation for a list
dDict      = {1: 3, 2: 2, 3: 1} #<! Notation for a dictionary
oObj       = MyClass() #<! Notation for an object
dfData     = pd.DataFrame() #<! Notation for a data frame
dsData     = pd.Series() #<! Notation for a series
hObj       = plt.Axes() #<! Notation for an object / handler / function handler

Code Exercise#

  • Single line fill

vallToFill = ???
  • Multi Line to Fill (At least one)

# You need to start writing
????
  • Section to Fill

#===========================Fill This===========================#
# 1. Explanation about what to do.
# !! Remarks to follow / take under consideration.
mX = ???

???
#===============================================================#
# Configuration
# %matplotlib inline

# Seed both NumPy's global RNG and the stdlib `random` RNG for reproducible runs
seedNum = 512
np.random.seed(seedNum)
random.seed(seedNum)

# Matplotlib default color palette
lMatPltLibclr = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
# sns.set_theme() #>! Apply SeaBorn theme

# Flag: True when running inside Google Colab (the Colab runtime exposes 'google.colab' via IPython)
runInGoogleColab = 'google.colab' in str(get_ipython())
# Constants

FIG_SIZE_DEF    = (8, 8)
ELM_SIZE_DEF    = 50
CLASS_COLOR     = ('b', 'r')
EDGE_COLOR      = 'k'
MARKER_SIZE_DEF = 10
LINE_WIDTH_DEF  = 2
# Courses Packages
# General Auxiliary Functions
# Make the course's local `utils` package importable regardless of the notebook's nesting depth
import sys
sys.path.append('../')
sys.path.append('../../')
sys.path.append('../../../')
from utils.DataVisualization import PlotBinaryClassData, PlotDecisionBoundaryClosure

Ada Boost Classification#

In this notebook we’ll use the AdaBoost-based classifier.
The AdaBoost concept optimizes a sequence of estimators by optimizing the weights of the data at each iteration.

  • (#) The AdaBoost method is a specific case of the Gradient Boosting method.

  • (#) The main disadvantage of the AdaBoost (And Gradient Boosting) method is the sequential stacking of the models.

# Parameters

# Data
numSamplesCls = 500 #<! Number of samples per class (2 classes -> 1000 samples total)

# Model
numEst = 150 #<! Number of weak estimators (boosting iterations)
adaBoostAlg = 'SAMME' #<! Boosting variant passed to `AdaBoostClassifier`

# Data Visualization
numGridPts = 500 #<! Grid resolution per axis for the decision boundary plot

Generate / Load Data#

# Generate Data

# Two interleaved 2D spirals: one spiral per class, the 2nd is the point
# reflection (negation) of the 1st.
vN    = np.sqrt(np.random.rand(numSamplesCls, 1)) * 480 * 2 * (np.pi / 360) #<! Angle parameter in [0, 480°) converted to radians
vCos  = -vN * np.cos(vN) + np.random.rand(numSamplesCls, 1) / 2 #<! 1st coordinate + uniform noise in [0, 0.5)
vSin  =  vN * np.sin(vN) + np.random.rand(numSamplesCls, 1) / 2 #<! 2nd coordinate + uniform noise in [0, 0.5)
mX1   =  np.c_[vCos, vSin] #<! Spiral of the 1st class (label -1)
mX2   = -np.c_[vCos, vSin] #<! Spiral of the 2nd class (label +1), mirrored through the origin
mX    =  np.r_[mX1, mX2] #<! Features matrix: (2 * numSamplesCls, 2)
vY    =  np.r_[-np.ones(numSamplesCls), np.ones(numSamplesCls)] #<! Labels in {-1, +1}, aligned with the row order of `mX`

numSamples = np.size(vY)

# Boolean masks per class (used below for per-class plotting)
vIdx0 = vY == -1
vIdx1 = vY ==  1

print(f'The features data shape: {mX.shape}')
print(f'The labels data shape: {vY.shape}')

# Decision Boundary Plotter
# Closure over the grid resolution and data bounding box (project helper)
PlotDecisionBoundary = PlotDecisionBoundaryClosure(numGridPts, mX[:, 0].min(), mX[:, 0].max(), mX[:, 1].min(), mX[:, 1].max())
The features data shape: (1000, 2)
The labels data shape: (1000,)

Plot Data#

# Plot the Data

hF, hA = plt.subplots(figsize = (12, 8))

# Scatter plot of both classes on the same axes (project helper)
hA = PlotBinaryClassData(mX, vY, hA = hA)
../../../../_images/c7658c750daef47edb2360e4c98fe669cf73d78af7db67ac27635db3acae37ed.png
  • (?) Is there a simple feature engineering to make the data set linearly separable?

Train an Ada Boosting Model#

The model’s attributes are:

\[\begin{split} \begin{align*} \text{Train error} & =\frac{1}{N}\sum_{i=1}^{N}\mathbb{I}\left\{ \hat{y}_{i}\neq y_{i}\right\} \\ \text{Train loss} & =\frac{1}{N}\sum_{i=1}^{N}\exp\left(-y_{i}H\left(\boldsymbol{x}_{i}\right)\right)\\ \hat{y}_{i} & =\text{sign}\left(H\left(\boldsymbol{x}_{i}\right)\right) \end{align*} \end{split}\]
# Constructing and Training the Model
# Fit on the whole data set; the per-iteration train error / loss are examined interactively below
oAdaBoost = AdaBoostClassifier(n_estimators = numEst, algorithm = adaBoostAlg)
oAdaBoost = oAdaBoost.fit(mX, vY)
# Plot the Model by Number of Estimators

def PlotAdaBoost( numEst: int, oAdaBoostCls: AdaBoostClassifier, mX: np.ndarray, vY: np.ndarray, numGridPts: int ):
    """
    Plots the decision regions induced by the first `numEst` weak learners of a
    trained AdaBoost classifier, alongside the train error / exponential loss
    per boosting iteration.
    Input:
        numEst       - Number of boosting iterations (weak learners) to accumulate.
        oAdaBoostCls - A fitted `AdaBoostClassifier` (SAMME with labels in {-1, +1} - TODO confirm for other label sets).
        mX           - Features matrix, shape (numSamples, 2).
        vY           - Labels vector, shape (numSamples, ), values in {-1, +1}.
        numGridPts   - Grid resolution per axis for the decision regions.
    Remarks:
        - Marker size is proportional to the AdaBoost sample weight after `numEst` iterations.
    """
    
    def Predict(oAdaBoost: AdaBoostClassifier, numEst: int, mX: np.ndarray, vY: Optional[np.ndarray] = None) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        # Manually accumulates the weighted votes of the first `numEst` weak
        # learners (instead of `staged_predict()`) to expose the sample weights.
        numSamples  = mX.shape[0]
        vW          = np.ones(numSamples) / numSamples #<! Uniform initial sample weights
        vH          = np.zeros(numSamples) #<! Accumulated ensemble score H(x)
        vTrainErr   = np.full(numEst, np.nan)
        vLoss       = np.full(numEst, np.nan)
        for mm in range(numEst):
            α_m     = oAdaBoost.estimator_weights_[mm]
            h_m     = oAdaBoost.estimators_[mm]
            vHatYm  = h_m.predict(mX)
            vH     += α_m * vHatYm

            if vY is not None:
                # Reuse `vHatYm` (fix: the original re-ran `h_m.predict(mX)` here)
                vW             = vW * np.exp(-α_m * vY * vHatYm) #<! Weights per sample
                vW            /= np.sum(vW)
                vTrainErr[mm]  = np.mean(np.sign(vH) != vY)
                vLoss[mm]      = np.mean(np.exp(-vH * vY))
        
        vH = np.sign(vH)
        
        return vH, vW, vTrainErr, vLoss
        
    # Evaluation grid covering the bounding box of the data
    v0       = np.linspace(mX[:, 0].min(), mX[:, 0].max(), numGridPts)
    v1       = np.linspace(mX[:, 1].min(), mX[:, 1].max(), numGridPts)
    XX0, XX1 = np.meshgrid(v0, v1)
    XX       = np.c_[XX0.ravel(), XX1.ravel()]

    _, vW, vTrainErr, vLoss = Predict(oAdaBoostCls, numEst, mX, vY)
    ZZ                      = Predict(oAdaBoostCls, numEst, XX)[0]
    ZZ                      = np.reshape(ZZ, XX0.shape)

    numSamples = np.size(mX, 0)
    # Fix: derive the class masks from the `vY` parameter instead of relying on
    # the module level globals `vIdx0` / `vIdx1` (wrong plot for any other `vY`)
    vIdxNeg = vY == -1
    vIdxPos = vY ==  1

    plt.figure  (figsize = (12, 5))
    plt.subplot (1, 2, 1)
    plt.contourf(XX0, XX1, ZZ, colors = ['red', 'blue'], alpha = 0.3)
    plt.scatter (mX[vIdxNeg, 0], mX[vIdxNeg, 1], s = 50 * numSamples * vW[vIdxNeg], color = 'r', edgecolors = 'k')
    plt.scatter (mX[vIdxPos, 0], mX[vIdxPos, 1], s = 50 * numSamples * vW[vIdxPos], color = 'b', edgecolors = 'k')
    plt.title   ('$M = ' + str(numEst) + '$')
                                        
    plt.subplot(1, 2, 2)
    plt.plot   (vTrainErr, 'b', lw = 2, label = 'Train Error')
    plt.plot   (vLoss,     'r', lw = 2, label = 'Train Loss')
    plt.grid   ()
    plt.legend ()
    
    plt.tight_layout()
    plt.show        ()
# Plotting Wrapper

# Bind the trained model, data and grid resolution; leaves the number of
# estimators as the only free parameter for the widget
hPlotAdaBoost = lambda numEst: PlotAdaBoost(numEst, oAdaBoost, mX, vY, numGridPts)
# Interactive Plot
# Slider selects how many of the trained estimators to accumulate (1 .. numEst);
# note the `numEst` keyword here shadows the global `numEst` inside the lambda
mSlider = IntSlider(min = 1, max = numEst, step = 1, value = 1, layout = Layout(width = '80%'))
interact(hPlotAdaBoost, numEst = mSlider);

plt.show()