What loss function does the "multinomial" distribution in R's gbm package use?

data-mining r gbm multiclass-classification
2022-02-20 20:05:26

Every distribution in R's gbm package is associated with a loss function. For example, when we set distribution = 'binomial', the loss function chosen internally is the logistic loss. Can anyone explain how gbm handles multiclass classification and which loss function it uses for it? That is, when we set distribution = 'multinomial', does it perform the multiclass classification under the hood via one-vs-all, or all-vs-all?

1 Answer

Here is the relevant source code. I'm pretty sure it uses cross-entropy for the multiclass case:

//  GBM by Greg Ridgeway  Copyright (C) 2003

#include "multinomial.h"

CMultinomial::CMultinomial(int cNumClasses, int cRows)
{
   mcNumClasses = cNumClasses;
   mcRows = cRows;

   madProb = new double[cNumClasses * cRows];
}

CMultinomial::~CMultinomial()
{
   if(madProb != NULL)
   {
      delete [] madProb;
   }
}


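// UpdateParams maps the current scores adF (plus any offset) to class
// probabilities via a softmax. The arrays are stacked column-wise, so class kk
// of row ii lives at index ii + kk*mcRows; note that the observation weights
// are folded into the softmax normalization.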
GBMRESULT CMultinomial::UpdateParams
(
   double *adF,
   double *adOffset,
   double *adWeight,
   unsigned long cLength
)
{
   // Local variables
   unsigned long ii=0;
   unsigned long kk=0;

   // Set the probabilities for each observation in each class
   for (ii = 0; ii < mcRows; ii++)
   {
      double dClassSum = 0.0;
      for (kk = 0; kk < mcNumClasses; kk++)
      {
         int iIdx = ii + kk * mcRows;
         double dF = (adOffset == NULL) ? adF[iIdx] : adF[iIdx] + adOffset[iIdx];
         madProb[iIdx] = adWeight[iIdx] * exp(dF);
         dClassSum += adWeight[iIdx] * exp(dF);
      }

      dClassSum = (dClassSum > 0) ? dClassSum : 1e-8;

      for (kk = 0; kk < mcNumClasses; kk++)
      {
         madProb[ii + kk * mcRows] /= dClassSum;
      }
   }

   return GBM_OK; 
}


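// ComputeWorkingResponse returns the pseudo-residuals z = y - p: the negative
// gradient of the cross-entropy loss with respect to the scores F under the
// softmax above. adY holds one-hot class indicators in the same stacked layout.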
GBMRESULT CMultinomial::ComputeWorkingResponse
(
    double *adY,
    double *adMisc,
    double *adOffset,
    double *adF, 
    double *adZ, 
    double *adWeight,
    bool *afInBag,
    unsigned long nTrain,
    int cIdxOff
)
{
    unsigned long i = 0;

    for(i=cIdxOff; i<nTrain+cIdxOff; i++)
    {
        adZ[i] = adY[i] - madProb[i];
    }

    return GBM_OK;
}


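// InitF starts every score at zero, i.e. equal class probabilities (for unit
// weights) before the first tree is fit.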
GBMRESULT CMultinomial::InitF
(
    double *adY,
    double *adMisc,
    double *adOffset, 
    double *adWeight,
    double &dInitF, 
    unsigned long cLength
)
{
    dInitF = 0.0;
    return GBM_OK;
}

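// Deviance is the weighted multinomial cross-entropy (negative multinomial
// log-likelihood): -sum w*y*log(p) over all observation/class pairs, divided
// by the total weight. Only each row's true class contributes, since adY is
// one-hot.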
double CMultinomial::Deviance
(
    double *adY,
    double *adMisc,
    double *adOffset, 
    double *adWeight,
    double *adF,
    unsigned long cLength,
    int cIdxOff
)
{
    unsigned long ii=0;
    double dL = 0.0;
    double dW = 0.0;

    for(ii=cIdxOff; ii<cLength+cIdxOff; ii++)
    {
        dL += -adWeight[ii] * adY[ii] * log(madProb[ii]);
        dW += adWeight[ii];
    }

    return dL/dW;
}


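// FitBestConstant sets each terminal node's prediction to a one-step Newton
// estimate, sum(w*z) / sum(w*|z|*(1-|z|)). With one-hot y and z = y - p,
// |z|*(1-|z|) equals p*(1-p), the curvature of the cross-entropy loss.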
GBMRESULT CMultinomial::FitBestConstant
(
    double *adY,
    double *adMisc,
    double *adOffset,
    double *adW,
    double *adF,
    double *adZ,
    unsigned long *aiNodeAssign,
    unsigned long nTrain,
    VEC_P_NODETERMINAL vecpTermNodes,
    unsigned long cTermNodes,
    unsigned long cMinObsInNode,
    bool *afInBag,
    double *adFadj,
    int cIdxOff
)
{
    // Local variables
    GBMRESULT hr = GBM_OK;
    unsigned long iNode = 0;
    unsigned long iObs = 0;

    // Call LocM for the array of values on each node
    for(iNode=0; iNode<cTermNodes; iNode++)
    {
        if(vecpTermNodes[iNode]->cN >= cMinObsInNode)
        {
            // Get the number of nodes here
            double dNum = 0.0;
            double dDenom = 0.0;
            for(iObs=0; iObs<nTrain; iObs++)
            {
                if(afInBag[iObs] && (aiNodeAssign[iObs] == iNode))
                {
                    int iIdx = iObs + cIdxOff;
                    dNum += adW[iIdx] * adZ[iIdx];
                    dDenom += adW[iIdx] * fabs(adZ[iIdx]) * (1 - fabs(adZ[iIdx]));
                }
            }

            dDenom = (dDenom > 0) ? dDenom : 1e-8;

            vecpTermNodes[iNode]->dPrediction = dNum / dDenom;
        }
    }

    return hr;
}

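// BagImprovement recomputes the probabilities with the candidate step
// F + dStepSize*Fadj and reports the resulting decrease in cross-entropy on
// the out-of-bag observations (!afInBag), normalized by their total weight.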
double CMultinomial::BagImprovement
(
    double *adY,
    double *adMisc,
    double *adOffset,
    double *adWeight,
    double *adF,
    double *adFadj,
    bool *afInBag,
    double dStepSize,
    unsigned long nTrain
)
{
    double dReturnValue = 0.0;
    double dW = 0.0;

    unsigned long ii;
    unsigned long kk;

    // Calculate the probabilities after the step
    double *adStepProb = new double[mcNumClasses * mcRows];

    // Assume that this is last class - calculate new prob as in updateParams but
    // using (F_ik + ss*Fadj_ik) instead of F_ik. Then calculate OOB improve
    for (ii = 0; ii < mcRows; ii++)
    {
        double dClassSum = 0.0;
        for (kk = 0; kk < mcNumClasses; kk++)
        {
            int iIdx = ii + kk * mcRows;
            double dF = (adOffset == NULL) ? adF[iIdx] : adF[iIdx] + adOffset[iIdx];
            dF += dStepSize * adFadj[iIdx];
            adStepProb[iIdx] = adWeight[iIdx] * exp(dF);
            dClassSum += adWeight[iIdx] * exp(dF);
        }

        dClassSum = (dClassSum > 0) ? dClassSum : 1e-8;

        for (kk = 0; kk < mcNumClasses; kk++)
        {
            adStepProb[ii + kk * mcRows] /= dClassSum;
        }
    }

    // Calculate the improvement
    for(ii=0; ii<nTrain; ii++)
    {
        if(!afInBag[ii])
        {
            for (kk = 0; kk < mcNumClasses; kk++)
            {
                int iIdx = ii + kk * mcRows;
                dReturnValue += adWeight[iIdx] * adY[iIdx] *
                                (log(adStepProb[iIdx]) - log(madProb[iIdx]));

                dW += adWeight[iIdx] * adY[iIdx];
            }
        }
    }

    // Free the scratch probabilities to avoid leaking them on every call
    delete [] adStepProb;

    return dReturnValue/dW;
}
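
Putting the pieces together: UpdateParams turns the scores into softmax probabilities, ComputeWorkingResponse hands back the negative gradient $z_{ik} = y_{ik} - p_{ik}$, and Deviance evaluates

$$\text{deviance} = -\,\frac{\sum_{i}\sum_{k} w_{ik}\, y_{ik} \log p_{ik}}{\sum_{i}\sum_{k} w_{ik}}, \qquad p_{ik} = \frac{w_{ik}\, e^{F_{ik}}}{\sum_{j} w_{ij}\, e^{F_{ij}}},$$

which is the weight-averaged multinomial cross-entropy. So, to the one-vs-all part of the question: it is neither one-vs-all nor all-vs-all. gbm fits $K$ score functions jointly, coupled through the shared softmax normalization, and minimizes a single cross-entropy loss over all classes at once.

To make that concrete, here is a minimal standalone sketch (my own toy code, not part of gbm) that reproduces the probability, deviance, and working-response calculations for one observation with three classes and unit weights; with a single row, the column-stacked index ii + kk*mcRows used above collapses to just kk:

#include <cmath>
#include <cstdio>

int main()
{
    const int cNumClasses = 3;
    double adF[cNumClasses] = {0.5, -1.0, 2.0}; // current scores F_k
    double adY[cNumClasses] = {0.0, 0.0, 1.0};  // one-hot target: true class is kk = 2
    double adProb[cNumClasses];

    // UpdateParams step: softmax over the scores (all weights are 1 here)
    double dClassSum = 0.0;
    for (int kk = 0; kk < cNumClasses; kk++)
    {
        adProb[kk] = std::exp(adF[kk]);
        dClassSum += adProb[kk];
    }
    for (int kk = 0; kk < cNumClasses; kk++)
    {
        adProb[kk] /= dClassSum;
    }

    // Deviance step: cross-entropy -sum_k y_k * log(p_k)
    double dL = 0.0;
    for (int kk = 0; kk < cNumClasses; kk++)
    {
        dL += -adY[kk] * std::log(adProb[kk]);
    }
    std::printf("deviance = %f\n", dL);

    // ComputeWorkingResponse step: pseudo-residuals z_k = y_k - p_k
    for (int kk = 0; kk < cNumClasses; kk++)
    {
        std::printf("z[%d] = %f\n", kk, adY[kk] - adProb[kk]);
    }
    return 0;
}

Running it prints a deviance of about 0.241 and pseudo-residuals that are positive for the true class (z[2] is about 0.214) and negative for the other two, which is exactly what ComputeWorkingResponse feeds to the next round of trees.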