com.salesforce.op.stages.impl.classification
Input Features type
Checks the input length
input features
true if the input size is as expected, false otherwise
Check if the stage is serializable
Failure if not serializable
This method is used to make a copy of the instance with new parameters in several methods in Spark internals. By default it will find the constructor and make a copy of any class (AS LONG AS ALL CONSTRUCTOR PARAMS ARE VALS, this is why type tags are written as implicit vals in base classes).
Note that the convention in Spark is to have the uid be a constructor argument, so that copies will share a uid with the original (developers should follow this convention).
new parameters to add to the instance
a new instance with the same uid
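A minimal sketch of that contract (not from the source; it simply exercises Spark's standard copy mechanism):

import com.salesforce.op.stages.impl.classification.OpXGBoostClassifier
import org.apache.spark.ml.param.ParamMap

// Copying with no extra params yields a new instance that shares the original's uid.
val original = new OpXGBoostClassifier()
val copied = original.copy(ParamMap.empty)
assert(copied.uid == original.uid)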
Function that fits the binary model
Gets names of parameters that control input columns for Spark stage
Gets an input feature. Note: this method IS NOT safe to use outside the driver, please use the getTransientFeature method instead
array of features
NoSuchElementException
if the features are not set
RuntimeException
in case one of the features is null
Gets the input features. Note: this method IS NOT safe to use outside the driver, please use the getTransientFeatures method instead
array of features
NoSuchElementException
if the features are not set
RuntimeException
in case one of the features is null
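Since the array accessors above are only safe on the driver, the transient accessors are the recommended alternative inside executors. A hedged sketch under that assumption (the record type and feature names are illustrative, not from the source):

import com.salesforce.op.features.FeatureBuilder
import com.salesforce.op.features.types._
import com.salesforce.op.stages.impl.classification.OpXGBoostClassifier
import org.apache.spark.ml.linalg.Vectors

// Illustrative input record and features
case class Record(label: Double, values: Seq[Double])
val label = FeatureBuilder.RealNN[Record].extract(_.label.toRealNN).asResponse
val featVec = FeatureBuilder.OPVector[Record]
  .extract(r => Vectors.dense(r.values.toArray).toOPVector).asPredictor

val stage = new OpXGBoostClassifier().setInput(label, featVec)
stage.getInputFeatures      // driver-only access to the raw input features
stage.getTransientFeatures  // TransientFeature wrappers, safe to use off the driver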
Method to access the local version of the stage being wrapped
Option of the MLeap runtime version of the Spark stage after reloading as local
Output features that will be created by this stage
feature of type OutputFeatures
Gets names of parameters that control output columns for Spark stage
Name of output feature (i.e. column created by this stage)
Method to access the spark stage being wrapped
Option of spark ml stage
Gets a save path for wrapped spark stage
Gets an input feature at index i
input index
maybe an input feature
Gets the input Features
Function to convert InputFeatures to an Array of FeatureLike
an Array of FeatureLike
Function to be called on getMetadata
Function to be called on setInput
Short unique name of the operation this stage performs
operation name
Function to convert OutputFeatures to an Array of FeatureLike
an Array of FeatureLike
Should output feature be a response? Yes, if any of the input features are.
true if the output feature should be a response
the predictor to wrap
L1 regularization term on weights; increasing this value will make the model more conservative. [default=0]
Initial prediction (aka base margin) column name.
Specify the learning task and the corresponding learning objective. options: reg:linear, reg:logistic, binary:logistic, binary:logitraw, count:poisson, multi:softmax, multi:softprob, rank:pairwise, reg:gamma. default: reg:linear
Checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that the trained model will get checkpointed every 10 iterations. Note: checkpoint_path must also be set if the checkpoint interval is greater than 0.
The hdfs folder to load and save checkpoint boosters. default: empty_string
Subsample ratio of columns for each split, in each level. [default=1] range: (0,1]
Subsample ratio of columns when constructing each tree. [default=1] range: (0,1]
Customized evaluation function provided by user. default: null
Customized objective function provided by user. default: null
Step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and eta actually shrinks the feature weights to make the boosting process more conservative. [default=0.3] range: [0,1]
Evaluation metrics for validation data; a default metric will be assigned according to the objective (rmse for regression, error for classification, and mean average precision for ranking). options: rmse, mae, logloss, error, merror, mlogloss, auc, aucpr, ndcg, map, gamma-deviance
Minimum loss reduction required to make a further partition on a leaf node of the tree. The larger, the more conservative the algorithm will be. [default=0] range: [0, Double.MaxValue]
Growth policy for fast histogram algorithm
Input features that will be used by the stage
feature of type InputFeatures
Sets input features
feature like type
array of input features
this stage
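For this particular classifier the inputs are the response feature and the feature vector, and setInput returns the stage itself so calls can be chained. A sketch under that assumption, reusing the illustrative label and featVec features from the earlier sketch:

// setInput returns this stage; the configured stage can then be queried for its output feature.
val classifier = new OpXGBoostClassifier().setInput(label, featVec)
val prediction = classifier.getOutput()  // the Prediction feature produced by this stage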
L2 regularization term on weights; increasing this value will make the model more conservative. [default=1]
Parameter of linear booster. L2 regularization term on bias, default 0 (no L1 reg on bias because it is not important)
Maximum number of bins in histogram
Maximum delta step we allow each tree's weight estimation to be. If the value is set to 0, it means there is no constraint. If it is set to a positive value, it can help make the update step more conservative. Usually this parameter is not needed, but it might help in logistic regression when the classes are extremely imbalanced. Setting it to a value of 1-10 might help control the update. [default=0] range: [0, Double.MaxValue]
Maximum depth of a tree; increasing this value will make the model more complex and more likely to overfit. [default=6] range: [1, Int.MaxValue]
Maximum number of nodes to be added. Only relevant when grow_policy=lossguide is set.
Defines the expected optimization of the evaluation metrics: true to maximize, otherwise minimize
Minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to the minimum number of instances needed in each node. The larger, the more conservative the algorithm will be. [default=1] range: [0, Double.MaxValue]
The value treated as missing
Parameter of Dart booster. type of normalization algorithm, options: {'tree', 'forest'}. [default="tree"]
Number of threads used per worker. default 1
Number of classes
If non-zero, the training will be stopped after a specified number of consecutive increases in any evaluation metric.
The number of rounds for boosting
Number of workers used to train xgboost model. default: 1
Specify the learning task and the corresponding learning objective. options: reg:squarederror, reg:logistic, binary:logistic, binary:logitraw, count:poisson, multi:softmax, multi:softprob, rank:pairwise, reg:gamma. default: reg:squarederror
Objective type used for training. For options see ml.dmlc.xgboost4j.scala.spark.params.LearningTaskParams
Parameter of Dart booster. dropout rate. [default=0.0] range: [0.0, 1.0]
Parameter for Dart booster. Type of sampling algorithm. "uniform": dropped trees are selected uniformly. "weighted": dropped trees are selected in proportion to weight. [default="uniform"]
Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: sum(negative cases) / sum(positive cases). [default=1]
Random seed for the C++ part of XGBoost and train/test splitting.
0 means printing running messages, 1 means silent mode. default: 0
This is only used for the approximate greedy algorithm. This roughly translates into O(1 / sketch_eps) number of bins. Compared to directly selecting the number of bins, this comes with a theoretical guarantee on sketch accuracy. [default=0.03] range: (0, 1)
Parameter of Dart booster. probability of skip dropout. If a dropout is skipped, new trees are added in the same manner as gbtree. [default=0.0] range: [0.0, 1.0]
Sets a save path for wrapped spark stage
Subsample ratio of the training instances. Setting it to 0.5 means that XGBoost randomly collects half of the data instances to grow trees, and this will prevent overfitting. [default=1] range: (0,1]
The maximum time to wait for the job requesting new workers. default: 30 minutes
Rabit tracker configurations. The parameter must be provided as an instance of the TrackerConf class, which has the following definition:
case class TrackerConf(workerConnectionTimeout: Duration, trainingTimeout: Duration, trackerImpl: String)
See below for detailed explanations.
Choice between "python" or "scala". The former utilizes the Java wrapper of the Python Rabit tracker (in dmlc_core), and does not support timeout settings. The "scala" version removes Python components, and fully supports timeout settings.
The timeout value should take the time of data loading and pre-processing into account, due to the lazy execution of Spark's operations. Alternatively, you may force Spark to perform data transformation before calling XGBoost.train(), so that this timeout truly reflects the connection delay. Set a reasonable timeout value to prevent model training/testing from hanging indefinitely, possibly due to network issues. Note that a zero timeout value means to wait indefinitely (equivalent to Duration.Inf). Ignored if the tracker implementation is "python".
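Assuming the TrackerConf definition quoted above, a Scala-tracker configuration with explicit timeouts might look like the following sketch (field names follow the definition above; the values are illustrative):

import scala.concurrent.duration._
import ml.dmlc.xgboost4j.scala.spark.TrackerConf

// Scala tracker implementation with explicit connection and training timeouts
val trackerConf = TrackerConf(
  workerConnectionTimeout = 60.seconds,
  trainingTimeout = 30.minutes,
  trackerImpl = "scala"
)
// trackerConf would then be supplied through the stage's tracker-conf parameter.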
Fraction of training points to use for testing.
The tree construction algorithm used in XGBoost. options: {'auto', 'exact', 'approx'} [default='auto']
Whether to use external memory as cache. default: false
Weight column name. If this is not set or empty, we treat all instance weights as 1.0.
Stage unique name consisting of the stage operation name and uid
stage name
This function translates the input and output features into spark schema checks and changes that will occur on the underlying data frame
schema of the input data frame
a new schema with the output features added
Type tag of the output
Type tag of the output value
stage uid
Wrapper around the XGBoost classifier XGBoostClassifier
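Putting the pieces together, a typical configuration of the wrapper might look like the sketch below. The setter names are assumed to mirror the underlying XGBoost parameters documented above, and label and featVec are the illustrative features from the earlier sketch:

val xgbPrediction = new OpXGBoostClassifier()
  .setInput(label, featVec)
  .setEta(0.1)        // step size shrinkage
  .setMaxDepth(6)     // maximum tree depth
  .setNumRound(100)   // number of boosting rounds
  .getOutput()        // the Prediction feature created by this stage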