com.amazonaws.services.sagemaker.sparksdk.algorithms
The SageMaker TrainingJob and Hosting IAM Role. Used by SageMaker to access S3 and ECR resources. SageMaker hosted Endpoint instances launched by this Estimator run with this role.
The SageMaker TrainingJob Instance Type to use.
The number of instances of instanceType to run a SageMaker Training Job with.
The SageMaker Endpoint Config instance type.
The minimum number of instances in the SageMaker Endpoint Config that can be used to host modelImage.
Serializes Spark DataFrame Rows for transformation by Models built from this Estimator.
Deserializes an Endpoint response into a series of Rows.
An S3 location to upload SageMaker Training Job input data to.
An S3 location where SageMaker stores Training Job output data.
The EBS volume size in gigabytes of each instance.
The columns to project from the Dataset being fit before training. If Optional.empty is passed, no projection occurs and all columns are serialized.
The SageMaker Channel name for the serialized Dataset used as fit input.
The MIME type of the training data.
The SageMaker Training Job S3 data distribution scheme.
The Spark Data Format name used to serialize the Dataset being fit for input to SageMaker.
The Spark Data Format Options used during serialization of the Dataset being fit.
The SageMaker Training Job Channel input mode.
The type of compression to use when serializing the Dataset being fit for input to SageMaker.
A SageMaker Training Job Termination Condition MaxRuntimeInHours.
A KMS key ID for the Output Data Source.
The environment variables that SageMaker will set on the model container during execution.
Defines how a SageMaker Endpoint referenced by a SageMakerModel is created.
Amazon SageMaker client. Used to send CreateTrainingJob, CreateModel, and CreateEndpoint requests.
The region in which to run the algorithm. If not specified, gets the region from the DefaultAwsRegionProviderChain.
AmazonS3. Used to create a bucket for staging SageMaker Training Job input and/or output if either is set to S3AutoCreatePath.
AmazonSTS. Used to resolve the account number when creating staging input/output buckets.
Whether the transformation result on Models built by this Estimator should also include the input Rows. If true, each output Row is formed by a concatenation of the input Row with the corresponding Row produced by SageMaker Endpoint invocation, produced by responseRowDeserializer. If false, each output Row is just taken from responseRowDeserializer.
Whether to remove the training data from S3 after training completes or fails.
The NamePolicyFactory to use when naming SageMaker entities created during fit.
The unique identifier of this Estimator. Used to represent this stage in Spark ML pipelines.
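The constructor parameters above might be wired up as follows. This is a minimal sketch: the instance types, counts, and role ARN are placeholder values, and the named parameters are assumed to match this listing.

  import com.amazonaws.services.sagemaker.sparksdk.IAMRole
  import com.amazonaws.services.sagemaker.sparksdk.algorithms.PCASageMakerEstimator

  // Placeholder role ARN; substitute a real SageMaker execution role.
  val estimator = new PCASageMakerEstimator(
    sagemakerRole = IAMRole("arn:aws:iam::123456789012:role/my-sagemaker-role"),
    trainingInstanceType = "ml.m4.xlarge",  // Training Job instance type
    trainingInstanceCount = 1,              // number of training instances
    endpointInstanceType = "ml.c4.xlarge",  // Endpoint Config instance type
    endpointInitialInstanceCount = 1)       // Endpoint Config initial instance count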
The PCA algorithm mode. Supported options: "regular", "stable", and "randomized". Default: "regular".
Whether to remove the training data from S3 after training completes or fails.
Defines how a SageMaker Endpoint referenced by a SageMakerModel is created.
The minimum number of instances in the SageMaker Endpoint Config that can be used to host modelImage.
The SageMaker Endpoint Config instance type.
Number of extra components to compute. Must be -1 or > 0. Valid for "randomized" mode; ignored by other modes. Initializes a random matrix for covariance computation that is independent of the desired num_components. As it grows larger, the solution is more accurate but runtime and memory consumption increase linearly. Default: -1.
The dimension of the input vectors. Must be > 0. Required.
Fits a SageMakerModel on dataSet by running a SageMaker training job.
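For example, continuing the construction sketch above, where trainingData stands for any DataFrame with a Vector column named "features":

  // Runs a SageMaker training job, creates a model and endpoint,
  // and returns a SageMakerModel wired to that endpoint.
  val model = estimator.fit(trainingData)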
A map from hyperParameter names to their respective values for training.
The number of examples in a mini-batch. Must be > 0. Required.
The environment variables that SageMaker will set on the model container during execution.
A SageMaker Model hosting Docker image URI.
Whether the transformation result on Models built by this Estimator should also include the input Rows. If true, each output Row is formed by a concatenation of the input Row with the corresponding Row produced by SageMaker Endpoint invocation, produced by responseRowDeserializer. If false, each output Row is just taken from responseRowDeserializer.
The NamePolicyFactory to use when naming SageMaker entities created during fit.
Number of principal components. Required.
The region in which to run the algorithm. If not specified, gets the region from the DefaultAwsRegionProviderChain.
Serializes Spark DataFrame Rows for transformation by Models built from this Estimator.
Deserializes an Endpoint response into a series of Rows.
AmazonS3. Used to create a bucket for staging SageMaker Training Job input and/or output if either is set to S3AutoCreatePath.
Amazon SageMaker client. Used to send CreateTrainingJob, CreateModel, and CreateEndpoint requests.
The SageMaker TrainingJob and Hosting IAM Role. Used by SageMaker to access S3 and ECR resources. SageMaker hosted Endpoint instances launched by this Estimator run with this role.
AmazonSTS. Used to resolve the account number when creating staging input/output buckets.
Whether to subtract the mean during training and inference. Default: True.
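Taken together, the PCA hyperparameters documented above might be set on the estimator like this. This is a sketch: the setter names are assumed to mirror the hyperparameter descriptions, and the values are illustrative only.

  estimator
    .setFeatureDim(784)             // dimension of the input vectors; must be > 0 (required)
    .setNumComponents(10)           // number of principal components (required)
    .setMiniBatchSize(200)          // examples per mini-batch; must be > 0 (required)
    .setAlgorithmMode("randomized") // "regular", "stable", or "randomized"
    .setExtraComponents(15)         // -1 or > 0; only used in "randomized" mode
    .setSubtractMean(true)          // subtract the mean during training and inference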
The SageMaker Channel name for the serialized Dataset used as fit input.
The type of compression to use when serializing the Dataset being fit for input to SageMaker.
The MIME type of the training data.
A SageMaker Training Job Algorithm Specification Training Image Docker image URI.
The SageMaker Training Job Channel input mode.
An S3 location to upload SageMaker Training Job input data to.
The number of instances of instanceType to run a SageMaker Training Job with.
The SageMaker TrainingJob Instance Type to use.
The EBS volume size in gigabytes of each instance.
A KMS key ID for the Output Data Source.
A SageMaker Training Job Termination Condition MaxRuntimeInHours.
An S3 location where SageMaker stores Training Job output data.
The columns to project from the Dataset being fit before training. If Optional.empty is passed, no projection occurs and all columns are serialized.
The SageMaker Training Job S3 data distribution scheme.
The Spark Data Format name used to serialize the Dataset being fit for input to SageMaker.
The Spark Data Format Options used during serialization of the Dataset being fit.
The unique identifier of this Estimator. Used to represent this stage in Spark ML pipelines.
A SageMakerEstimator that runs a PCA training job in SageMaker and returns a SageMakerModel that can be used to transform a DataFrame using the hosted PCA model. PCA, or Principal Component Analysis, is useful for reducing the dimensionality of data before training with another algorithm.
Amazon SageMaker PCA trains on RecordIO-encoded Amazon Record protobuf data. SageMaker Spark writes a DataFrame to S3 by selecting a column of Vectors named "features" and, if present, a column of Doubles named "label". These names are configurable by passing a map of trainingSparkDataFormatOptions entries keyed by "labelColumnName" or "featuresColumnName", with values naming the desired label and features columns.
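For example, if the DataFrame uses different column names, the mapping might be overridden at construction time. This is a sketch: "myLabel" and "myFeatures" are hypothetical column names, and the remaining named parameters carry placeholder values as in the earlier sketch.

  import com.amazonaws.services.sagemaker.sparksdk.IAMRole
  import com.amazonaws.services.sagemaker.sparksdk.algorithms.PCASageMakerEstimator

  val customEstimator = new PCASageMakerEstimator(
    sagemakerRole = IAMRole("arn:aws:iam::123456789012:role/my-sagemaker-role"),
    trainingInstanceType = "ml.m4.xlarge",
    trainingInstanceCount = 1,
    endpointInstanceType = "ml.c4.xlarge",
    endpointInitialInstanceCount = 1,
    trainingSparkDataFormatOptions = Map(
      "labelColumnName" -> "myLabel",         // Doubles column to treat as the label
      "featuresColumnName" -> "myFeatures"))  // Vector column to treat as features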
PCASageMakerEstimator uses ProtobufRequestRowSerializer to serialize Rows into RecordIO-encoded Amazon Record protobuf messages for inference, by default selecting the column named "features", which is expected to contain a Vector of Doubles.
Inferences made against an Endpoint hosting a PCA model contain a "projection" field appended to the input DataFrame as a Dense Vector of Doubles.
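Putting the pieces together, transformation appends the "projection" column to the input DataFrame. A sketch continuing the estimator above, where df is a hypothetical DataFrame with a Vector column named "features":

  val model = estimator.fit(df)        // runs the PCA training job and hosts the model
  val projected = model.transform(df)  // invokes the endpoint; appends "projection"
  projected.select("features", "projection").show(5)
  // "projection" holds each row's principal-component projection as a Vector of Doubles.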