default path to data
function for extracting key from avro record
header of the CSV file as an array; otherwise the first row is assumed to be the header line
CSV options
timeZone to be used for any dateTime fields
result record namespace
result record name
aggregate params, including the function for extracting the timestamp of an event
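The aggregate params above pair a timestamp-extraction function with the reader. A minimal sketch of how such params could drive per-key aggregation, using hypothetical `Event` and `AggregateParams` stand-ins (not the library's actual types) and summing values as one simple aggregation choice:

```scala
// Hypothetical event record; field names are illustrative only.
case class Event(key: String, timestamp: Long, value: Double)

// Stand-in for aggregate params: the timestamp extractor plus a cutoff time.
case class AggregateParams[T](timeStampFn: T => Long, cutOffTime: Long)

// Aggregate events per key, keeping only those at or before the cutoff
// and summing their values.
def aggregateByKey(events: Seq[Event], params: AggregateParams[Event]): Map[String, Double] =
  events
    .filter(e => params.timeStampFn(e) <= params.cutOffTime)
    .groupBy(_.key)
    .view.mapValues(_.map(_.value).sum).toMap
```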
Full reader input type name
full input type name
Generate the DataFrame that will be used in the OpPipeline calling this method
features to generate from the dataset read in by this reader
op parameters
spark instance to do the reading and conversion from RDD to DataFrame
A DataFrame containing columns with all of the raw input features expected by the pipeline
Default method for extracting the path used in the read method. The path is taken in the following order of priority: readerPath, params
final path to use
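The path-resolution order described above (the reader's own path first, then the workflow params) can be sketched in plain Scala. The `WorkflowParams` type and its `readLocations` map are illustrative assumptions, not the library's actual API:

```scala
// Hypothetical stand-in for the workflow params carrying per-reader read locations.
case class WorkflowParams(readLocations: Map[String, String])

// Resolve the final read path: the reader's own path wins, then the
// per-reader location in the workflow params; fail if neither is set.
def getFinalReadPath(readerPath: Option[String],
                     params: WorkflowParams,
                     readerName: String): String =
  readerPath
    .orElse(params.readLocations.get(readerName))
    .getOrElse(throw new IllegalArgumentException(s"No path specified for reader $readerName"))
```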
Default method for extracting this reader's parameters from readerParams in OpParams
contains map of reader type to ReaderParams instances
ReaderParams instance if it exists
Derives DataFrame schema for raw features.
array of features representing the raw feature data
a StructType instance
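Conceptually, schema derivation maps each raw feature to one column. A simplified sketch, where `RawFeature` and `Field` are stand-ins for the library's feature type and Spark's `StructField` (names and fields are assumptions for illustration):

```scala
// Simplified stand-ins for a raw feature and a schema field.
case class RawFeature(name: String, typeName: String, isNullable: Boolean = true)
case class Field(name: String, typeName: String, nullable: Boolean)

// Derive a flat schema: one field per raw feature, preserving nullability.
def getSchema(rawFeatures: Seq[RawFeature]): Seq[Field] =
  rawFeatures.map(f => Field(f.name, f.typeName, f.isNullable))
```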
header of the CSV file as an array; otherwise the first row is assumed to be the header line
Inner join
Type of data read by right data reader
reader from right side of join
join keys to use
joined reader
Join readers
Type of data read by right data reader
reader from right side of join
type of join to perform
join keys to use
joined reader
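The inner, left outer, and outer joins listed here differ only in which keys survive the join. A minimal sketch of that key selection over two readers' keyed outputs, using plain Scala `Map`s as stand-ins for the readers' data (types and names are illustrative, not the library's API):

```scala
// Illustrative join-type marker, mirroring the three joins documented here.
sealed trait JoinType
case object Inner extends JoinType
case object LeftOuter extends JoinType
case object Outer extends JoinType

// Join two keyed datasets: pick the surviving keys per join type,
// then pair up whatever each side has for those keys.
def joinByKey[L, R](left: Map[String, L], right: Map[String, R],
                    joinType: JoinType): Map[String, (Option[L], Option[R])] = {
  val keys = joinType match {
    case Inner     => left.keySet intersect right.keySet
    case LeftOuter => left.keySet
    case Outer     => left.keySet union right.keySet
  }
  keys.map(k => k -> (left.get(k), right.get(k))).toMap
}
```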
function for extracting key from avro record
Left outer join
Type of data read by right data reader
reader from right side of join
join keys to use
joined reader
Function to repartition the data based on the op params of this reader
dataset
op params
the dataset, repartitioned if the op params specify a partition count
Function to repartition the data based on the op params of this reader
rdd
op params
the RDD, repartitioned if the op params specify a partition count
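The "maybe" in these repartition methods comes down to a single decision: only repartition when the params actually request a different partition count. A sketch of that decision, with a `Seq` standing in for the RDD/Dataset and a hypothetical `RepartitionParams` (not the library's real params type):

```scala
// Hypothetical params carrying an optional requested partition count.
case class RepartitionParams(numPartitions: Option[Int])

// Repartition only when a different partition count is requested;
// a real reader would call rdd.repartition(n) instead of returning n.
def maybeRepartition[T](data: Seq[T], currentPartitions: Int,
                        params: RepartitionParams): (Seq[T], Int) =
  params.numPartitions match {
    case Some(n) if n != currentPartitions => (data, n)
    case _                                 => (data, currentPartitions)
  }
```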
CSV options
Outer join
Type of data read by right data reader
reader from right side of join
join keys to use
joined reader
Function which reads raw data from the specified location for use in DataFrame creation, i.e. the generateDataFrame function. This function returns either an RDD or a Dataset of the type specified by this reader. It can be overridden to carry out any special logic required for the reader (e.g. filters or joins needed to produce the specified reader type).
parameters used to carry out specialized logic in reader (passed in from workflow)
spark instance to do the reading and conversion from RDD to DataFrame
either an RDD or a Dataset of type T
Function which reads raw data from the specified location for use in DataFrame creation, i.e. the generateDataFrame function. This function returns a Dataset of the type specified by this reader.
parameters used to carry out specialized logic in reader (passed in from workflow)
spark session
Dataset of type T
default path to data
Function which reads raw data from the specified location for use in DataFrame creation, i.e. the generateDataFrame function. This function returns an RDD of the type specified by this reader.
parameters used to carry out specialized logic in reader (passed in from workflow)
spark session
RDD of type T
result record name
result record namespace
All the reader's sub readers (used in joins)
sub readers
timeZone to be used for any dateTime fields
Short reader input type name
short reader input type name
Reader type tag
Data reader for event type CSV data, where there may be multiple records for a given key. Each CSV record will be automatically converted to an Avro record by inferring a schema.
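The schema inference this reader performs can be illustrated with a simplified sketch: guess one type per CSV column from the values seen in that column. The type names and matching rules here are illustrative stand-ins, not the actual Avro schema builder:

```scala
// Guess a column's type from its string values: integers, then
// decimals, then booleans, falling back to string.
def inferColumnType(values: Seq[String]): String =
  if (values.forall(_.matches("-?\\d+"))) "long"
  else if (values.forall(v => v.matches("-?\\d*\\.\\d+") || v.matches("-?\\d+"))) "double"
  else if (values.forall(v => v == "true" || v == "false")) "boolean"
  else "string"

// Infer a (name, type) schema from the header row and data rows.
def inferSchema(header: Seq[String], rows: Seq[Seq[String]]): Seq[(String, String)] =
  header.zipWithIndex.map { case (name, i) => name -> inferColumnType(rows.map(_(i))) }
```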