DataFrameMethods

Instance Constructors

new DataFrameMethods(df: DataFrame)

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
def columnDiff(otherDF: DataFrame): Seq[String]

Returns the columns in otherDF that aren't in self
def composeTrans(customTransforms: List[CustomTransform]): DataFrame

Executes a list of transformations in CustomTransform objects. Uses function composition.
def composeTransforms(transforms: (DataFrame) ⇒ DataFrame*): DataFrame

Executes a list of custom DataFrame transformations. Uses function composition to run a list of DataFrame transformations.
Executes a list of custom DataFrame transformations. Uses function composition to run a list of DataFrame transformations.
```
def withGreeting()(df: DataFrame): DataFrame = { df.withColumn("greeting", lit("hello world")) }
def withCat(name: String)(df: DataFrame): DataFrame = { df.withColumn("cats", lit(name + " meow")) }
sourceDF.composeTransforms(withGreeting(), withCat("sandy"))
```
def composeTransforms(transforms: List[(DataFrame) ⇒ DataFrame]): DataFrame

Executes a list of custom DataFrame transformations. Uses function composition to run a list of DataFrame transformations.
Executes a list of custom DataFrame transformations. Uses function composition to run a list of DataFrame transformations.
```
def withGreeting()(df: DataFrame): DataFrame = { df.withColumn("greeting", lit("hello world")) }
def withCat(name: String)(df: DataFrame): DataFrame = { df.withColumn("cats", lit(name + " meow")) }
val transforms = List( withGreeting()(_), withCat("sandy")(_) )
sourceDF.composeTransforms(transforms)
```
def containsColumn(structField: StructField): Boolean

Returns true if the DataFrame contains the StructField
Returns true if the DataFrame contains the StructField
```
sourceDF.containsColumn(StructField("team", StringType, true))
```
Returns true if sourceDF contains the StructField and false otherwise.
def containsColumn(colName: String): Boolean

Returns true if the DataFrame contains the column
Returns true if the DataFrame contains the column
```
sourceDF.containsColumn("team")
```
Returns true if sourceDF contains a column named "team" and false otherwise.
def containsColumns(colNames: String*): Boolean

Returns true if the DataFrame contains all the columns
Returns true if the DataFrame contains all the columns
```
sourceDF.containsColumns("team", "city")
```
Returns true if sourceDF contains the "team" and "city" columns and false otherwise.
def dropColumns(f: (String) ⇒ Boolean): DataFrame

Drops multiple columns that satisfy the conditions of a function. Here is how to drop all columns that start with an underscore: `df.dropColumns(_.startsWith("_"))`
def dropNestedColumn(fullColumnName: String): DataFrame

Drop nested column by specifying full name (for example foo.bar)
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
def flattenSchema(delimiter: String = "."): DataFrame

Converts all the StructType columns to regular columns. This StackOverflow answer provides a detailed description of how to use flattenSchema: https://stackoverflow.com/a/50402697/1125159
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def hashCode(): Int

Definition Classes
AnyRef → Any
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def killDuplicates(): DataFrame

Completely removes all duplicates from a DataFrame
def killDuplicates(col1: String, cols: String*): DataFrame

Completely removes all duplicates from a DataFrame
def killDuplicates(cols: Column*): DataFrame

Completely removes all duplicates from a DataFrame
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def printSchemaInCodeFormat(): Unit

Prints the schema with StructType and StructFields so it's easy to copy into code. Spark has a printSchema method to print the schema of a DataFrame and a schema method that returns a StructType object.
Prints the schema with StructType and StructFields so it's easy to copy into code. Spark has a printSchema method to print the schema of a DataFrame and a schema method that returns a StructType object.
The Dataset#schema method can be easily converted into working code for small DataFrames, but it can be a lot of manual work for DataFrames with a lot of columns.
The printSchemaInCodeFormat DataFrame extension prints the DataFrame schema as a valid StructType object.
Suppose you have the following sourceDF:
```
+--------+--------+---------+
|    team|   sport|goals_for|
+--------+--------+---------+
|    jets|football|       45|
|nacional|  soccer|       10|
+--------+--------+---------+
```

`sourceDF.printSchemaInCodeFormat()` will output the following rows in the console:

```
StructType(
  List(
    StructField("team", StringType, true),
    StructField("sport", StringType, true),
    StructField("goals_for", IntegerType, true)
  )
)
```
def renameColumns(f: (String) ⇒ String): DataFrame

Rename columns. Here is how to lowercase all the columns: `df.renameColumns(_.toLowerCase)`. Here is how to trim all the columns: `df.renameColumns(_.trim)`.
def reorderColumns(colNames: Seq[String]): DataFrame

Reorders columns as specified. Reorders the columns in a DataFrame.
Reorders columns as specified. Reorders the columns in a DataFrame.
```
val actualDF = sourceDF.reorderColumns(
  Seq("greeting", "team", "cats")
)
```
The actualDF will have the greeting column first, then the team column, then the cats column.
def setNullableForAllColumns(nullable: Boolean): DataFrame

Makes all columns nullable or non-nullable, depending on the `nullable` argument
def structureSchema(delimiter: String = "_"): DataFrame

This method is the opposite of flattenSchema.
This method is the opposite of flattenSchema. For example, if you have a flat DataFrame with snake case columns, it will convert it to a DataFrame with nested columns.
From:
```
root
 |-- person_id: long (nullable = true)
 |-- person_name: string (nullable = true)
 |-- person_surname: string (nullable = true)
```
To:
```
root
 |-- person: struct (nullable = false)
 |    |-- name: string (nullable = true)
 |    |-- surname: string (nullable = true)
 |    |-- id: long (nullable = true)
```
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
AnyRef → Any
def trans(customTransform: CustomTransform): DataFrame

Like transform(), but for CustomTransform objects. Enables you to specify the columns that should be added / removed by a custom transformation and errors out if the columns that are actually added / removed are different.
Like transform(), but for CustomTransform objects. Enables you to specify the columns that should be added / removed by a custom transformation and errors out if the columns that are actually added / removed are different.
```
val actualDF = sourceDF
  .trans(
    CustomTransform(
      transform = ExampleTransforms.withGreeting(),
      addedColumns = Seq("greeting"),
      requiredColumns = Seq("something")
    )
  )
  .trans(
    CustomTransform(
      transform = ExampleTransforms.withCat("spanky"),
      addedColumns = Seq("cats")
    )
  )
  .trans(
    CustomTransform(
      transform = ExampleTransforms.dropWordCol(),
      removedColumns = Seq("word")
    )
  )
```
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
def withColumnCast(columnName: String, newType: DataType): DataFrame

Returns a new DataFrame with the column columnName cast as newType.
Returns a new DataFrame with the column columnName cast as newType.
columnName
the column to cast
newType
the new type for columnName
def withColumnCast(columnName: String, newType: String): DataFrame

Returns a new DataFrame with the column columnName cast as newType.
Returns a new DataFrame with the column columnName cast as newType.
columnName
the column to cast
newType
the new type for columnName

Related Doc: package DataFrameExt

implicit class DataFrameMethods extends AnyRef

Instance Constructors

new DataFrameMethods(df: DataFrame)

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

final def asInstanceOf[T0]: T0

def clone(): AnyRef

def columnDiff(otherDF: DataFrame): Seq[String]

def composeTrans(customTransforms: List[CustomTransform]): DataFrame

def composeTransforms(transforms: (DataFrame) ⇒ DataFrame*): DataFrame

def composeTransforms(transforms: List[(DataFrame) ⇒ DataFrame]): DataFrame

def containsColumn(structField: StructField): Boolean

def containsColumn(colName: String): Boolean

def containsColumns(colNames: String*): Boolean

def dropColumns(f: (String) ⇒ Boolean): DataFrame

def dropNestedColumn(fullColumnName: String): DataFrame

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

def flattenSchema(delimiter: String = "."): DataFrame

final def getClass(): Class[_]

def hashCode(): Int

final def isInstanceOf[T0]: Boolean

def killDuplicates(): DataFrame

def killDuplicates(col1: String, cols: String*): DataFrame

def killDuplicates(cols: Column*): DataFrame

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

def printSchemaInCodeFormat(): Unit

def renameColumns(f: (String) ⇒ String): DataFrame

def reorderColumns(colNames: Seq[String]): DataFrame

def setNullableForAllColumns(nullable: Boolean): DataFrame

def structureSchema(delimiter: String = "_"): DataFrame

final def synchronized[T0](arg0: ⇒ T0): T0

def toString(): String

def trans(customTransform: CustomTransform): DataFrame

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

def withColumnCast(columnName: String, newType: DataType): DataFrame

def withColumnCast(columnName: String, newType: String): DataFrame

Inherited from AnyRef

Inherited from Any

Ungrouped