Skip to content

Convert Eventhub Body Column from Binary to String

BinaryToStringTransformer

Bases: TransformerInterface

Converts a dataframe body column from binary to string.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | DataFrame | Dataframe to be transformed | required |
| source_column_name | str | Spark Dataframe column containing the Binary data | required |
| target_column_name | str | Spark Dataframe column name to be used for the String data | required |
Source code in src/sdk/python/rtdip_sdk/pipelines/transformers/spark/binary_to_string.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
class BinaryToStringTransformer(TransformerInterface):
    """
    Casts a binary column of a Spark dataframe to string and stores the result
    under a caller-chosen column name.

    Args:
        data (DataFrame): Dataframe to be transformed
        source_column_name (str): Spark Dataframe column containing the Binary data
        target_column_name (str): Spark Dataframe column name to be used for the String data
    """

    data: DataFrame
    source_column_name: str
    target_column_name: str

    def __init__(
        self, data: DataFrame, source_column_name: str, target_column_name: str
    ) -> None:
        # Plain attribute storage only; nothing is validated or computed here.
        self.source_column_name = source_column_name
        self.target_column_name = target_column_name
        self.data = data

    @staticmethod
    def system_type():
        """
        Attributes:
            SystemType (Environment): Requires PYSPARK
        """
        required_system = SystemType.PYSPARK
        return required_system

    @staticmethod
    def libraries():
        # A freshly constructed Libraries object; this class adds nothing to it.
        return Libraries()

    @staticmethod
    def settings() -> dict:
        # This transformer defines no settings of its own.
        no_settings = {}
        return no_settings

    def pre_transform_validation(self):
        # No checks are performed before the transform; always reports True.
        return True

    def post_transform_validation(self):
        # No checks are performed after the transform; always reports True.
        return True

    def transform(self) -> DataFrame:
        """
        Returns:
            DataFrame: The input dataframe with the target column holding the source column cast to string.
        """
        with_column = self.data.withColumn
        target_name = self.target_column_name
        string_column = self.data[self.source_column_name].cast("string")
        return with_column(target_name, string_column)

system_type() staticmethod

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| SystemType | Environment | Requires PYSPARK |

Source code in src/sdk/python/rtdip_sdk/pipelines/transformers/spark/binary_to_string.py
42
43
44
45
46
47
48
@staticmethod
def system_type():
    """
    Report the system type this component requires.

    Attributes:
        SystemType (Environment): Requires PYSPARK
    """
    required_system = SystemType.PYSPARK
    return required_system

transform()

Returns:

| Name | Type | Description |
| --- | --- | --- |
| DataFrame | DataFrame | A dataframe with the body column converted to string. |

Source code in src/sdk/python/rtdip_sdk/pipelines/transformers/spark/binary_to_string.py
65
66
67
68
69
70
71
72
def transform(self) -> DataFrame:
    """
    Returns:
        DataFrame: The input dataframe with the target column holding the source column cast to string.
    """
    with_column = self.data.withColumn
    target_name = self.target_column_name
    string_column = self.data[self.source_column_name].cast("string")
    return with_column(target_name, string_column)