Skip to content

Convert Eventhub Body Column from Binary to String

BinaryToStringTransformer

Bases: TransformerInterface

Converts a dataframe body column from a binary to a string.

Example

from rtdip_sdk.pipelines.transformers import BinaryToStringTransformer

# `df` is assumed to be an existing Spark DataFrame with a binary "body" column.
binary_to_string_transformer = BinaryToStringTransformer(
    data=df,
    source_column_name="body",
    target_column_name="body"
)

# transform() returns the DataFrame with the target column cast to string.
result = binary_to_string_transformer.transform()

Parameters:

Name Type Description Default
data DataFrame

Dataframe to be transformed

required
source_column_name str

Spark Dataframe column containing the Binary data

required
target_column_name str

Spark Dataframe column name to be used for the String data

required
Source code in src/sdk/python/rtdip_sdk/pipelines/transformers/spark/binary_to_string.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
class BinaryToStringTransformer(TransformerInterface):
    """
    Converts a dataframe body column from a binary to a string.

    Example
    --------
    ```python
    from rtdip_sdk.pipelines.transformers import BinaryToStringTransformer

    binary_to_string_transformer = BinaryToStringTransformer(
        data=df,
        source_column_name="body",
        target_column_name="body"
    )

    result = binary_to_string_transformer.transform()
    ```

    Parameters:
        data (DataFrame): Dataframe to be transformed
        source_column_name (str): Spark Dataframe column containing the Binary data
        target_column_name (str): Spark Dataframe column name to be used for the String data
    """

    data: DataFrame
    source_column_name: str
    target_column_name: str

    def __init__(
        self, data: DataFrame, source_column_name: str, target_column_name: str
    ) -> None:
        # The constructor only stores its arguments; no work happens until
        # transform() is called.
        self.data = data
        self.source_column_name = source_column_name
        self.target_column_name = target_column_name

    @staticmethod
    def system_type():
        """
        Attributes:
            SystemType (Environment): Requires PYSPARK
        """
        return SystemType.PYSPARK

    @staticmethod
    def libraries():
        """
        Returns:
            Libraries: A newly constructed Libraries object with nothing added to it.
        """
        libraries = Libraries()
        return libraries

    @staticmethod
    def settings() -> dict:
        """
        Returns:
            dict: An empty dictionary; this transformer defines no settings.
        """
        return {}

    def pre_transform_validation(self):
        """
        Returns:
            bool: Always True; no checks are performed before the transform.
        """
        return True

    def post_transform_validation(self):
        """
        Returns:
            bool: Always True; no checks are performed after the transform.
        """
        return True

    def transform(self) -> DataFrame:
        """
        Casts the source column to string and stores it under the target column name.

        Returns:
            DataFrame: A dataframe with the body column converted to string.
        """
        # When target_column_name equals source_column_name, withColumn
        # replaces the existing column rather than adding a second one.
        return self.data.withColumn(
            self.target_column_name, self.data[self.source_column_name].cast("string")
        )

system_type() staticmethod

Attributes:

Name Type Description
SystemType Environment

Requires PYSPARK

Source code in src/sdk/python/rtdip_sdk/pipelines/transformers/spark/binary_to_string.py
56
57
58
59
60
61
62
@staticmethod
def system_type():
    """
    Report the environment this transformer runs in.

    Attributes:
        SystemType (Environment): Requires PYSPARK
    """
    required_system = SystemType.PYSPARK
    return required_system

transform()

Returns:

Name Type Description
DataFrame DataFrame

A dataframe with the body column converted to string.

Source code in src/sdk/python/rtdip_sdk/pipelines/transformers/spark/binary_to_string.py
79
80
81
82
83
84
85
86
def transform(self) -> DataFrame:
    """
    Cast the source column to string and store it under the target column name.

    Returns:
        DataFrame: A dataframe with the body column converted to string.
    """
    frame = self.data
    # Build the string-typed column expression first, then attach it.
    string_column = frame[self.source_column_name].cast("string")
    return frame.withColumn(self.target_column_name, string_column)