fmeval.transforms.transform

 1from abc import ABC, abstractmethod
 2from typing import Any, Dict, List
 3
 4from fmeval.transforms.util import validate_key_uniqueness
 5from fmeval.util import assert_condition
 6
 7
 8class Transform(ABC):
 9    """A Transform represents a single operation that consumes a record and outputs another.
10
11    Typically, the output record is the same object as the input; the Transform simply
12    mutates its input (usually by augmenting it with new data). However, the output
13    record can also be a new object, independent of the input record.
14
15    The logic for creating the output record is implemented in the Transform's __call__ method,
16    which takes a record as its sole argument. Any additional data besides this record
17    that is required to perform the transformation logic should be stored as instance
18    attributes in the Transform.
19    """
20
21    def __init__(self, *args, **kwargs):
22        """Transform initializer.
23
24        Concrete subclasses of Transform should always call super().__init__
25        with every argument passed to their own __init__ method.
26        Transform.__init__ stores all positional arguments in the `args` instance
27        attribute and all keyword arguments in the `kwargs` instance attribute.
28        This data is passed to Ray when Ray creates copies of this Transform instance
29        to perform parallel execution.
30
31        Note: The `input_keys` and `output_keys` attributes are initialized to None
32        and only assigned a meaningful value if the `register_input_output_keys` method
33        is called. This method is used in conjunction with the `validate_call` decorator
34        to perform validations of the __call__ inputs and outputs at runtime.
35        While it is not strictly necessary to utilize `register_input_output_keys` and
36        `validate_call` when implementing your own transforms, these methods are used in
37        all built-in transforms.
38
39        :param *args: Variable length argument list.
40        :param **kwargs: Arbitrary keyword arguments.
41        """
42        self.args = args
43        self.kwargs = kwargs
44        self.input_keys = None
45        self.output_keys = None
46
47    @abstractmethod
48    def __call__(self, record: Dict[str, Any]) -> Dict[str, Any]:
49        """Return a record containing data that gets computed in this method.
50
51        :param record: The input record to be transformed.
52        :returns: A record containing data that gets computed in this method.
53            This record can be the same object as the input record. In this case,
54            the logic in this method should mutate the input record directly.
55        """
56
57    def __repr__(self):
58        return (
59            f"{self.__class__.__name__}(input_keys={self.input_keys}, output_keys={self.output_keys}, "
60            f"args={list(self.args)}, kwargs={self.kwargs})"
61        )
62
63    def register_input_output_keys(self, input_keys: List[str], output_keys: List[str], allow_duplicates: bool = False):
64        """Assign self.input_keys and self.output_keys attributes.
65
66        Concrete subclasses of Transform should call this method in their __init__
67        if their __call__ method is decorated with `validate_call`.
68
69        :param input_keys: The record keys corresponding to data that this Transform
70            requires as inputs.
71        :param output_keys: The keys introduced by this Transform's __call__ logic
72            that will be present in the output record. If this Transform mutates its
73            input, then these keys should be added by __call__ to the input record.
74        :param allow_duplicates: Whether to allow duplicate values in `input_keys`.
75        """
76        assert_condition(isinstance(input_keys, List), "input_keys should be a list.")
77        assert_condition(
78            all(isinstance(input_key, str) for input_key in input_keys),
79            "All keys in input_keys should be strings.",
80        )
81        if not allow_duplicates:
82            validate_key_uniqueness(input_keys)
83        assert_condition(isinstance(output_keys, List), "output_keys should be a list.")
84        assert_condition(len(output_keys) > 0, "output_keys should be a non-empty list.")
85        assert_condition(
86            all(isinstance(output_key, str) for output_key in output_keys),
87            "All keys in output_keys should be strings.",
88        )
89        validate_key_uniqueness(output_keys)
90        self.input_keys = input_keys
91        self.output_keys = output_keys
class Transform(abc.ABC):
 9class Transform(ABC):
10    """A Transform represents a single operation that consumes a record and outputs another.
11
12    Typically, the output record is the same object as the input; the Transform simply
13    mutates its input (usually by augmenting it with new data). However, the output
14    record can also be a new object, independent of the input record.
15
16    The logic for creating the output record is implemented in the Transform's __call__ method,
17    which takes a record as its sole argument. Any additional data besides this record
18    that is required to perform the transformation logic should be stored as instance
19    attributes in the Transform.
20    """
21
22    def __init__(self, *args, **kwargs):
23        """Transform initializer.
24
25        Concrete subclasses of Transform should always call super().__init__
26        with every argument passed to their own __init__ method.
27        Transform.__init__ stores all positional arguments in the `args` instance
28        attribute and all keyword arguments in the `kwargs` instance attribute.
29        This data is passed to Ray when Ray creates copies of this Transform instance
30        to perform parallel execution.
31
32        Note: The `input_keys` and `output_keys` attributes are initialized to None
33        and only assigned a meaningful value if the `register_input_output_keys` method
34        is called. This method is used in conjunction with the `validate_call` decorator
35        to perform validations of the __call__ inputs and outputs at runtime.
36        While it is not strictly necessary to utilize `register_input_output_keys` and
37        `validate_call` when implementing your own transforms, these methods are used in
38        all built-in transforms.
39
40        :param *args: Variable length argument list.
41        :param **kwargs: Arbitrary keyword arguments.
42        """
43        self.args = args
44        self.kwargs = kwargs
45        self.input_keys = None
46        self.output_keys = None
47
48    @abstractmethod
49    def __call__(self, record: Dict[str, Any]) -> Dict[str, Any]:
50        """Return a record containing data that gets computed in this method.
51
52        :param record: The input record to be transformed.
53        :returns: A record containing data that gets computed in this method.
54            This record can be the same object as the input record. In this case,
55            the logic in this method should mutate the input record directly.
56        """
57
58    def __repr__(self):
59        return (
60            f"{self.__class__.__name__}(input_keys={self.input_keys}, output_keys={self.output_keys}, "
61            f"args={list(self.args)}, kwargs={self.kwargs})"
62        )
63
64    def register_input_output_keys(self, input_keys: List[str], output_keys: List[str], allow_duplicates: bool = False):
65        """Assign self.input_keys and self.output_keys attributes.
66
67        Concrete subclasses of Transform should call this method in their __init__
68        if their __call__ method is decorated with `validate_call`.
69
70        :param input_keys: The record keys corresponding to data that this Transform
71            requires as inputs.
72        :param output_keys: The keys introduced by this Transform's __call__ logic
73            that will be present in the output record. If this Transform mutates its
74            input, then these keys should be added by __call__ to the input record.
75        :param allow_duplicates: Whether to allow duplicate values in `input_keys`.
76        """
77        assert_condition(isinstance(input_keys, List), "input_keys should be a list.")
78        assert_condition(
79            all(isinstance(input_key, str) for input_key in input_keys),
80            "All keys in input_keys should be strings.",
81        )
82        if not allow_duplicates:
83            validate_key_uniqueness(input_keys)
84        assert_condition(isinstance(output_keys, List), "output_keys should be a list.")
85        assert_condition(len(output_keys) > 0, "output_keys should be a non-empty list.")
86        assert_condition(
87            all(isinstance(output_key, str) for output_key in output_keys),
88            "All keys in output_keys should be strings.",
89        )
90        validate_key_uniqueness(output_keys)
91        self.input_keys = input_keys
92        self.output_keys = output_keys

A Transform represents a single operation that consumes a record and outputs another.

Typically, the output record is the same object as the input; the Transform simply mutates its input (usually by augmenting it with new data). However, the output record can also be a new object, independent of the input record.

The logic for creating the output record is implemented in the Transform's __call__ method, which takes a record as its sole argument. Any additional data besides this record that is required to perform the transformation logic should be stored as instance attributes in the Transform.

Transform(*args, **kwargs)
22    def __init__(self, *args, **kwargs):
23        """Transform initializer.
24
25        Concrete subclasses of Transform should always call super().__init__
26        with every argument passed to their own __init__ method.
27        Transform.__init__ stores all positional arguments in the `args` instance
28        attribute and all keyword arguments in the `kwargs` instance attribute.
29        This data is passed to Ray when Ray creates copies of this Transform instance
30        to perform parallel execution.
31
32        Note: The `input_keys` and `output_keys` attributes are initialized to None
33        and only assigned a meaningful value if the `register_input_output_keys` method
34        is called. This method is used in conjunction with the `validate_call` decorator
35        to perform validations of the __call__ inputs and outputs at runtime.
36        While it is not strictly necessary to utilize `register_input_output_keys` and
37        `validate_call` when implementing your own transforms, these methods are used in
38        all built-in transforms.
39
40        :param *args: Variable length argument list.
41        :param **kwargs: Arbitrary keyword arguments.
42        """
43        self.args = args
44        self.kwargs = kwargs
45        self.input_keys = None
46        self.output_keys = None

Transform initializer.

Concrete subclasses of Transform should always call super().__init__ with every argument passed to their own __init__ method. Transform.__init__ stores all positional arguments in the args instance attribute and all keyword arguments in the kwargs instance attribute. This data is passed to Ray when Ray creates copies of this Transform instance to perform parallel execution.

Note: The input_keys and output_keys attributes are initialized to None and only assigned a meaningful value if the register_input_output_keys method is called. This method is used in conjunction with the validate_call decorator to perform validations of the __call__ inputs and outputs at runtime. While it is not strictly necessary to utilize register_input_output_keys and validate_call when implementing your own transforms, these methods are used in all built-in transforms.

Parameters
  • *args: Variable length argument list.
  • **kwargs: Arbitrary keyword arguments.
args
kwargs
input_keys
output_keys
def register_input_output_keys( self, input_keys: List[str], output_keys: List[str], allow_duplicates: bool = False):
64    def register_input_output_keys(self, input_keys: List[str], output_keys: List[str], allow_duplicates: bool = False):
65        """Assign self.input_keys and self.output_keys attributes.
66
67        Concrete subclasses of Transform should call this method in their __init__
68        if their __call__ method is decorated with `validate_call`.
69
70        :param input_keys: The record keys corresponding to data that this Transform
71            requires as inputs.
72        :param output_keys: The keys introduced by this Transform's __call__ logic
73            that will be present in the output record. If this Transform mutates its
74            input, then these keys should be added by __call__ to the input record.
75        :param allow_duplicates: Whether to allow duplicate values in `input_keys`.
76        """
77        assert_condition(isinstance(input_keys, List), "input_keys should be a list.")
78        assert_condition(
79            all(isinstance(input_key, str) for input_key in input_keys),
80            "All keys in input_keys should be strings.",
81        )
82        if not allow_duplicates:
83            validate_key_uniqueness(input_keys)
84        assert_condition(isinstance(output_keys, List), "output_keys should be a list.")
85        assert_condition(len(output_keys) > 0, "output_keys should be a non-empty list.")
86        assert_condition(
87            all(isinstance(output_key, str) for output_key in output_keys),
88            "All keys in output_keys should be strings.",
89        )
90        validate_key_uniqueness(output_keys)
91        self.input_keys = input_keys
92        self.output_keys = output_keys

Assign self.input_keys and self.output_keys attributes.

Concrete subclasses of Transform should call this method in their __init__ if their __call__ method is decorated with validate_call.

Parameters
  • input_keys: The record keys corresponding to data that this Transform requires as inputs.
  • output_keys: The keys introduced by this Transform's __call__ logic that will be present in the output record. If this Transform mutates its input, then these keys should be added by __call__ to the input record.
  • allow_duplicates: Whether to allow duplicate values in input_keys.