fmeval.transforms.transform
from abc import ABC, abstractmethod
from typing import Any, Dict, List

from fmeval.transforms.util import validate_key_uniqueness
from fmeval.util import assert_condition


class Transform(ABC):
    """Base class for a single record-to-record operation.

    A Transform is called with one record and returns one record. In the common
    case the returned record is the very object that was passed in, mutated in
    place (typically by adding new entries); returning a brand-new record that
    is independent of the input is also allowed.

    The transformation itself lives in __call__, whose only argument is the
    record. Anything else the logic needs must be kept on the instance as
    attributes.
    """

    def __init__(self, *args, **kwargs):
        """Capture the constructor arguments and reset the key registrations.

        Concrete subclasses are expected to forward every argument of their own
        __init__ to super().__init__. The positional arguments end up in the
        `args` attribute and the keyword arguments in the `kwargs` attribute;
        this data is handed to Ray when Ray creates copies of the Transform
        for parallel execution.

        Note: `input_keys` and `output_keys` start out as None. They only get a
        meaningful value once `register_input_output_keys` is called, which is
        meant to be paired with the `validate_call` decorator so that the inputs
        and outputs of __call__ are validated at runtime. Custom transforms are
        not strictly required to use either of them, but all built-in
        transforms do.

        :param *args: Variable length argument list.
        :param **kwargs: Arbitrary keyword arguments.
        """
        self.args, self.kwargs = args, kwargs
        self.input_keys = self.output_keys = None

    @abstractmethod
    def __call__(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Compute new data from `record` and return the resulting record.

        :param record: The input record to be transformed.
        :returns: A record holding the data computed by this method. It may be
            the input record itself, in which case the implementation should
            mutate the input record directly.
        """

    def __repr__(self):
        # `args` is stored as a tuple; it is displayed as a list.
        class_name = self.__class__.__name__
        positional = list(self.args)
        return (
            f"{class_name}(input_keys={self.input_keys}, output_keys={self.output_keys}, "
            f"args={positional}, kwargs={self.kwargs})"
        )

    def register_input_output_keys(
        self,
        input_keys: List[str],
        output_keys: List[str],
        allow_duplicates: bool = False,
    ):
        """Validate the given keys and store them on the instance.

        Concrete subclasses should call this from their __init__ whenever their
        __call__ method is decorated with `validate_call`.

        The checks run in a fixed order (input keys first, then output keys)
        and the attributes are assigned only after every check has been made.
        Checks are delegated to `assert_condition` and
        `validate_key_uniqueness`; how a failed check is reported is decided
        by those helpers.

        :param input_keys: The record keys corresponding to data that this
            Transform requires as inputs.
        :param output_keys: The keys that this Transform's __call__ logic
            introduces into the output record. If the Transform mutates its
            input, __call__ should add these keys to the input record.
            Must be non-empty and free of duplicates.
        :param allow_duplicates: Whether to allow duplicate values in
            `input_keys`.
        """
        # --- input keys ---
        assert_condition(isinstance(input_keys, List), "input_keys should be a list.")
        inputs_are_strings = all(isinstance(key, str) for key in input_keys)
        assert_condition(inputs_are_strings, "All keys in input_keys should be strings.")
        if not allow_duplicates:
            validate_key_uniqueness(input_keys)

        # --- output keys (duplicates are never allowed here) ---
        assert_condition(isinstance(output_keys, List), "output_keys should be a list.")
        assert_condition(len(output_keys) > 0, "output_keys should be a non-empty list.")
        outputs_are_strings = all(isinstance(key, str) for key in output_keys)
        assert_condition(outputs_are_strings, "All keys in output_keys should be strings.")
        validate_key_uniqueness(output_keys)

        self.input_keys, self.output_keys = input_keys, output_keys
class Transform(ABC):
    """Base class for a single record-to-record operation.

    A Transform is called with one record and returns one record. In the common
    case the returned record is the very object that was passed in, mutated in
    place (typically by adding new entries); returning a brand-new record that
    is independent of the input is also allowed.

    The transformation itself lives in __call__, whose only argument is the
    record. Anything else the logic needs must be kept on the instance as
    attributes.
    """

    def __init__(self, *args, **kwargs):
        """Capture the constructor arguments and reset the key registrations.

        Concrete subclasses are expected to forward every argument of their own
        __init__ to super().__init__. The positional arguments end up in the
        `args` attribute and the keyword arguments in the `kwargs` attribute;
        this data is handed to Ray when Ray creates copies of the Transform
        for parallel execution.

        Note: `input_keys` and `output_keys` start out as None. They only get a
        meaningful value once `register_input_output_keys` is called, which is
        meant to be paired with the `validate_call` decorator so that the inputs
        and outputs of __call__ are validated at runtime. Custom transforms are
        not strictly required to use either of them, but all built-in
        transforms do.

        :param *args: Variable length argument list.
        :param **kwargs: Arbitrary keyword arguments.
        """
        self.args, self.kwargs = args, kwargs
        self.input_keys = self.output_keys = None

    @abstractmethod
    def __call__(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Compute new data from `record` and return the resulting record.

        :param record: The input record to be transformed.
        :returns: A record holding the data computed by this method. It may be
            the input record itself, in which case the implementation should
            mutate the input record directly.
        """

    def __repr__(self):
        # `args` is stored as a tuple; it is displayed as a list.
        class_name = self.__class__.__name__
        positional = list(self.args)
        return (
            f"{class_name}(input_keys={self.input_keys}, output_keys={self.output_keys}, "
            f"args={positional}, kwargs={self.kwargs})"
        )

    def register_input_output_keys(
        self,
        input_keys: List[str],
        output_keys: List[str],
        allow_duplicates: bool = False,
    ):
        """Validate the given keys and store them on the instance.

        Concrete subclasses should call this from their __init__ whenever their
        __call__ method is decorated with `validate_call`.

        The checks run in a fixed order (input keys first, then output keys)
        and the attributes are assigned only after every check has been made.
        Checks are delegated to `assert_condition` and
        `validate_key_uniqueness`; how a failed check is reported is decided
        by those helpers.

        :param input_keys: The record keys corresponding to data that this
            Transform requires as inputs.
        :param output_keys: The keys that this Transform's __call__ logic
            introduces into the output record. If the Transform mutates its
            input, __call__ should add these keys to the input record.
            Must be non-empty and free of duplicates.
        :param allow_duplicates: Whether to allow duplicate values in
            `input_keys`.
        """
        # --- input keys ---
        assert_condition(isinstance(input_keys, List), "input_keys should be a list.")
        inputs_are_strings = all(isinstance(key, str) for key in input_keys)
        assert_condition(inputs_are_strings, "All keys in input_keys should be strings.")
        if not allow_duplicates:
            validate_key_uniqueness(input_keys)

        # --- output keys (duplicates are never allowed here) ---
        assert_condition(isinstance(output_keys, List), "output_keys should be a list.")
        assert_condition(len(output_keys) > 0, "output_keys should be a non-empty list.")
        outputs_are_strings = all(isinstance(key, str) for key in output_keys)
        assert_condition(outputs_are_strings, "All keys in output_keys should be strings.")
        validate_key_uniqueness(output_keys)

        self.input_keys, self.output_keys = input_keys, output_keys
A Transform represents a single operation that consumes a record and outputs another.
Typically, the output record is the same object as the input; the Transform simply mutates its input (usually by augmenting it with new data). However, the output record can also be a new object, independent of the input record.
The logic for creating the output record is implemented in the Transform's __call__ method, which takes a record as its sole argument. Any additional data besides this record that is required to perform the transformation logic should be stored as instance attributes in the Transform.
22 def __init__(self, *args, **kwargs): 23 """Transform initializer. 24 25 Concrete subclasses of Transform should always call super().__init__ 26 with every argument passed to their own __init__ method. 27 Transform.__init__ stores all positional arguments in the `args` instance 28 attribute and all keyword arguments in the `kwargs` instance attribute. 29 This data is passed to Ray when Ray creates copies of this Transform instance 30 to perform parallel execution. 31 32 Note: The `input_keys` and `output_keys` attributes are initialized to None 33 and only assigned a meaningful value if the `register_input_output_keys` method 34 is called. This method is used in conjunction with the `validate_call` decorator 35 to perform validations of the __call__ inputs and outputs at runtime. 36 While it is not strictly necessary to utilize `register_input_output_keys` and 37 `validate_call` when implementing your own transforms, these methods are used in 38 all built-in transforms. 39 40 :param *args: Variable length argument list. 41 :param **kwargs: Arbitrary keyword arguments. 42 """ 43 self.args = args 44 self.kwargs = kwargs 45 self.input_keys = None 46 self.output_keys = None
Transform initializer.
Concrete subclasses of Transform should always call `super().__init__` with every argument passed to their own `__init__` method. `Transform.__init__` stores all positional arguments in the `args` instance attribute and all keyword arguments in the `kwargs` instance attribute. This data is passed to Ray when Ray creates copies of this Transform instance to perform parallel execution.
Note: The `input_keys` and `output_keys` attributes are initialized to None and only assigned a meaningful value if the `register_input_output_keys` method is called. This method is used in conjunction with the `validate_call` decorator to perform validations of the `__call__` inputs and outputs at runtime. While it is not strictly necessary to utilize `register_input_output_keys` and `validate_call` when implementing your own transforms, these methods are used in all built-in transforms.
Parameters
- *args: Variable length argument list.
- **kwargs: Arbitrary keyword arguments.
def register_input_output_keys(
    self,
    input_keys: List[str],
    output_keys: List[str],
    allow_duplicates: bool = False,
):
    """Validate the given keys and store them on the instance.

    Concrete subclasses should call this from their __init__ whenever their
    __call__ method is decorated with `validate_call`.

    The checks run in a fixed order (input keys first, then output keys)
    and the attributes are assigned only after every check has been made.
    Checks are delegated to `assert_condition` and
    `validate_key_uniqueness`; how a failed check is reported is decided
    by those helpers.

    :param input_keys: The record keys corresponding to data that this
        Transform requires as inputs.
    :param output_keys: The keys that this Transform's __call__ logic
        introduces into the output record. If the Transform mutates its
        input, __call__ should add these keys to the input record.
        Must be non-empty and free of duplicates.
    :param allow_duplicates: Whether to allow duplicate values in
        `input_keys`.
    """
    # --- input keys ---
    assert_condition(isinstance(input_keys, List), "input_keys should be a list.")
    inputs_are_strings = all(isinstance(key, str) for key in input_keys)
    assert_condition(inputs_are_strings, "All keys in input_keys should be strings.")
    if not allow_duplicates:
        validate_key_uniqueness(input_keys)

    # --- output keys (duplicates are never allowed here) ---
    assert_condition(isinstance(output_keys, List), "output_keys should be a list.")
    assert_condition(len(output_keys) > 0, "output_keys should be a non-empty list.")
    outputs_are_strings = all(isinstance(key, str) for key in output_keys)
    assert_condition(outputs_are_strings, "All keys in output_keys should be strings.")
    validate_key_uniqueness(output_keys)

    self.input_keys, self.output_keys = input_keys, output_keys
Assign self.input_keys and self.output_keys attributes.
Concrete subclasses of Transform should call this method in their `__init__` if their `__call__` method is decorated with `validate_call`.
Parameters
- input_keys: The record keys corresponding to data that this Transform requires as inputs.
- output_keys: The keys introduced by this Transform's __call__ logic that will be present in the output record. If this Transform mutates its input, then these keys should be added by __call__ to the input record.
- allow_duplicates: Whether to allow duplicate values in `input_keys`.