fmeval.transforms.common
import numpy as np
from typing import Any, Dict, List, Optional


from fmeval.model_runners.composers.composers import PromptComposer
from fmeval.model_runners.model_runner import ModelRunner
from fmeval.transforms.transform import Transform
from fmeval.transforms.util import validate_call


class GeneratePrompt(Transform):
    """This transform augments an input record with LLM prompts constructed according to a template.

    If multiple input keys are provided, this transform creates prompts out of all of them,
    applying the same prompt template to each input.
    """

    def __init__(
        self,
        input_keys: List[str],
        output_keys: List[str],
        prompt_template: str,
        placeholder_to_record_key: Optional[Dict[str, str]] = None,
    ):
        """GeneratePrompt initializer.

        :param input_keys: The keys corresponding to the text that will be used to create prompts.
            If multiple input keys are provided, a prompt will be constructed from each input, but
            the created prompts will all utilize the same prompt template.
        :param output_keys: The keys corresponding to the prompts that get added by this Transform.
        :param prompt_template: The template used to construct the prompt.
            Example: "Summarize the following text: $model_input".
        :param placeholder_to_record_key: The placeholders and the corresponding record keys dict.
            Note that when using `placeholder_to_record_key`, having input keys or more than one output key
            doesn't make much sense, as all composed prompts will be identical.
            Example:
                Inputs:
                    prompt_template = "Summarize $x and $y"
                    input_keys = []
                    output_keys = ["my_prompt"]
                    placeholder_to_record_key = {"x": "statement_1", "y": "statement_2"}
                    record = {"statement_1": "some long text", "statement_2": "some other long text"}
                Output record (only new keys and values are shown):
                    {"my_prompt": "Summarize some long text and some other long text"}
        """
        super().__init__(input_keys, output_keys, prompt_template, placeholder_to_record_key)
        # A None *or empty* mapping means the prompts are built from `input_keys`.
        if placeholder_to_record_key:
            input_keys_to_register = list(placeholder_to_record_key.values())
        else:
            input_keys_to_register = input_keys
        self.register_input_output_keys(input_keys_to_register, output_keys)
        self.placeholder_to_record_key = placeholder_to_record_key
        self.prompt_template = prompt_template
        self.prompt_composer = PromptComposer(prompt_template)

    @validate_call
    def __call__(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Augment the input record with LLM prompts and return said record.

        :param record: The input record.
        :returns: The input record with prompts added in.
        """
        # Truthiness (rather than `is not None`) mirrors the check in __init__, so an
        # empty mapping follows the same `input_keys` path that was registered there.
        if self.placeholder_to_record_key:
            placeholder_data_dict = {
                placeholder: record[record_key]
                for placeholder, record_key in self.placeholder_to_record_key.items()
            }
            # Every output key receives a prompt composed from the same placeholder data.
            for prompt_key in self.output_keys:
                record[prompt_key] = self.prompt_composer.compose(placeholder_data_dict=placeholder_data_dict)
        else:
            # The ith input key is paired with the ith output key.
            for input_key, prompt_key in zip(self.input_keys, self.output_keys):
                record[prompt_key] = self.prompt_composer.compose(record[input_key])
        return record


class GetModelOutputs(Transform):
    """Invokes a ModelRunner's `predict` method and augments the input record with the model output.

    An instance of this transform can be configured to get model outputs for multiple inputs.
    See __init__ docstring for more details.
    """

    def __init__(
        self,
        input_to_output_keys: Dict[str, List[str]],
        model_runner: ModelRunner,
    ):
        """GetModelOutputs initializer.

        :param input_to_output_keys: Maps an input key (corresponding to
            the input payload to the model) to a list of output keys, where
            each output key corresponds to the model output that is returned
            when calling the `predict` method of `model_runner` on the input.

            Note that the reason a list of output keys is used (as opposed to
            a singular key) is so that `model_runner` can be invoked on the
            same input multiple times.

            Note that the response payload from calling `predict` will be a tuple of the
            form (model_output, log_probability), and this transform is only concerned with
            the model_output element.
        :param model_runner: The ModelRunner instance whose outputs will be obtained.
        """
        super().__init__(input_to_output_keys, model_runner)
        self.register_input_output_keys(
            input_keys=list(input_to_output_keys.keys()),
            output_keys=[
                output_key for output_key_list in input_to_output_keys.values() for output_key in output_key_list
            ],
        )
        self.input_to_output_keys = input_to_output_keys
        self.model_runner = model_runner

    @validate_call
    def __call__(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Augment the input record with model outputs and return said record.

        :param record: The input record.
        :returns: The input record with model output data added in.
        """
        for input_key, output_keys in self.input_to_output_keys.items():
            for output_key in output_keys:
                # `predict` is invoked once per output key; the log probability is discarded.
                model_output, _ = self.model_runner.predict(record[input_key])
                record[output_key] = model_output
        return record


class GetLogProbabilities(Transform):
    """Invokes a ModelRunner's `predict` method and augments the input record with the returned log probability.

    This transform can obtain multiple log probabilities, by invoking the provided model on multiple inputs.
    See the __init__ docstring for more details.
    """

    def __init__(
        self,
        input_keys: List[str],
        output_keys: List[str],
        model_runner: ModelRunner,
    ):
        """GetLogProbabilities initializer.

        Note that the ith element of input_keys should correspond to the ith element of
        output_keys. In other words, the log probability obtained from invoking the model
        on the input with key input_keys[i] will be assigned the key output_keys[i].

        :param input_keys: The keys within the input record corresponding to model inputs.
        :param output_keys: The keys corresponding to the log probability data that will get
            added to the record by this transform.
        :param model_runner: The ModelRunner instance whose `predict` method will be invoked
            to obtain the log probability.
        """
        super().__init__(input_keys, output_keys, model_runner)
        self.register_input_output_keys(
            input_keys=input_keys,
            output_keys=output_keys,
        )
        self.model_runner = model_runner

    @validate_call
    def __call__(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Augment the input record with the log probability that is returned by the model.

        :param record: The input record.
        :returns: The input record with log probability data added in.
        """
        for input_key, output_key in zip(self.input_keys, self.output_keys):
            # Only the second element of the `predict` response is kept.
            _, log_prob = self.model_runner.predict(record[input_key])
            record[output_key] = log_prob
        return record


class Mean(Transform):
    """This transform computes the arithmetic mean of specified values in a record and augments said record."""

    def __init__(self, input_keys: List[str], output_key: str):
        """Mean initializer.

        :param input_keys: The keys corresponding to the values to take the mean of.
        :param output_key: The key corresponding to the mean value, which gets
            added to the record.
        """
        super().__init__(input_keys, output_key)
        self.register_input_output_keys(input_keys, [output_key])
        self.output_key = output_key

    @validate_call
    def __call__(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Augment the input record with the computed mean.

        :param record: The input record.
        :returns: The input record with the mean added in.
        """
        avg = np.mean([record[input_key] for input_key in self.input_keys])
        record[self.output_key] = avg
        return record


class SplitWithDelimiter(Transform):
    """This transform splits the target output into a list of possible targets based on a
    target_output_delimiter and augments the input record.

    For example, if we had "England<OR>Uk" as a target output, record[output_key] = ["England", "Uk"]
    """

    def __init__(self, input_key: str, output_key: str, target_output_delimiter: str = "<OR>"):
        """SplitWithDelimiter initializer.

        :param input_key: The key that corresponds to the target output string.
        :param output_key: The key corresponding to the list of target outputs, which gets
            added to the record.
        :param target_output_delimiter: The delimiter on which the target output string is split.
        """
        super().__init__(input_key, output_key, target_output_delimiter)
        self.register_input_output_keys([input_key], [output_key])
        self.input_key = input_key
        self.output_key = output_key
        self.target_output_delimiter = target_output_delimiter

    @validate_call
    def __call__(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Augment the input record with the new list of possible targets.

        :param record: The input record.
        :returns: The input record with the list of possible targets added in.
        """
        record[self.output_key] = record[self.input_key].split(self.target_output_delimiter)
        return record
class GeneratePrompt(Transform):
    """This transform augments an input record with LLM prompts constructed according to a template.

    If multiple input keys are provided, this transform creates prompts out of all of them,
    applying the same prompt template to each input.
    """

    def __init__(
        self,
        input_keys: List[str],
        output_keys: List[str],
        prompt_template: str,
        placeholder_to_record_key: Optional[Dict[str, str]] = None,
    ):
        """GeneratePrompt initializer.

        :param input_keys: The keys corresponding to the text that will be used to create prompts.
            If multiple input keys are provided, a prompt will be constructed from each input, but
            the created prompts will all utilize the same prompt template.
        :param output_keys: The keys corresponding to the prompts that get added by this Transform.
        :param prompt_template: The template used to construct the prompt.
            Example: "Summarize the following text: $model_input".
        :param placeholder_to_record_key: The placeholders and the corresponding record keys dict.
            Note that when using `placeholder_to_record_key`, having input keys or more than one output key
            doesn't make much sense, as all composed prompts will be identical.
            Example:
                Inputs:
                    prompt_template = "Summarize $x and $y"
                    input_keys = []
                    output_keys = ["my_prompt"]
                    placeholder_to_record_key = {"x": "statement_1", "y": "statement_2"}
                    record = {"statement_1": "some long text", "statement_2": "some other long text"}
                Output record (only new keys and values are shown):
                    {"my_prompt": "Summarize some long text and some other long text"}
        """
        super().__init__(input_keys, output_keys, prompt_template, placeholder_to_record_key)
        # A None *or empty* mapping means the prompts are built from `input_keys`.
        if placeholder_to_record_key:
            input_keys_to_register = list(placeholder_to_record_key.values())
        else:
            input_keys_to_register = input_keys
        self.register_input_output_keys(input_keys_to_register, output_keys)
        self.placeholder_to_record_key = placeholder_to_record_key
        self.prompt_template = prompt_template
        self.prompt_composer = PromptComposer(prompt_template)

    @validate_call
    def __call__(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Augment the input record with LLM prompts and return said record.

        :param record: The input record.
        :returns: The input record with prompts added in.
        """
        # Truthiness (rather than `is not None`) mirrors the check in __init__, so an
        # empty mapping follows the same `input_keys` path that was registered there.
        if self.placeholder_to_record_key:
            placeholder_data_dict = {
                placeholder: record[record_key]
                for placeholder, record_key in self.placeholder_to_record_key.items()
            }
            # Every output key receives a prompt composed from the same placeholder data.
            for prompt_key in self.output_keys:
                record[prompt_key] = self.prompt_composer.compose(placeholder_data_dict=placeholder_data_dict)
        else:
            # The ith input key is paired with the ith output key.
            for input_key, prompt_key in zip(self.input_keys, self.output_keys):
                record[prompt_key] = self.prompt_composer.compose(record[input_key])
        return record
This transform augments an input record with LLM prompts constructed according to a template.
If multiple input keys are provided, this transform creates prompts out of all of them, applying the same prompt template to each input.
def __init__(
    self,
    input_keys: List[str],
    output_keys: List[str],
    prompt_template: str,
    placeholder_to_record_key: Optional[Dict[str, str]] = None,
):
    """GeneratePrompt initializer.

    :param input_keys: The keys corresponding to the text that will be used to create prompts.
        If multiple input keys are provided, a prompt will be constructed from each input, but
        the created prompts will all utilize the same prompt template.
    :param output_keys: The keys corresponding to the prompts that get added by this Transform.
    :param prompt_template: The template used to construct the prompt.
        Example: "Summarize the following text: $model_input".
    :param placeholder_to_record_key: The placeholders and the corresponding record keys dict.
        Note that when using `placeholder_to_record_key`, having input keys or more than one output key
        doesn't make much sense, as all composed prompts will be identical.
        Example:
            Inputs:
                prompt_template = "Summarize $x and $y"
                input_keys = []
                output_keys = ["my_prompt"]
                placeholder_to_record_key = {"x": "statement_1", "y": "statement_2"}
                record = {"statement_1": "some long text", "statement_2": "some other long text"}
            Output record (only new keys and values are shown):
                {"my_prompt": "Summarize some long text and some other long text"}
    """
    super().__init__(input_keys, output_keys, prompt_template, placeholder_to_record_key)
    # With a non-empty placeholder mapping, the record keys it points at are the
    # inputs that get registered; otherwise the plain `input_keys` are used.
    if placeholder_to_record_key:
        input_keys_to_register = list(placeholder_to_record_key.values())
    else:
        input_keys_to_register = input_keys
    self.register_input_output_keys(input_keys_to_register, output_keys)
    self.prompt_template = prompt_template
    self.prompt_composer = PromptComposer(prompt_template)
    self.placeholder_to_record_key = placeholder_to_record_key
GeneratePrompt initializer.
:param input_keys: The keys corresponding to the text that will be used to create prompts.
If multiple input keys are provided, a prompt will be constructed from each input, but
the created prompts will all utilize the same prompt template.
:param output_keys: The keys corresponding to the prompts that get added by this Transform.
:param prompt_template: The template used to construct the prompt.
Example: "Summarize the following text: $model_input".
:param placeholder_to_record_key: The placeholders and the corresponding record keys dict.
Note that when using `placeholder_to_record_key`, having input keys or more than one output key
doesn't make much sense, as all composed prompts will be identical.
Example:
Inputs:
prompt_template = "Summarize $x and $y"
input_keys = []
output_keys = ["my_prompt"]
placeholder_to_record_key = {"x": "statement_1", "y": "statement_2"}
record = {"statement_1": "some long text", "statement_2": "some other long text"}
Output record (only new keys and values are shown):
{"my_prompt": "Summarize some long text and some other long text"}
class GetModelOutputs(Transform):
    """Invokes a ModelRunner's `predict` method and augments the input record with the model output.

    A single instance can collect model outputs for several inputs, and several
    outputs per input. See the __init__ docstring for more details.
    """

    def __init__(
        self,
        input_to_output_keys: Dict[str, List[str]],
        model_runner: ModelRunner,
    ):
        """GetModelOutputs initializer.

        :param input_to_output_keys: Maps an input key (the key of the payload sent
            to the model) to a list of output keys. Each output key receives the
            model output returned by one call to `model_runner.predict` on that input.

            A list of output keys is used (rather than a single key) so that
            `model_runner` can be invoked on the same input multiple times.

            The response from `predict` is unpacked as a pair
            (model_output, log_probability); only model_output is stored.
        :param model_runner: The ModelRunner instance whose outputs will be obtained.
        """
        super().__init__(input_to_output_keys, model_runner)
        # Flatten the per-input output-key lists, preserving mapping order.
        flattened_output_keys = []
        for keys_for_input in input_to_output_keys.values():
            for key in keys_for_input:
                flattened_output_keys.append(key)
        self.register_input_output_keys(
            input_keys=list(input_to_output_keys),
            output_keys=flattened_output_keys,
        )
        self.model_runner = model_runner
        self.input_to_output_keys = input_to_output_keys

    @validate_call
    def __call__(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Augment the input record with model outputs and return said record.

        :param record: The input record.
        :returns: The input record with model output data added in.
        """
        for source_key, destination_keys in self.input_to_output_keys.items():
            for destination_key in destination_keys:
                # One `predict` call per output key; the second tuple element is ignored.
                response, _ = self.model_runner.predict(record[source_key])
                record[destination_key] = response
        return record
Invokes a ModelRunner's predict
method and augments the input record with the model output.
An instance of this transform can be configured to get model outputs for multiple inputs. See __init__ docstring for more details.
def __init__(
    self,
    input_to_output_keys: Dict[str, List[str]],
    model_runner: ModelRunner,
):
    """GetModelOutputs initializer.

    :param input_to_output_keys: Maps an input key (the key of the payload sent
        to the model) to a list of output keys. Each output key corresponds to the
        model output returned by calling `model_runner.predict` on that input.

        A list of output keys is used (rather than a single key) so that
        `model_runner` can be invoked on the same input multiple times.

        The response payload from `predict` is a tuple of the form
        (model_output, log_probability); this transform is only concerned with
        the model_output element.
    :param model_runner: The ModelRunner instance whose outputs will be obtained.
    """
    super().__init__(input_to_output_keys, model_runner)
    # Flatten the per-input output-key lists, preserving mapping order.
    flattened_output_keys = []
    for keys_for_input in input_to_output_keys.values():
        for key in keys_for_input:
            flattened_output_keys.append(key)
    self.register_input_output_keys(
        input_keys=list(input_to_output_keys),
        output_keys=flattened_output_keys,
    )
    self.model_runner = model_runner
    self.input_to_output_keys = input_to_output_keys
GetModelOutputs initializer.
Parameters
- input_to_output_keys: Maps an input key (corresponding to the input payload to the model) to a list of output keys, where each output key corresponds to the model output that is returned when calling the `predict` method of `model_runner` on the input.
  Note that the reason a list of output keys is used (as opposed to a singular key) is so that `model_runner` can be invoked on the same input multiple times.
  Note that the response payload from calling `predict` will be a tuple of the form (model_output, log_probability), and this transform is only concerned with the model_output element.
- model_runner: The ModelRunner instance whose outputs will be obtained.
class GetLogProbabilities(Transform):
    """Invokes a ModelRunner's `predict` method and augments the input record with the returned log probability.

    Several log probabilities can be obtained by one instance, one per configured
    input key. See the __init__ docstring for more details.
    """

    def __init__(
        self,
        input_keys: List[str],
        output_keys: List[str],
        model_runner: ModelRunner,
    ):
        """GetLogProbabilities initializer.

        The ith element of input_keys corresponds to the ith element of output_keys:
        the log probability obtained from invoking the model on the input with key
        input_keys[i] is stored under the key output_keys[i].

        :param input_keys: The keys within the input record corresponding to model inputs.
        :param output_keys: The keys corresponding to the log probability data that will get
            added to the record by this transform.
        :param model_runner: The ModelRunner instance whose `predict` method will be invoked
            to obtain the log probability.
        """
        super().__init__(input_keys, output_keys, model_runner)
        self.register_input_output_keys(input_keys=input_keys, output_keys=output_keys)
        self.model_runner = model_runner

    @validate_call
    def __call__(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Augment the input record with the log probability that is returned by the model.

        :param record: The input record.
        :returns: The input record with log probability data added in.
        """
        key_pairs = zip(self.input_keys, self.output_keys)
        for source_key, destination_key in key_pairs:
            # The first element of the `predict` response (the model output) is not needed here.
            _, log_probability = self.model_runner.predict(record[source_key])
            record[destination_key] = log_probability
        return record
Invokes a ModelRunner's predict
method and augments the input record with the returned log probability.
This transform can obtain multiple log probabilities, by invoking the provided model on multiple inputs. See the __init__ docstring for more details.
def __init__(
    self,
    input_keys: List[str],
    output_keys: List[str],
    model_runner: ModelRunner,
):
    """GetLogProbabilities initializer.

    The ith element of input_keys corresponds to the ith element of output_keys:
    the log probability obtained from invoking the model on the input with key
    input_keys[i] is stored under the key output_keys[i].

    :param input_keys: The keys within the input record corresponding to model inputs.
    :param output_keys: The keys corresponding to the log probability data that will get
        added to the record by this transform.
    :param model_runner: The ModelRunner instance whose `predict` method will be invoked
        to obtain the log probability.
    """
    super().__init__(input_keys, output_keys, model_runner)
    self.register_input_output_keys(input_keys=input_keys, output_keys=output_keys)
    self.model_runner = model_runner
GetLogProbabilities initializer.
Note that the ith element of input_keys should correspond to the ith element of output_keys. In other words, the log probability obtained from invoking the model on the input with key input_keys[i] will be assigned the key output_keys[i].
Parameters
- input_keys: The keys within the input record corresponding to model inputs.
- output_keys: The keys corresponding to the log probability data that will get added to the record by this transform.
- model_runner: The ModelRunner instance whose
predict
method will be invoked to obtain the log probability.
class Mean(Transform):
    """This transform computes the arithmetic mean of specified values in a record and augments said record."""

    def __init__(self, input_keys: List[str], output_key: str):
        """Mean initializer.

        :param input_keys: The keys corresponding to the values to take the mean of.
        :param output_key: The key under which the computed mean is added to the record.
        """
        super().__init__(input_keys, output_key)
        # The single output key is wrapped in a list for registration.
        self.register_input_output_keys(input_keys, [output_key])
        self.output_key = output_key

    @validate_call
    def __call__(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Augment the input record with the computed mean.

        :param record: The input record.
        :returns: The input record with the mean added in.
        """
        values = [record[key] for key in self.input_keys]
        record[self.output_key] = np.mean(values)
        return record
This transform computes the arithmetic mean of specified values in a record and augments said record.
def __init__(self, input_keys: List[str], output_key: str):
    """Mean initializer.

    :param input_keys: The keys corresponding to the values to take the mean of.
    :param output_key: The key under which the computed mean is added to the record.
    """
    super().__init__(input_keys, output_key)
    # The single output key is wrapped in a list for registration.
    registered_output_keys = [output_key]
    self.register_input_output_keys(input_keys, registered_output_keys)
    self.output_key = output_key
Mean initializer.
Parameters
- input_keys: The keys corresponding to the values to take the mean of.
- output_key: The key corresponding to the mean value, which gets added to the record.
class SplitWithDelimiter(Transform):
    """This transform splits the target output into a list of possible targets based on a
    target_output_delimiter and augments the input record.

    For example, if we had "England<OR>Uk" as a target output, record[output_key] = ["England", "Uk"]
    """

    def __init__(self, input_key: str, output_key: str, target_output_delimiter: str = "<OR>"):
        """SplitWithDelimiter initializer.

        :param input_key: The key that corresponds to the target output string.
        :param output_key: The key corresponding to the list of target outputs, which gets
            added to the record.
        :param target_output_delimiter: The delimiter on which the target output string is split.
        """
        super().__init__(input_key, output_key, target_output_delimiter)
        self.register_input_output_keys([input_key], [output_key])
        self.target_output_delimiter = target_output_delimiter
        self.output_key = output_key
        self.input_key = input_key

    @validate_call
    def __call__(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Augment the input record with the new list of possible targets.

        :param record: The input record.
        :returns: The input record with the list of possible targets added in.
        """
        target_output = record[self.input_key]
        record[self.output_key] = target_output.split(self.target_output_delimiter)
        return record
This transform splits the target output into a list of possible targets based on a target_output_delimiter
and augments the input record.
For example, if we had "England<OR>Uk" as a target output, record[output_key] = ["England", "Uk"]
def __init__(self, input_key: str, output_key: str, target_output_delimiter: str = "<OR>"):
    """SplitWithDelimiter initializer.

    :param input_key: The key that corresponds to the target output string.
    :param output_key: The key corresponding to the list of target outputs, which gets
        added to the record.
    :param target_output_delimiter: The delimiter on which the target output string is split.
    """
    super().__init__(input_key, output_key, target_output_delimiter)
    self.register_input_output_keys([input_key], [output_key])
    self.target_output_delimiter = target_output_delimiter
    self.output_key = output_key
    self.input_key = input_key
SplitWithDelimiter initializer.
Parameters
- input_key: The key that corresponds to the target output string.
- output_key: The key corresponding to the list of target outputs, which gets added to the record.