fmeval.constants
"""Constants shared across the fmeval package."""

from enum import Enum
from dataclasses import dataclass
from collections import OrderedDict
from typing import Optional

# Output results path
EVAL_RESULTS_PATH = "EVAL_RESULTS_PATH"
DEFAULT_EVAL_RESULTS_PATH = "/tmp/eval_results/"

# Number of actors to use
PARALLELIZATION_FACTOR = "PARALLELIZATION_FACTOR"
PARTITION_MULTIPLIER = 5

# Environment variables for SageMaker endpoint urls
SAGEMAKER_SERVICE_ENDPOINT_URL = "SAGEMAKER_SERVICE_ENDPOINT_URL"
SAGEMAKER_RUNTIME_ENDPOINT_URL = "SAGEMAKER_RUNTIME_ENDPOINT_URL"

# We default the boto3 client region to us-west-2 as the dataset bucket cannot be accessed from opt-in regions.
BUILT_IN_DATASET_PREFIX = "s3://fmeval/datasets"
BUILT_IN_DATASET_DEFAULT_REGION = "us-west-2"

# Mapping of iso region to built in dataset region in the same partition
BUILT_IN_DATASET_ISO_REGIONS = {
    "us-isof-south-1": "us-isof-south-1",
    "us-isof-east-1": "us-isof-south-1",
}

# Environment variable for disabling telemetry
DISABLE_FMEVAL_TELEMETRY = "DISABLE_FMEVAL_TELEMETRY"


@dataclass(frozen=True)
class Column:
    """
    This class represents a column in the Ray Dataset produced by
    an evaluation algorithm's `evaluate` method.

    Note that some columns are created during the "data loading" stage,
    when the initial Ray Dataset object is created by data_loaders.util.get_dataset,
    while the remaining columns are created during the execution of `evaluate`.
    Only the contents of the columns created during the data loading stage
    have the potential to be cast to strings.

    :param name: The name of the column as it appears in the Ray Dataset.
    :param should_cast: Whether the contents of this column should
        be cast to strings during data loading.
        This parameter is None (as opposed to False) for columns that do
        not exist during data loading to make it clear that casting these
        columns is not even a possibility to begin with.
    """

    name: str
    should_cast: Optional[bool] = None


class DatasetColumns(Enum):
    """
    This Enum represents the columns that appear in the finalized
    Ray Dataset produced during the course of executing an eval algorithm's
    `evaluate` method.

    These are the only columns (aside from score columns) whose
    data gets written to output records by `util.save_dataset`.
    Other algorithm-specific columns that get produced as intermediate
    results (for example, CLASSIFIED_MODEL_OUTPUT_COLUMN_NAME in
    ClassificationAccuracy) are not included here, and thus won't
    get saved by `util.save_dataset`.
    """

    MODEL_INPUT = Column(name="model_input", should_cast=True)
    PROMPT = Column(name="prompt")
    MODEL_OUTPUT = Column(name="model_output", should_cast=True)
    MODEL_LOG_PROBABILITY = Column(name="model_log_probability")
    TARGET_OUTPUT = Column(name="target_output", should_cast=True)
    CATEGORY = Column(name="category", should_cast=True)
    CONTEXT = Column(name="context", should_cast=True)
    SENT_MORE_INPUT = Column(name="sent_more_input", should_cast=True)
    SENT_LESS_INPUT = Column(name="sent_less_input", should_cast=True)
    SENT_MORE_PROMPT = Column(name="sent_more_prompt")
    SENT_LESS_PROMPT = Column(name="sent_less_prompt")
    SENT_MORE_LOG_PROB = Column(name="sent_more_log_prob", should_cast=False)
    SENT_LESS_LOG_PROB = Column(name="sent_less_log_prob", should_cast=False)
    ERROR = Column(name="error", should_cast=False)


# Column-name -> enum-member lookup, in enum declaration order.
DATASET_COLUMNS = OrderedDict((col.value.name, col) for col in DatasetColumns)
COLUMNS_WITH_LISTS = [DatasetColumns.CONTEXT.value.name]

# This suffix must be included at the end of all
# DataConfig attribute names where the attribute
# represents a mechanism for locating the data for
# a column. An example mechanism is a JMESPath query
# (when the dataset format is JSON/JSON Lines).
DATA_CONFIG_LOCATION_SUFFIX = "_location"

# Supported MIME types
MIME_TYPE_JSON = "application/json"
MIME_TYPE_JSONLINES = "application/jsonlines"

SUPPORTED_MIME_TYPES = [MIME_TYPE_JSON, MIME_TYPE_JSONLINES]

# Aggregation methods
MEAN = "mean"

# Configures `save_dataset` behavior regarding how many
# EvalOutputRecords to accumulate before writing them
# to the output JSON Lines file.
EVAL_OUTPUT_RECORDS_BATCH_SIZE = 1024

# Dataloader seed
SEED = 1234

# Semantic robustness perturbation types
BUTTER_FINGER = "butter_finger"
RANDOM_UPPER_CASE = "random_upper_case"
WHITESPACE_ADD_REMOVE = "whitespace_add_remove"

PREFIX_FOR_DELTA_SCORES = "delta_"

# Check if model is deterministic for first NUM_ROWS_DETERMINISTIC rows of dataset
NUM_ROWS_DETERMINISTIC = 5

MAX_ROWS_TO_TAKE = 100000

# The absolute tolerance used when performing approximate numerical comparisons,
# specifically, when comparing EvalScore objects.
ABS_TOL = 1e-3

# Jumpstart
JUMPSTART_MODEL_ID = "jumpstart_model_id"
JUMPSTART_MODEL_VERSION = "jumpstart_model_version"
JUMPSTART_MODEL_TYPE = "jumpstart_model_type"
MODEL_ID = "model_id"
SPEC_KEY = "spec_key"
DEFAULT_PAYLOADS = "default_payloads"
SDK_MANIFEST_FILE = "models_manifest.json"
PROPRIETARY_SDK_MANIFEST_FILE = "proprietary-sdk-manifest.json"
JUMPSTART_BUCKET_BASE_URL_FORMAT = "https://jumpstart-cache-prod-{}.s3.{}.amazonaws.com"
JUMPSTART_BUCKET_BASE_URL_FORMAT_ENV_VAR = "JUMPSTART_BUCKET_BASE_URL_FORMAT"
GENERATED_TEXT_JMESPATH_EXPRESSION = "*.output_keys.generated_text"
INPUT_LOG_PROBS_JMESPATH_EXPRESSION = "*.output_keys.input_logprobs"
EMBEDDING_JMESPATH_EXPRESSION = "embedding"
IS_EMBEDDING_MODEL = "is_embedding_model"

# BERTScore
BERTSCORE_DEFAULT_MODEL = "microsoft/deberta-xlarge-mnli"


# S3 multi-part upload constants
UPLOAD_ID = "UploadId"
PARTS = "Parts"
E_TAG = "ETag"
PART_NUMBER = "PartNumber"
@dataclass(frozen=True)
class Column:
    """
    This class represents a column in the Ray Dataset produced by
    an evaluation algorithm's `evaluate` method.

    Note that some columns are created during the "data loading" stage,
    when the initial Ray Dataset object is created by data_loaders.util.get_dataset,
    while the remaining columns are created during the execution of `evaluate`.
    Only the contents of the columns created during the data loading stage
    have the potential to be cast to strings.

    :param name: The name of the column as it appears in the Ray Dataset.
    :param should_cast: Whether the contents of this column should
        be cast to strings during data loading.
        This parameter is None (as opposed to False) for columns that do
        not exist during data loading to make it clear that casting these
        columns is not even a possibility to begin with.
    """

    name: str
    should_cast: Optional[bool] = None
This class represents a column in the Ray Dataset produced by
an evaluation algorithm's `evaluate` method.
Note that some columns are created during the "data loading" stage,
when the initial Ray Dataset object is created by data_loaders.util.get_dataset,
while the remaining columns are created during the execution of `evaluate`.
Only the contents of the columns created during the data loading stage
have the potential to be cast to strings.
Parameters
- name: The name of the column as it appears in the Ray Dataset.
- should_cast: Whether the contents of this column should be cast to strings during data loading. This parameter is None (as opposed to False) for columns that do not exist during data loading, to make it clear that casting these columns is not even a possibility to begin with.
class DatasetColumns(Enum):
    """
    This Enum represents the columns that appear in the finalized
    Ray Dataset produced during the course of executing an eval algorithm's
    `evaluate` method.

    These are the only columns (aside from score columns) whose
    data gets written to output records by `util.save_dataset`.
    Other algorithm-specific columns that get produced as intermediate
    results (for example, CLASSIFIED_MODEL_OUTPUT_COLUMN_NAME in
    ClassificationAccuracy) are not included here, and thus won't
    get saved by `util.save_dataset`.
    """

    MODEL_INPUT = Column(name="model_input", should_cast=True)
    PROMPT = Column(name="prompt")
    MODEL_OUTPUT = Column(name="model_output", should_cast=True)
    MODEL_LOG_PROBABILITY = Column(name="model_log_probability")
    TARGET_OUTPUT = Column(name="target_output", should_cast=True)
    CATEGORY = Column(name="category", should_cast=True)
    CONTEXT = Column(name="context", should_cast=True)
    SENT_MORE_INPUT = Column(name="sent_more_input", should_cast=True)
    SENT_LESS_INPUT = Column(name="sent_less_input", should_cast=True)
    SENT_MORE_PROMPT = Column(name="sent_more_prompt")
    SENT_LESS_PROMPT = Column(name="sent_less_prompt")
    SENT_MORE_LOG_PROB = Column(name="sent_more_log_prob", should_cast=False)
    SENT_LESS_LOG_PROB = Column(name="sent_less_log_prob", should_cast=False)
    ERROR = Column(name="error", should_cast=False)
This Enum represents the columns that appear in the finalized
Ray Dataset produced during the course of executing an eval algorithm's
`evaluate` method.
These are the only columns (aside from score columns) whose
data gets written to output records by `util.save_dataset`.
Other algorithm-specific columns that get produced as intermediate
results (for example, CLASSIFIED_MODEL_OUTPUT_COLUMN_NAME in
ClassificationAccuracy) are not included here, and thus won't
get saved by `util.save_dataset`.