fmeval.data_loaders.data_config

 1from typing import Optional
 2from dataclasses import dataclass
 3from fmeval.util import require
 4from fmeval.constants import SUPPORTED_MIME_TYPES
 5
 6
 7@dataclass
 8class DataConfig:
 9    """
10    Configures the information required by data-loading components.
11
12    Note that the term "location" used below refers to a string
13    that can be used to locate the data that comprises a single
14    column in the to-be-produced Ray Dataset. As an example,
15    when the dataset MIME type is JSON or JSON Lines, the "location"
16    is a JMESPath query.
17
18    **Note**:
19        Parsing logic used by data loaders make the assumption that
20        attributes in this class with the suffix "_location" correspond
21        to a "location" (defined above). When adding new attributes to this class,
22        if an attribute corresponds to a location, the attribute name must end
23        with "_location"
24
25    :param dataset_name: the dataset name
26    :param dataset_uri: either a local path or s3 URI representing where the dataset is stored
27    :param dataset_mime_type: the MIME type of the dataset file
28    :param model_input_location: the location  for model inputs
29    :param model_output_location: the location for model outputs
30    :param target_output_location: the location for target outputs
31    :param category_location: the location for categories
32    :param sent_more_input_location: the location for the "sent more"
33            inputs (used by the Prompt Stereotyping evaluation algorithm)
34    :param sent_less_input_location: the location for the "sent less"
35            inputs (used by the Prompt Stereotyping evaluation algorithm)
36    :param sent_more_log_prob_location: the location for the "sent more"
37            input log probability (used by the Prompt Stereotyping evaluation algorithm)
38    :param sent_less_log_prob_location: the location for the "sent less"
39            input log probability (used by the Prompt Stereotyping evaluation algorithm).
40    :param context_location: the location of the context for RAG evaluations.
41    """
42
43    dataset_name: str
44    dataset_uri: str
45    dataset_mime_type: str
46    model_input_location: Optional[str] = None
47    model_output_location: Optional[str] = None
48    target_output_location: Optional[str] = None
49    category_location: Optional[str] = None
50    sent_more_input_location: Optional[str] = None
51    sent_less_input_location: Optional[str] = None
52    sent_more_log_prob_location: Optional[str] = None
53    sent_less_log_prob_location: Optional[str] = None
54    context_location: Optional[str] = None
55
56    def __post_init__(self):
57        require(
58            self.dataset_mime_type in SUPPORTED_MIME_TYPES,
59            f"Unsupported MIME type: {self.dataset_mime_type}. "
60            f"The following mime types are supported: {SUPPORTED_MIME_TYPES}.",
61        )
@dataclass
class DataConfig:
    """
    Configures the information required by data-loading components.

    Note that the term "location" used below refers to a string
    that can be used to locate the data that comprises a single
    column in the to-be-produced Ray Dataset. As an example,
    when the dataset MIME type is JSON or JSON Lines, the "location"
    is a JMESPath query.

    **Note**:
        Parsing logic used by data loaders makes the assumption that
        attributes in this class with the suffix "_location" correspond
        to a "location" (defined above). When adding new attributes to this class,
        if an attribute corresponds to a location, the attribute name must end
        with "_location".

    :param dataset_name: the dataset name
    :param dataset_uri: either a local path or S3 URI representing where the dataset is stored
    :param dataset_mime_type: the MIME type of the dataset file; checked against
            ``SUPPORTED_MIME_TYPES`` when the instance is created
    :param model_input_location: the location for model inputs
    :param model_output_location: the location for model outputs
    :param target_output_location: the location for target outputs
    :param category_location: the location for categories
    :param sent_more_input_location: the location for the "sent more"
            inputs (used by the Prompt Stereotyping evaluation algorithm)
    :param sent_less_input_location: the location for the "sent less"
            inputs (used by the Prompt Stereotyping evaluation algorithm)
    :param sent_more_log_prob_location: the location for the "sent more"
            input log probability (used by the Prompt Stereotyping evaluation algorithm)
    :param sent_less_log_prob_location: the location for the "sent less"
            input log probability (used by the Prompt Stereotyping evaluation algorithm)
    :param context_location: the location of the context for RAG evaluations
    """

    # Required attributes: these have no default and must be supplied by the caller.
    dataset_name: str
    dataset_uri: str
    dataset_mime_type: str

    # Optional "location" attributes. Every name ends with "_location" because
    # the data loaders rely on that suffix (see the class docstring).
    model_input_location: Optional[str] = None
    model_output_location: Optional[str] = None
    target_output_location: Optional[str] = None
    category_location: Optional[str] = None
    sent_more_input_location: Optional[str] = None
    sent_less_input_location: Optional[str] = None
    sent_more_log_prob_location: Optional[str] = None
    sent_less_log_prob_location: Optional[str] = None
    context_location: Optional[str] = None

    def __post_init__(self) -> None:
        """
        Check that ``dataset_mime_type`` is a member of ``SUPPORTED_MIME_TYPES``.

        The membership result and an error message are handed to
        ``fmeval.util.require``; presumably that helper raises when the
        condition is false -- confirm against ``fmeval.util``.
        """
        require(
            self.dataset_mime_type in SUPPORTED_MIME_TYPES,
            f"Unsupported MIME type: {self.dataset_mime_type}. "
            f"The following mime types are supported: {SUPPORTED_MIME_TYPES}.",
        )

Configures the information required by data-loading components.

Note that the term "location" used below refers to a string that can be used to locate the data that comprises a single column in the to-be-produced Ray Dataset. As an example, when the dataset MIME type is JSON or JSON Lines, the "location" is a JMESPath query.

Note: Parsing logic used by data loaders makes the assumption that attributes in this class with the suffix "_location" correspond to a "location" (defined above). When adding new attributes to this class, if an attribute corresponds to a location, the attribute name must end with "_location".

Parameters
  • dataset_name: the dataset name
  • dataset_uri: either a local path or S3 URI representing where the dataset is stored
  • dataset_mime_type: the MIME type of the dataset file
  • model_input_location: the location for model inputs
  • model_output_location: the location for model outputs
  • target_output_location: the location for target outputs
  • category_location: the location for categories
  • sent_more_input_location: the location for the "sent more" inputs (used by the Prompt Stereotyping evaluation algorithm)
  • sent_less_input_location: the location for the "sent less" inputs (used by the Prompt Stereotyping evaluation algorithm)
  • sent_more_log_prob_location: the location for the "sent more" input log probability (used by the Prompt Stereotyping evaluation algorithm)
  • sent_less_log_prob_location: the location for the "sent less" input log probability (used by the Prompt Stereotyping evaluation algorithm).
  • context_location: the location of the context for RAG evaluations.
DataConfig(dataset_name: str, dataset_uri: str, dataset_mime_type: str, model_input_location: Optional[str] = None, model_output_location: Optional[str] = None, target_output_location: Optional[str] = None, category_location: Optional[str] = None, sent_more_input_location: Optional[str] = None, sent_less_input_location: Optional[str] = None, sent_more_log_prob_location: Optional[str] = None, sent_less_log_prob_location: Optional[str] = None, context_location: Optional[str] = None)
dataset_name: str
dataset_uri: str
dataset_mime_type: str
model_input_location: Optional[str] = None
model_output_location: Optional[str] = None
target_output_location: Optional[str] = None
category_location: Optional[str] = None
sent_more_input_location: Optional[str] = None
sent_less_input_location: Optional[str] = None
sent_more_log_prob_location: Optional[str] = None
sent_less_log_prob_location: Optional[str] = None
context_location: Optional[str] = None