# Sample config for the Data Science Team module.
# Provisions a SageMaker Studio domain with IAM auth, user profiles,
# lifecycle configs, custom images, S3 mini data lake with inventory,
# and team access controls for collaborative ML development.
#
# This is the comprehensive configuration, demonstrating all
# properties compatible with IAM authentication mode. For SSO auth
# mode, see sample-config-sso.yaml.

# Complete data science team infrastructure configuration. Defines
# SageMaker Studio domain, S3 mini data lake, Athena workgroup,
# execution roles, and user profiles.
team:
  # Admin roles granted access to team resources, including KMS keys,
  # S3 buckets, and SageMaker resources. Each role may be referenced
  # by name, arn, or id; see CONFIGURATION.md for the role reference
  # options.
  dataAdminRoles:
    - name: 'Admin'

  # (Optional) Roles of team members who access shared resources
  # such as the data lake, SageMaker Studio, and collaborative tools.
  teamUserRoles:
    - id: 'generated-role-id:data-scientist'
    # Roles marked immutable (e.g. SSO roles) are given access only
    # to the team bucket and KMS key, via resource policies.
    - name: 'AWSReservedSSO_datascientist_abcdefg'
      immutable: true

  # Execution role used by SageMaker workloads, including training
  # jobs, endpoints, and notebooks. The role must trust the
  # sagemaker.amazonaws.com service for both sts:AssumeRole and
  # sts:SetSourceIdentity.
  teamExecutionRole:
    id: 'generated-role-id:team-execution-role'

  # (Optional) Custom prefix for policy names, giving portable naming
  # across accounts with SSO integration. When set, policy names use
  # this prefix instead of the naming module.
  verbatimPolicyNamePrefix: 'some-prefix'

  # (Optional) S3 inventory configurations for content analysis and
  # governance of the team data lake bucket.
  inventories:
    team-inventory:
      # S3 prefix to include in the inventory report
      prefix: 'data/'
      # (Optional) ID of the AWS account that owns the destination
      # bucket, for cross-account inventory delivery
      destinationAccount: '{{context:account-2}}'
      # (Optional) Bucket that receives the inventory reports.
      # Defaults to the source bucket under the /inventory prefix.
      destinationBucket: 'test-inventory-bucket'
      # (Optional) S3 prefix within the destination bucket under
      # which inventory reports are stored
      destinationPrefix: 'inventory-reports/'
  # (Optional) Configuration of the SageMaker Studio domain used as
  # the team's collaborative ML development environment.
  studioDomainConfig:
    # Authentication mode of the domain (enum: IAM, SSO)
    authMode: 'IAM'
    # ID of the VPC for Studio domain deployment.
    # Often created by your VPC/networking stack.
    # Example SSM reference: ssm:/path/to/vpc/id
    vpcId: 'vpc-id'
    # IDs of the subnets for Studio user applications.
    # Often created by your VPC/networking stack.
    # Example SSM reference: ssm:/path/to/subnet/id
    subnetIds:
      - 'subnet-id'
    # (Optional) ARN of the KMS key for EFS encryption
    kmsKeyArn: 'arn:{{partition}}:kms:{{region}}:{{account}}:key/test-efs-key'
    # (Optional) Memory limit, in MB, for the lifecycle asset
    # deployment Lambda
    assetDeploymentMemoryLimitMB: 512
    # (Optional) S3 prefix under which lifecycle assets are stored
    assetPrefix: 'lifecycle-assets/'
    # (Optional) Default execution role for Studio applications
    defaultExecutionRole:
      id: 'generated-role-id:team-execution-role'
    # (Optional) Admin roles for management of the domain
    dataAdminRoles:
      - arn: 'arn:{{partition}}:iam::{{account}}:role/DomainAdmin'
    # (Optional) Security group ingress rules. Every rule lists its
    # source identifier first, then protocol, port(s), and
    # description.
    securityGroupIngress:
      # (Optional) Rules whose source is an IPv4 CIDR block
      ipv4:
        # CIDR block the rule applies to
        - cidr: '10.0.0.0/24'
          protocol: 'tcp'
          port: 443
          # (Optional) Ending port number for port range rules
          toPort: 443
          # (Optional) Description for the rule
          description: 'Allow HTTPS from internal network'
      # (Optional) Rules whose source is another security group
      sg:
        # Identifier of the security group the rule applies to
        - sgId: 'ssm:/ml/sm/sg/id'
          protocol: 'tcp'
          port: 443
      # (Optional) Rules whose source is a managed prefix list
      prefixList:
        # Identifier of the prefix list the rule applies to
        - prefixList: 'pl-test-ingress'
          protocol: 'tcp'
          port: 443
          description: 'Ingress from managed prefix list'
    # (Optional) Security group egress rules. Every rule lists its
    # destination identifier first, then protocol, port(s), and
    # description.
    securityGroupEgress:
      # (Optional) Rules whose destination is an IPv4 CIDR block
      ipv4:
        - cidr: '0.0.0.0/0'
          protocol: 'tcp'
          port: 443
          # (Optional) Description for the rule
          description: 'Allow outbound HTTPS'
      # (Optional) Rules whose destination is another security group
      sg:
        - sgId: 'ssm:/ml/sm/sg/id'
          protocol: 'tcp'
          port: 443
      # (Optional) Rules whose destination is a managed prefix list
      prefixList:
        # Identifier of the prefix list the rule applies to
        - prefixList: 'pl-4ea54027'
          protocol: 'tcp'
          port: 443
          description: 'prefix list for com.amazonaws.{{region}}.dynamodb'
        - prefixList: 'pl-7da54014'
          protocol: 'tcp'
          port: 443
          # (Optional) Ending port number for port range rules
          toPort: 443
          description: 'prefix list for com.amazonaws.{{region}}.s3'

    # (Optional) S3 prefix under which shared notebooks are stored
    notebookSharingPrefix: 'notebooks'

    # (Optional) Named user profiles for the Studio domain. Each key
    # is the user identifier; in IAM mode this is the Session Name
    # portion of aws:userid.
    userProfiles:
      example-user-id:
        # Required when authMode is IAM. The role from which the
        # user will launch the user profile in Studio.
        userRole:
          id: 'generated-role-id:data-scientist'

    # (Optional) Default user settings for Studio applications
    defaultUserSettings:
      # (Optional) Settings for the KernelGateway app
      kernelGatewayAppSettings:
        # (Optional) Custom SageMaker images configured to run as a
        # KernelGateway app
        customImages:
          # Name of the AppImageConfig
          - appImageConfigName: 'appImageConfigName'
            # Name of the CustomImage
            imageName: 'imageName'
            # (Optional) Version number of the CustomImage
            imageVersionNumber: 1
        # (Optional) Default instance type and SageMaker image ARN
        # used by the KernelGateway app (keys in alphabetical order)
        defaultResourceSpec:
          # (Optional) Instance type
          instanceType: 'ml.t3.medium'
          # (Optional) ARN of the Lifecycle Configuration
          lifecycleConfigArn: 'arn:{{partition}}:sagemaker:{{region}}:{{account}}:studio-lifecycle-config/test-lcc'
          # (Optional) ARN of the SageMaker image
          sageMakerImageArn: 'arn:{{partition}}:sagemaker:{{region}}:{{account}}:image/test-image'
          # (Optional) ARN of the image version
          sageMakerImageVersionArn: 'arn:{{partition}}:sagemaker:{{region}}:{{account}}:image-version/test-image/1'
        # (Optional) ARNs of the Lifecycle Configurations attached to
        # the user profile or domain
        lifecycleConfigArns:
          - 'arn:{{partition}}:sagemaker:{{region}}:{{account}}:studio-lifecycle-config/test-kernel-lcc'
      # (Optional) Settings for the JupyterLab app
      jupyterLabAppSettings:
        # (Optional) Lifecycle management of JupyterLab applications;
        # holds the idle shutdown settings
        appLifecycleManagement:
          # (Optional) Settings related to idle shutdown of Studio
          # applications
          idleSettings:
            # (Optional) Time, in minutes, that SageMaker waits after
            # the application becomes idle before shutting it down
            idleTimeoutInMinutes: 60
            # (Optional) Whether idle shutdown is activated for the
            # application type
            lifecycleManagement: 'ENABLED'
            # (Optional) Maximum value, in minutes, to which the user
            # can set custom idle shutdown
            maxIdleTimeoutInMinutes: 120
            # (Optional) Minimum value, in minutes, to which the user
            # can set custom idle shutdown
            minIdleTimeoutInMinutes: 30
        # (Optional) Lifecycle configuration that runs before the
        # default lifecycle configuration
        builtInLifecycleConfigArn: 'arn:{{partition}}:sagemaker:{{region}}:{{account}}:studio-lifecycle-config/test-builtin-lcc'
        # (Optional) Git repositories that SageMaker automatically
        # displays to users for cloning
        codeRepositories:
          # URL of the Git repository
          - repositoryUrl: 'https://github.com/example/repo.git'
        # (Optional) Custom SageMaker images configured to run as a
        # JupyterLab app
        customImages:
          # Name of the AppImageConfig
          - appImageConfigName: 'jupyterLabAppImageConfig'
            # Name of the CustomImage
            imageName: 'jupyterLabImage'
            # (Optional) Version number of the CustomImage
            imageVersionNumber: 1
        # (Optional) Default instance type and SageMaker image ARN
        # used by the JupyterLab app (keys in alphabetical order)
        defaultResourceSpec:
          # (Optional) Instance type
          instanceType: 'ml.t3.medium'
          # (Optional) ARN of the Lifecycle Configuration
          lifecycleConfigArn: 'arn:{{partition}}:sagemaker:{{region}}:{{account}}:studio-lifecycle-config/test-jupyterlab-default-lcc'
          # (Optional) ARN of the SageMaker image
          sageMakerImageArn: 'arn:{{partition}}:sagemaker:{{region}}:{{account}}:image/test-jupyterlab-image'
          # (Optional) ARN of the image version
          sageMakerImageVersionArn: 'arn:{{partition}}:sagemaker:{{region}}:{{account}}:image-version/test-jupyterlab-image/1'
        # (Optional) ARNs of the lifecycle configurations attached to
        # the user profile or domain
        lifecycleConfigArns:
          - 'arn:{{partition}}:sagemaker:{{region}}:{{account}}:studio-lifecycle-config/test-jupyterlab-lcc'
      # (Optional) Settings for the JupyterServer app
      jupyterServerAppSettings:
        # (Optional) Default instance type and SageMaker image ARN
        # used by the JupyterServer app (keys in alphabetical order)
        defaultResourceSpec:
          # (Optional) Instance type; JupyterServer apps only support
          # the 'system' value
          instanceType: 'system'
          # (Optional) ARN of the Lifecycle Configuration
          lifecycleConfigArn: 'arn:{{partition}}:sagemaker:{{region}}:{{account}}:studio-lifecycle-config/test-jupyter-server-default-lcc'
          # (Optional) ARN of the SageMaker image
          sageMakerImageArn: 'arn:{{partition}}:sagemaker:{{region}}:{{account}}:image/test-jupyter-server-image'
          # (Optional) ARN of the image version
          sageMakerImageVersionArn: 'arn:{{partition}}:sagemaker:{{region}}:{{account}}:image-version/test-jupyter-server-image/1'
        # (Optional) ARNs of the Lifecycle Configurations attached to
        # the JupyterServerApp
        lifecycleConfigArns:
          - 'arn:{{partition}}:sagemaker:{{region}}:{{account}}:studio-lifecycle-config/test-jupyter-server-lcc'
      # (Optional) Settings that configure the RSessionGateway app
      rSessionAppSettings:
        # (Optional) Custom SageMaker images configured to run as a
        # RSession app
        customImages:
          - appImageConfigName: 'rSessionAppImageConfig'
            imageName: 'rSessionImage'
            # (Optional) Version number of the CustomImage
            imageVersionNumber: 1
        # (Optional) Instance type plus the ARNs of a SageMaker image
        # and image version (keys in alphabetical order)
        defaultResourceSpec:
          # (Optional) Instance type
          instanceType: 'ml.t3.medium'
          # (Optional) ARN of the Lifecycle Configuration
          lifecycleConfigArn: 'arn:{{partition}}:sagemaker:{{region}}:{{account}}:studio-lifecycle-config/test-rsession-default-lcc'
          # (Optional) ARN of the SageMaker image
          sageMakerImageArn: 'arn:{{partition}}:sagemaker:{{region}}:{{account}}:image/test-rsession-image'
          # (Optional) ARN of the image version
          sageMakerImageVersionArn: 'arn:{{partition}}:sagemaker:{{region}}:{{account}}:image-version/test-rsession-image/1'
      # (Optional) Settings that configure user interaction with the
      # RStudioServerPro app
      rStudioServerProAppSettings:
        # (Optional) Whether the current user has access to the
        # RStudioServerPro app
        accessStatus: 'ENABLED'
        # (Optional) Level of permissions the user has within the
        # RStudioServerPro app (default: User)
        userGroup: 'R_STUDIO_ADMIN'
      # (Optional) Security groups for the VPC that Studio uses for
      # communication
      securityGroups:
        - 'sg-test-default-user-sg'
      # (Optional) Options for sharing SageMaker Studio notebooks
      sharingSettings:
        # (Optional) Whether the notebook cell output is included
        # when the notebook is shared (default: Disabled)
        notebookOutputOption: 'Allowed'
        # (Optional) ID of the KMS encryption key used to encrypt
        # the notebook cell output in S3
        s3KmsKeyId: 'arn:{{partition}}:kms:{{region}}:{{account}}:key/test-sharing-key'
        # (Optional) S3 location used to store the shared notebook
        # snapshots
        s3OutputPath: 's3://test-sharing-bucket/notebooks/'
      # (Optional) State of the Studio web portal
      # (enum: ENABLED, DISABLED)
      studioWebPortal: 'ENABLED'

    # (Optional) Lifecycle configurations for Studio apps
    lifecycleConfigs:
      # (Optional) Lifecycle config for the main Jupyter app. Runs
      # every time the main Jupyter app container is launched.
      jupyter:
        # (Optional) Assets that are staged in S3 and then copied to
        # the SageMaker container before the lifecycle commands run.
        # Each asset is available under $ASSETS_DIR/<asset_name>/
        assets:
          testing:
            # Path of the local file or directory to deploy
            sourcePath: './testing_asset_dir'
            # (Optional) Glob patterns excluded from asset packaging
            exclude:
              - '*.pyc'
              - '__pycache__'
        # Lifecycle commands to execute, in order
        cmds:
          - 'echo "testing jupyter"'
          - 'sh $ASSETS_DIR/testing/test.sh'

      # (Optional) Lifecycle config for kernel gateway apps. Runs
      # every time a kernel gateway container is launched.
      kernel:
        # Assets referenced by the commands via
        # $ASSETS_DIR/<asset_name>/
        assets:
          testing:
            sourcePath: './testing_asset_dir'
        # Lifecycle commands to execute, in order
        cmds:
          - 'echo "testing kernel"'
          - 'sh $ASSETS_DIR/testing/test.sh'

      # (Optional) Lifecycle script for JupyterLab (Studio Latest).
      # Runs every time a JupyterLab app container is launched.
      jupyterLab:
        # Assets referenced by the commands via
        # $ASSETS_DIR/<asset_name>/
        assets:
          testing:
            sourcePath: './testing_asset_dir'
        # Lifecycle commands to execute, in order
        cmds:
          - 'echo "testing jupyterLab"'
          - 'sh $ASSETS_DIR/testing/test.sh'
