# Sample config for the DataOps Job module - project variant.
# Demonstrates Glue ETL and Python shell jobs with templates, job
# bookmarks, connections, and extra libraries, all wired to a
# DataOps project.

# (Optional) Name of the DataOps project used to autowire job
# resources (databases, roles, security).
projectName: dataops-project-test

# (Optional) ARN of the SNS topic that receives job notifications and
# workflow alerts. When projectName is set, it is auto-resolved from
# the project.
notificationTopicArn: 'arn:{{partition}}:sns:{{region}}:{{account}}:test-topic'

# (Optional) Reusable job templates that can be inherited by job
# definitions via the template field. Template properties are
# deep-merged with job-specific overrides.
templates:
  # Shared defaults for the Python Spark ETL jobs in this file
  # (inherited by PythonJobOne and PythonJobTwo via `template`).
  ExamplePythonTemplate:
    # IAM role ARN for Glue job execution permissions.
    # Often created by the Roles module.
    # Example SSM: ssm:/{{org}}/{{domain}}/<roles_module_name>/role/<role_name>/arn
    executionRoleArn: some-arn
    # Job command configuration defining script and runtime
    # environment.
    command:
      # Job type. (enum: glueetl, pythonshell)
      name: 'glueetl'
      # (Optional) Python version for job runtime. (enum: 2, 3)
      # Quoted so it stays a string rather than an integer.
      pythonVersion: '3'
      # Relative path to the Glue script for job execution.
      scriptLocation: ./src/glue/python/job.py
    # Job description for documentation and management.
    description: Example of a Glue Job using an inline script
    # (Optional) Connection names for database and external system
    # access.
    connections:
      - project:connections/connectionVpc
    # (Optional) Default arguments passed to the job at runtime.
    # Here: turn on Glue job bookmarks.
    defaultArguments:
      --job-bookmark-option: job-bookmark-enable
    # (Optional) Execution properties including maximum concurrent
    # runs.
    executionProperty:
      maxConcurrentRuns: 1
    # (Optional) Glue runtime version for the job.
    # Quoted so it stays the string "2.0" rather than the float 2.0.
    glueVersion: '2.0'
    # (Optional) Maximum DPU capacity for the job.
    # Note: Use maxCapacity OR workerType+numberOfWorkers, not both.
    # Spark ETL (glueetl) jobs accept 2-100 DPUs, so 2 is the smallest
    # valid value; 0.0625 and 1 are only valid for pythonshell jobs.
    maxCapacity: 2
    # (Optional) Maximum retry count before job failure.
    maxRetries: 3
    # (Optional) Notification settings for job monitoring and
    # alerting.
    notificationProperty:
      # After a job run starts, minutes to wait before sending a
      # delay notification.
      notifyDelayAfter: 1
    # (Optional) Job timeout in minutes.
    timeout: 60

  # Shared defaults for Scala jobs (inherited by ScalaJobOne via
  # `template`).
  ExampleScalaTemplate:
    # (Optional) Glue runtime version for the job.
    glueVersion: '5.0'
    # (Optional) Default arguments passed to the job at runtime.
    # Here: select Scala as the job language.
    defaultArguments:
      --job-language: scala
    # IAM role ARN for Glue job execution permissions.
    # Often created by the Roles module.
    # Example SSM: ssm:/{{org}}/{{domain}}/<roles_module_name>/role/<role_name>/arn
    executionRoleArn: some-arn
# Map of job names to Glue job definitions for ETL processing and
# data transformation.
jobs:
  # Python Spark ETL job built on ExamplePythonTemplate, adding its
  # own input argument and continuous logging.
  PythonJobOne:
    # (Optional) Template name for configuration inheritance.
    template: 'ExamplePythonTemplate'
    # (Optional) Job-specific default arguments; deep-merged with the
    # template's defaultArguments.
    defaultArguments:
      --Input: s3://some-bucket/some-location1
    # (Optional) Maximum DPU capacity for the job. Overrides the value
    # inherited from the template. Used instead of allocatedCapacity,
    # which the Glue API deprecates in favour of maxCapacity, so the
    # merged job carries a single capacity setting.
    maxCapacity: 2
    # (Optional) Continuous logging configuration for real-time
    # monitoring.
    continuousLogging:
      # CloudWatch log group retention in days. Allowed:
      # 1,3,5,7,14,30,60,90,120,150,180,365,400,545,731,1827,3653,0.
      logGroupRetentionDays: 3

  # Python Spark ETL job built on ExamplePythonTemplate, with the
  # Spark UI enabled and extra scripts/files shipped with the job.
  PythonJobTwo:
    # (Optional) Template name for configuration inheritance.
    template: 'ExamplePythonTemplate'
    # (Optional) Job-specific default arguments; deep-merged with the
    # template's defaultArguments.
    defaultArguments:
      --Input: s3://some-bucket/some-location2
      # Quoted so the argument value stays the string "true".
      --enable-spark-ui: 'true'
      --spark-event-logs-path: s3://some-bucket/spark-event-logs-path/JobTwo/
    # (Optional) Maximum DPU capacity for the job. Overrides the value
    # inherited from the template. Used instead of allocatedCapacity,
    # which the Glue API deprecates in favour of maxCapacity, so the
    # merged job carries a single capacity setting.
    maxCapacity: 20
    # (Optional) Relative paths to additional Python scripts for
    # the job.
    additionalScripts:
      - ./src/glue/python/helper_etl.py
      - ./src/glue/python/utils/core.py
    # (Optional) Relative paths to additional files for the job.
    additionalFiles:
      - ./src/glue/scala/extra_file.txt

  # Scala Spark ETL job built on ExampleScalaTemplate, shipping an
  # extra file and an extra JAR with the job.
  ScalaJobOne:
    # (Optional) Template name for configuration inheritance.
    template: 'ExampleScalaTemplate'
    # Job description for documentation and management.
    description: testing
    # (Optional) Job-specific default arguments; deep-merged with the
    # template's defaultArguments.
    defaultArguments:
      --class: some.java.package.App
    # (Optional) Worker type and worker count for the job. Used
    # instead of allocatedCapacity, which the Glue API deprecates;
    # Glue 2.0+ jobs (this one inherits 5.0) are sized by workers.
    # Two G.1X workers correspond to the previous 2 capacity units.
    workerType: 'G.1X'
    numberOfWorkers: 2
    # Job command configuration defining script and runtime
    # environment.
    command:
      # Job type. (enum: glueetl, pythonshell)
      name: 'glueetl'
      # Relative path to the Glue script for job execution.
      scriptLocation: ./src/glue/scala/App.scala
    # (Optional) Relative paths to additional files for the job.
    additionalFiles:
      - ./src/glue/scala/extra_file.txt
    # (Optional) Relative paths to additional JAR files for the job.
    additionalJars:
      - ./src/glue/scala/lib/extra.jar

  # Standalone (template-less) Python shell job. It exists to exercise
  # the pythonshell command name and the Python version 2 enum value.
  PythonShellJob:
    # Job description for documentation and management.
    description: Python shell job for lightweight data processing
    # IAM role ARN for Glue job execution permissions.
    # Often created by the Roles module.
    # Example SSM: ssm:/{{org}}/{{domain}}/<roles_module_name>/role/<role_name>/arn
    executionRoleArn: some-arn
    # (Optional) Glue runtime version for the job.
    glueVersion: '1.0'
    # Job command configuration defining script and runtime
    # environment.
    command:
      # Relative path to the Glue script for job execution.
      scriptLocation: ./src/glue/python/job.py
      # Job type. (enum: glueetl, pythonshell)
      name: pythonshell
      # (Optional) Python version for job runtime. (enum: 2, 3)
      pythonVersion: '2'
    # (Optional) Maximum DPU capacity for the job.
    # Note: pythonshell jobs use maxCapacity (0.0625 or 1).
    maxCapacity: 1
    # (Optional) Job timeout in minutes.
    timeout: 30
    # (Optional) Maximum retry count before job failure.
    maxRetries: 0

  # Data Quality evaluation job built on the pre-built DQ script from
  # DF. Bundled scripts are referenced with the `asset:` prefix rather
  # than a local file path.
  DqEvaluationJob:
    description: Run Glue Data Quality evaluations with optional SMUS publishing
    executionRoleArn: some-arn
    glueVersion: '4.0'
    # Sized by worker type and count (no maxCapacity on this job).
    workerType: G.1X
    numberOfWorkers: 2
    timeout: 60
    command:
      name: glueetl
      # The asset: prefix selects the pre-built DQ main script.
      scriptLocation: 'asset:dq-main.py'
    # Utility scripts deployed alongside the main script.
    additionalScripts:
      - 'asset:dq_config.py'
      - 'asset:smus.py'
    defaultArguments:
      # JSON document passed as one string argument. Single quotes keep
      # the \" sequences literal so they reach the JSON parser intact.
      --application_opts: '{"table": {"name": "my_table", "source": {"database": "my_db", "table_name": "my_table"}, "rulesets": {"basic": {"type": "dqdl", "value": "Rules = [IsComplete \"id\"]"}}}}'
