Complex Variables - pathfinder-analytics-uk/dab_project GitHub Wiki

Links and Resources


Project Code

clusters.yml

# Bundle variable holding a complete job-cluster specification as one
# complex (mapping-valued) variable. The job snippet on this page consumes it
# as a whole via "${var.ds3_v2_sn}" under job_clusters[].new_cluster.
variables:
  ds3_v2_sn:
    type: "complex"
    default:
      cluster_name: ""
      spark_version: 15.4.x-scala2.12
      # Single-node setup: singleNode profile plus a local Spark master,
      # paired with `num_workers: 0` and the ResourceClass tag below.
      spark_conf:
        # Quoted because the value contains the YAML indicators `[`, `,`, `*`.
        spark.master: "local[*, 4]"
        spark.databricks.cluster.profile: singleNode
      azure_attributes:
        first_on_demand: 1
        availability: SPOT_WITH_FALLBACK_AZURE
        spot_bid_max_price: -1
      # Driver and node use the same VM size.
      node_type_id: Standard_DS3_v2
      driver_node_type_id: Standard_DS3_v2
      custom_tags:
        ResourceClass: SingleNode
      spark_env_vars:
        PYSPARK_PYTHON: /databricks/python3/bin/python3
      enable_elastic_disk: true
      data_security_mode: SINGLE_USER
      runtime_engine: STANDARD
      num_workers: 0

citibike_etl_pipeline_nb.job.yml

# Notebook-based Citibike ETL job: bronze -> silver -> two gold tasks.
# Both gold tasks depend only on the silver task, not on each other.
resources:
  jobs:
    citibike_etl_pipeline_nb:
      name: citibike_etl_pipeline_nb

      queue:
        enabled: true

      # One job cluster shared by every task; its full specification comes
      # from the complex bundle variable `ds3_v2_sn`.
      job_clusters:
        - job_cluster_key: ds3_v2_sn
          new_cluster: "${var.ds3_v2_sn}"

      # "{{...}}" values are left as literal strings here (Databricks dynamic
      # value references); "${var.*}" values are bundle variable references.
      # NOTE(review): `var.catalog` is not declared in the snippets shown on
      # this page; confirm it is defined elsewhere in the bundle.
      tasks:
        # Bronze layer.
        - task_key: 01_bronze_citibike
          job_cluster_key: ds3_v2_sn
          notebook_task:
            notebook_path: ../citibike_etl/notebooks/01_bronze/01_bronze_citibike.ipynb
            source: WORKSPACE
            base_parameters:
              pipeline_id: "{{job.id}}"
              run_id: "{{job.run_id}}"
              task_id: "{{task.run_id}}"
              processed_timestamp: "{{job.start_time.iso_datetime}}"
              catalog: "${var.catalog}"

        # Silver layer; runs after bronze.
        - task_key: 02_silver_citibike
          depends_on:
            - task_key: 01_bronze_citibike
          job_cluster_key: ds3_v2_sn
          notebook_task:
            notebook_path: ../citibike_etl/notebooks/02_silver/02_silver_citibike.ipynb
            source: WORKSPACE
            base_parameters:
              pipeline_id: "{{job.id}}"
              run_id: "{{job.run_id}}"
              task_id: "{{task.run_id}}"
              processed_timestamp: "{{job.start_time.iso_datetime}}"
              catalog: "${var.catalog}"

        # Gold layer; each task runs after silver and receives only `catalog`.
        - task_key: 03_gold_citibike_daily_ride_summary
          depends_on:
            - task_key: 02_silver_citibike
          job_cluster_key: ds3_v2_sn
          notebook_task:
            notebook_path: ../citibike_etl/notebooks/03_gold/03_gold_citibike_daily_ride_summary.ipynb
            source: WORKSPACE
            base_parameters:
              catalog: "${var.catalog}"

        - task_key: 03_gold_citibike_daily_station_performance
          depends_on:
            - task_key: 02_silver_citibike
          job_cluster_key: ds3_v2_sn
          notebook_task:
            notebook_path: ../citibike_etl/notebooks/03_gold/03_gold_citibike_daily_station_performance.ipynb
            source: WORKSPACE
            base_parameters:
              catalog: "${var.catalog}"