Complex Variables - pathfinder-analytics-uk/dab_project GitHub Wiki
Links and Resources
Project Code
clusters.yml
# Bundle variables holding reusable cluster definitions.
variables:
  # Complex (mapping-valued) variable describing a Standard_DS3_v2 cluster
  # with zero workers; the job definition consumes it as ${var.ds3_v2_sn}.
  ds3_v2_sn:
    type: "complex"
    default:
      cluster_name: ""
      spark_version: 15.4.x-scala2.12
      spark_conf:
        # Quoted so the brackets, asterisk and comma are always read as a
        # plain string rather than YAML flow/alias syntax.
        spark.master: "local[*, 4]"
        spark.databricks.cluster.profile: singleNode
      azure_attributes:
        first_on_demand: 1
        availability: SPOT_WITH_FALLBACK_AZURE
        spot_bid_max_price: -1
      node_type_id: Standard_DS3_v2
      driver_node_type_id: Standard_DS3_v2
      custom_tags:
        ResourceClass: SingleNode
      spark_env_vars:
        PYSPARK_PYTHON: /databricks/python3/bin/python3
      enable_elastic_disk: true
      data_security_mode: SINGLE_USER
      runtime_engine: STANDARD
      # No workers: pairs with the singleNode profile and the SingleNode tag.
      num_workers: 0
citibike_etl_pipeline_nb.job.yml
# Job resource: Citibike ETL pipeline run as notebook tasks
# (bronze -> silver -> two gold aggregations).
resources:
  jobs:
    citibike_etl_pipeline_nb:
      name: citibike_etl_pipeline_nb

      tasks:
        # Bronze: first task in the chain (no depends_on).
        - task_key: 01_bronze_citibike
          notebook_task:
            notebook_path: ../citibike_etl/notebooks/01_bronze/01_bronze_citibike.ipynb
            base_parameters:
              # "{{...}}" values are job-level dynamic references; they are
              # quoted so YAML does not parse the braces as a flow mapping.
              pipeline_id: "{{job.id}}"
              run_id: "{{job.run_id}}"
              task_id: "{{task.run_id}}"
              processed_timestamp: "{{job.start_time.iso_datetime}}"
              # "${var...}" is a bundle variable substitution.
              catalog: "${var.catalog}"
            source: WORKSPACE
          job_cluster_key: ds3_v2_sn

        # Silver: runs after bronze and receives the same parameter set.
        - task_key: 02_silver_citibike
          depends_on:
            - task_key: 01_bronze_citibike
          notebook_task:
            notebook_path: ../citibike_etl/notebooks/02_silver/02_silver_citibike.ipynb
            base_parameters:
              pipeline_id: "{{job.id}}"
              run_id: "{{job.run_id}}"
              task_id: "{{task.run_id}}"
              processed_timestamp: "{{job.start_time.iso_datetime}}"
              catalog: "${var.catalog}"
            source: WORKSPACE
          job_cluster_key: ds3_v2_sn

        # Gold: both tasks depend only on silver, not on each other, and
        # take just the catalog parameter.
        - task_key: 03_gold_citibike_daily_ride_summary
          depends_on:
            - task_key: 02_silver_citibike
          notebook_task:
            notebook_path: ../citibike_etl/notebooks/03_gold/03_gold_citibike_daily_ride_summary.ipynb
            source: WORKSPACE
            base_parameters:
              catalog: "${var.catalog}"
          job_cluster_key: ds3_v2_sn

        - task_key: 03_gold_citibike_daily_station_performance
          depends_on:
            - task_key: 02_silver_citibike
          notebook_task:
            notebook_path: ../citibike_etl/notebooks/03_gold/03_gold_citibike_daily_station_performance.ipynb
            source: WORKSPACE
            base_parameters:
              catalog: "${var.catalog}"
          job_cluster_key: ds3_v2_sn

      # One job cluster shared by every task above; its full spec comes from
      # the complex bundle variable ds3_v2_sn.
      job_clusters:
        - job_cluster_key: ds3_v2_sn
          new_cluster: "${var.ds3_v2_sn}"

      queue:
        enabled: true