Python Wheel Task - pathfinder-analytics-uk/dab_project GitHub Wiki
Project Code
src/dab_project/main.py
def main() -> None:
    """Print a placeholder message.

    This function is registered in setup.py as the ``main`` entry point
    (``main=dab_project.main:main``). It performs no real work.
    """
    print("This is a no-op function")
src/dab_project/__init__.py
setup.py
from setuptools import setup, find_packages
# Directory that holds the importable package sources.
_SOURCE_ROOT = "./src"

setup(
    name="dab_project",
    version="0.0.1",
    description="This contains the code in the ./src directory of the project",
    author="Your Name",
    # Discover packages under the source root and map the root namespace to it.
    packages=find_packages(where=_SOURCE_ROOT),
    package_dir={"": _SOURCE_ROOT},
    install_requires=["setuptools"],
    # Exposes dab_project.main:main under the name "main" in the "packages" group.
    entry_points={
        "packages": [
            "main=dab_project.main:main",
        ],
    },
)
resources/citibike_etl_pipeline_nb.job.yml
# Job definition for the citibike ETL pipeline: one wheel task followed by
# bronze -> silver -> two gold notebook tasks, all on the same job cluster.
resources:
  jobs:
    citibike_etl_pipeline_nb:
      name: citibike_etl_pipeline_nb
      tasks:
        # Calls the "main" entry point of the dab_project wheel.
        - task_key: 00_whl_upload
          python_wheel_task:
            package_name: dab_project
            entry_point: main
          job_cluster_key: ds3_v2_sn
          libraries:
            - whl: ../dist/*.whl

        # Bronze notebook; starts only after the wheel task.
        - task_key: 01_bronze_citibike
          depends_on:
            - task_key: 00_whl_upload
          notebook_task:
            notebook_path: ../citibike_etl/notebooks/01_bronze/01_bronze_citibike.ipynb
            base_parameters:
              pipeline_id: "{{job.id}}"
              run_id: "{{job.run_id}}"
              task_id: "{{task.run_id}}"
              processed_timestamp: "{{job.start_time.iso_datetime}}"
              catalog: "${var.catalog}"
            source: WORKSPACE
          job_cluster_key: ds3_v2_sn

        # Silver notebook; receives the same parameter set as the bronze task.
        - task_key: 02_silver_citibike
          depends_on:
            - task_key: 01_bronze_citibike
          notebook_task:
            notebook_path: ../citibike_etl/notebooks/02_silver/02_silver_citibike.ipynb
            base_parameters:
              pipeline_id: "{{job.id}}"
              run_id: "{{job.run_id}}"
              task_id: "{{task.run_id}}"
              processed_timestamp: "{{job.start_time.iso_datetime}}"
              catalog: "${var.catalog}"
            source: WORKSPACE
          job_cluster_key: ds3_v2_sn

        # The two gold tasks each depend only on the silver task, not on each other.
        - task_key: 03_gold_citibike_daily_ride_summary
          depends_on:
            - task_key: 02_silver_citibike
          notebook_task:
            notebook_path: ../citibike_etl/notebooks/03_gold/03_gold_citibike_daily_ride_summary.ipynb
            source: WORKSPACE
            base_parameters:
              catalog: "${var.catalog}"
          job_cluster_key: ds3_v2_sn

        - task_key: 03_gold_citibike_daily_station_performance
          depends_on:
            - task_key: 02_silver_citibike
          notebook_task:
            notebook_path: ../citibike_etl/notebooks/03_gold/03_gold_citibike_daily_station_performance.ipynb
            source: WORKSPACE
            base_parameters:
              catalog: "${var.catalog}"
          job_cluster_key: ds3_v2_sn

      # Cluster shared by every task above; its spec comes from the ds3_v2_sn bundle variable.
      job_clusters:
        - job_cluster_key: ds3_v2_sn
          new_cluster: "${var.ds3_v2_sn}"

      queue:
        enabled: true