Python Wheel Task - pathfinder-analytics-uk/dab_project GitHub Wiki

Project Code

src/dab_project/main.py

def main():
    """Print a fixed placeholder message and return ``None``.

    The function takes no arguments and does nothing besides writing one
    line to standard output.
    """
    message = "This is a no-op function"
    print(message)

src/dab_project/__init__.py

setup.py

from setuptools import setup, find_packages

# Directory that holds the importable packages; used both for package
# discovery and as the root of the package-to-directory mapping.
_SOURCE_ROOT = "./src"

# Registers the name "main" for dab_project.main:main under the
# "packages" entry-point group.
_ENTRY_POINTS = {"packages": ["main=dab_project.main:main"]}

# Distribution metadata for the dab_project package.
setup(
    name="dab_project",
    version="0.0.1",
    description="This contains the code in the ./src directory of the project",
    author="Your Name",
    packages=find_packages(where=_SOURCE_ROOT),
    package_dir={"": _SOURCE_ROOT},
    install_requires=["setuptools"],
    entry_points=_ENTRY_POINTS,
)

resources/citibike_etl_pipeline_nb.job.yml

# Job definition for the citibike ETL pipeline.
# Task graph as declared by the depends_on entries below:
#   00_whl_upload -> 01_bronze_citibike -> 02_silver_citibike
#     -> 03_gold_citibike_daily_ride_summary
#     -> 03_gold_citibike_daily_station_performance
# Every task runs on the same job cluster key, ds3_v2_sn.
resources:
  jobs:
    citibike_etl_pipeline_nb:
      name: citibike_etl_pipeline_nb
      tasks:
        # Wheel task: runs the "main" entry point of the dab_project package.
        # The wheel itself is attached through the libraries entry below.
        - task_key: 00_whl_upload
          python_wheel_task:
            package_name: dab_project
            entry_point: main
          job_cluster_key: ds3_v2_sn
          libraries:
            # Glob for the built wheel(s); presumably resolved relative to
            # this file's location - confirm against the bundle layout.
            - whl: ../dist/*.whl
        # Bronze notebook: runs only after the wheel task.
        - task_key: 01_bronze_citibike
          depends_on:
            - task_key: 00_whl_upload
          notebook_task:
            notebook_path: ../citibike_etl/notebooks/01_bronze/01_bronze_citibike.ipynb
            # Parameters handed to the notebook. The "{{...}}" values are
            # templated job/task references and "${var.catalog}" is a
            # variable defined outside this file.
            # NOTE(review): confirm the notebook reads these exact names.
            base_parameters:
              pipeline_id: "{{job.id}}"
              run_id: "{{job.run_id}}"
              task_id: "{{task.run_id}}"
              processed_timestamp: "{{job.start_time.iso_datetime}}"
              catalog: "${var.catalog}"
            source: WORKSPACE
          job_cluster_key: ds3_v2_sn
        # Silver notebook: runs after bronze, with the same parameter set.
        - task_key: 02_silver_citibike
          depends_on:
            - task_key: 01_bronze_citibike
          notebook_task:
            notebook_path: ../citibike_etl/notebooks/02_silver/02_silver_citibike.ipynb
            base_parameters:
              pipeline_id: "{{job.id}}"
              run_id: "{{job.run_id}}"
              task_id: "{{task.run_id}}"
              processed_timestamp: "{{job.start_time.iso_datetime}}"
              catalog: "${var.catalog}"
            source: WORKSPACE
          job_cluster_key: ds3_v2_sn
        # Gold notebooks: both depend only on the silver task (not on each
        # other) and receive only the catalog parameter.
        - task_key: 03_gold_citibike_daily_ride_summary
          depends_on:
            - task_key: 02_silver_citibike
          notebook_task:
            notebook_path: ../citibike_etl/notebooks/03_gold/03_gold_citibike_daily_ride_summary.ipynb
            source: WORKSPACE
            base_parameters:
              catalog: "${var.catalog}"
          job_cluster_key: ds3_v2_sn
        - task_key: 03_gold_citibike_daily_station_performance
          depends_on:
            - task_key: 02_silver_citibike
          notebook_task:
            notebook_path: ../citibike_etl/notebooks/03_gold/03_gold_citibike_daily_station_performance.ipynb
            source: WORKSPACE
            base_parameters:
              catalog: "${var.catalog}"
          job_cluster_key: ds3_v2_sn
      # Single job cluster referenced by every task above; its specification
      # comes from the ds3_v2_sn variable, which is defined outside this file.
      job_clusters:
        - job_cluster_key: ds3_v2_sn
          new_cluster: "${var.ds3_v2_sn}"
      # Queueing is switched on for this job.
      queue:
        enabled: true