Sample Datasets for Demos - ja-guzzle/guzzle_docs GitHub Wiki
-
Data Expo 2009 airline data set: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/HG7NV7 (original site: http://stat-computing.org/dataexpo/2009/the-data.html)
# Download the yearly files (1987-2008) and the four lookup CSVs from Harvard
# Dataverse. Each wget is sent to the background with `&`, so all downloads
# run concurrently. URLs are quoted because they contain `?`, which the shell
# would otherwise treat as a glob character.
base='https://dataverse.harvard.edu/api/access/datafile/:persistentId?persistentId=doi:10.7910/DVN/HG7NV7'
wget -O 1987.csv.bz2 "$base/IXITH2" &
wget -O 1988.csv.bz2 "$base/TUYWU3" &
wget -O 1989.csv.bz2 "$base/T7EP3M" &
wget -O 1990.csv.bz2 "$base/QJKL3I" &
wget -O 1991.csv.bz2 "$base/EJ4WJO" &
wget -O 1992.csv.bz2 "$base/PLPDQO" &
wget -O 1993.csv.bz2 "$base/IOU9DX" &
wget -O 1994.csv.bz2 "$base/BH5P0X" &
wget -O 1995.csv.bz2 "$base/ZLTTDC" &
wget -O 1996.csv.bz2 "$base/3KDWWL" &
wget -O 1997.csv.bz2 "$base/RUGDRW" &
wget -O 1998.csv.bz2 "$base/H07RX8" &
wget -O 1999.csv.bz2 "$base/IP6BL3" &
wget -O 2000.csv.bz2 "$base/YGU3TD" &
wget -O 2001.csv.bz2 "$base/CI5CEM" &
wget -O 2002.csv.bz2 "$base/OWJXH3" &
wget -O 2003.csv.bz2 "$base/KM2QOA" &
wget -O 2004.csv.bz2 "$base/CCAZGT" &
wget -O 2005.csv.bz2 "$base/JTFT25" &
wget -O 2006.csv.bz2 "$base/EPIFFT" &
wget -O 2007.csv.bz2 "$base/2BHLWK" &
wget -O 2008.csv.bz2 "$base/EIR0RA" &
wget -O airports.csv "$base/XTPZZY" &
wget -O carriers.csv "$base/3NOQ6Q" &
wget -O plane-data.csv "$base/XXSL8A" &
wget -O variable-descriptions.csv "$base/YZWKHN" &
- http://stat-computing.org/dataexpo/2009/supplemental-data.html
- https://openflights.org/data.html
- http://www.nyc.gov/html/tlc/html/about/trip_record_data.shtml
- Public holiday API - define a REST datastore named holidayapi with the URL https://holidayapi.com and use the job config below
# Ingestion job: requests holidays for country=SG as XML from the
# "holidayapi" endpoint and appends the rows to table "src_holiday" on the
# "admstg" endpoint.
# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation had been lost; confirm it against the Guzzle job schema,
# in particular that source/schema/target are siblings of job.
version: 1
job:
  type: "ingestion"
  tags:
    - "airline"
source:
  endpoint: "holidayapi"
  properties:
    format: "xml"
    format_properties:
      root_tag: "holidays"
      row_tag: "holiday"
    uri: "/v1/holidays?country=SG"
    http_method: "get"
    charset: "UTF-8"
    # Presumably sent as query-string parameters with the request - TODO confirm.
    parameters:
      format: "xml"
      year: "${year_of_ph}"
      # Pass the API key in as a job parameter, the same way year is supplied,
      # rather than committing a real key to a public page.
      key: "${holidayapi_key}"
      day: "09"
      month: "08"
schema:
  strict_schema_check: true
  schema_derivation_strategy: "source"
  discard: true
  columns:
    # Column populated from the SQL expression current_timestamp.
    - nullable:
        value: true
        discard: true
      name: "w_refresh_date"
      validate: false
      partition_column: false
      data_type:
        value: "timestamp"
        discard: true
      transform_sql:
        value: "current_timestamp"
      primary_key:
        value: false
        discard: true
    # Column populated from the ${job_instance_id} parameter.
    - nullable:
        value: true
        discard: true
      name: "w_job_instance_id"
      validate: false
      partition_column: false
      data_type:
        value: "bigint"
        discard: true
      transform_sql:
        value: "${job_instance_id}"
      primary_key:
        value: false
        discard: true
target:
  endpoint: "admstg"
  properties:
    auto_create_table: true
    table: "src_holiday"
  # NOTE(review): placed directly under target; it may instead belong under
  # target.properties - verify.
  operation: "append"