# Evaluation Declaration Example 1 - NOAA-OWP/wres GitHub Wiki
# This is a test of real, HEFS baseline validation streamflow data verified through WRES
# where the observations are 24h mean streamflow (QME) provided in a PI-timeseries XML
# file and the HEFS ensemble forecasts are of 6h instantaneous streamflow (QINE)
# provided in a gzipped tarball. The HEFS forecasts are compared against ESP forecasts
# of the same variable allowing for the computation of skill scores; they are also
# provided in a gzipped tarball. The evaluation includes temporal rescaling of the
# forecast data and the computation of a temporal offset (due to time zones) in order to
# align the forecasts and observations.
# A short, human-readable name for this evaluation.
label: Example 1
# Observations are provided in a PI-timeseries XML file as 24h QME.
# The keys below are nested under "observed" so that they describe this dataset
# only and do not collide with the same keys of the other datasets.
observed:
  sources: data/abrfcExample/LGNN5_QME.xml
  variable: QME
  type: observations
# The forecasts are HEFS provided in a gzipped tarball of PI-timeseries XML files.
# The keys below are nested under "predicted"; in particular, this "label" names
# the dataset and is distinct from the evaluation-level "label".
predicted:
  label: HEFS
  sources: data/abrfcExample/LGNN5_hefs_flow_1yr.tgz
  variable: QINE
  type: ensemble forecasts
# The baseline for calculating skill scores is a set of ESP forecasts, also
# provided in a gzipped tarball of PI-timeseries XML files. The keys below are
# nested under "baseline" so that they describe this dataset only.
baseline:
  label: ESP
  sources: data/abrfcExample/LGNN5_hefs-mefp_flow_1yr.tgz
  variable: QINE
  type: ensemble forecasts
# The measurement unit of the evaluation
unit: CMS
# The evaluation is restricted to pairs that have a lead time of 42h. In general,
# it may be more useful to evaluate a longer period of lead durations, setting
# minimum to be "0" hours and maximum to be "120", "240", or even longer depending
# on the objectives of the evaluation. By default, one big pool is created for
# all lead durations. To create one pool for each lead duration, use an explicit
# lead_time_pools.
lead_times:
  minimum: 42
  maximum: 42
  # The unit of the lead times; nested here so that it does not override the
  # evaluation-level measurement "unit" above.
  unit: hours
# The time scale of the evaluation, which matches that of the observations
# (a 24h mean). The "unit" below is the unit of "period" and is nested under
# "time_scale" so that it does not collide with any other "unit" key.
time_scale:
  function: mean
  period: 24
  unit: hours
# Specifies the thresholds for which to perform the evaluation as quantiles of the
# distribution of the "observed" data. Both the threshold values and the
# comparison operator are nested under "probability_thresholds".
probability_thresholds:
  values: [0.1, 0.25, 0.5, 0.75, 0.9, 0.95]
  operator: greater equal
# The verification metrics to compute, listed by name.
metrics:
  - mean error
  - continuous ranked probability skill score
  - relative operating characteristic score
  - mean square error skill score
  - brier skill score
  - relative operating characteristic diagram
  - brier score
  - sample size
  - continuous ranked probability score
# The decimal format to use when writing numeric outputs. The value is quoted
# because a leading "#" would otherwise start a YAML comment.
decimal_format: '#0.000000'
# The output formats to write. Simple formats are given by name; the png format
# is given as a mapping so that it can carry its own "orientation" option.
output_formats:
  - csv
  - pairs
  - format: png
    orientation: lead threshold  # Plot by lead time, then threshold