# Evaluation Declaration Example 2 - NOAA-OWP/wres GitHub Wiki

# An example of an evaluation of single-valued, operational stream flow forecasts against
# observations provided in an NWSRFS datacard file. It includes a comparison against 
# persistence forecasts generated from the same datacard observations. The evaluation is 
# performed at a 24-hour, mean desired time scale with lead times pooled into windows that
# are adjacent, non-overlapping, and 24 hours in width. More details are provided below.
label: Example 2  # Short, human-readable name for this evaluation.

# Observations are provided in an NWSRFS datacard format file. Such data are
# typically provided in local standard time, so the time_zone_offset key is
# included to state the offset explicitly. The variable QINE indicates that
# the data is instantaneous streamflow. Note that the variable must be specified;
# WRES will not parse the file name in order to identify the variable.
observed:
  label: Observations
  sources: data/singleValuedEx_ABRFC_ARCFUL_OBS/GLOO2X.QINE.19951101.20170905.datacard
  variable: QINE
  # Quoted so that YAML keeps the offset as the string -0600 rather than
  # reading it as a number.
  time_zone_offset: '-0600'
  # Time scale of the source data: a mean over 1 second.
  # NOTE(review): this is presumably how an instantaneous value is declared - confirm.
  time_scale:
    function: mean
    period: 1
    unit: seconds

# Forecasts are provided as PI-timeseries XML files contained beneath a directory.
# The directory is navigated recursively and every file found is processed.
# Unlike the "observed" side, no time_zone_offset or time_scale is declared here.
predicted:
  label: Operational Single-Valued Forecasts
  sources: data/singleValuedEx_ABRFC_ARCFUL_QINE
  variable: QINE

# The baseline is a persistence forecast computed from the source specified here,
# which is the same datacard file used for the "observed" side. The default order
# is "1", i.e. the persisted baseline value is obtained from that source, one
# timestep prior to the forecast reference time for each "predicted" time-series.
baseline:
  sources: data/singleValuedEx_ABRFC_ARCFUL_OBS/GLOO2X.QINE.19951101.20170905.datacard
  variable: QINE
  time_zone_offset: '-0600'
  # Generate the baseline by persistence; no order is given, so the default applies.
  method: persistence

# The feature names for the evaluation, one per side of data; in this case
# each is an NWS "Handbook 5" id.
features:
  - observed: GLOO2X
    predicted: GLOO2
    baseline: GLOO2X

# The measurement unit of the evaluation.
# NOTE(review): CMS is presumably cubic metres per second - confirm.
unit: CMS

# The evaluation is restricted to the first 48 hours of lead times.
lead_times:
  minimum: 0  # Earliest lead time considered.
  maximum: 48  # Latest lead time considered.
  unit: hours  # Unit of the minimum and maximum values.

# Specifies how to pool the data by lead time. In this case, since the lead_times
# maximum is 48 hours, two windows are defined: (0h, 24h] and (24h, 48h].
# NOTE(review): period is presumably the width of each window and frequency the
# spacing between successive windows, so that equal values give adjacent,
# non-overlapping windows - confirm.
lead_time_pools:
  period: 24
  frequency: 24
  unit: hours

# The time scale at which to perform the evaluation, which is a 24-hour mean.
# This differs from the 1-second mean declared for the "observed" source.
# NOTE(review): the data are presumably rescaled to this scale before
# evaluation - confirm.
time_scale:
  function: mean
  period: 24
  unit: hours

# Specifies the thresholds for which to perform the evaluation as quantiles of the
# distribution of the "observed" data.
probability_thresholds:
  # Probabilities listed in increasing order, from 0.1 up to 0.95.
  values: [0.1, 0.25, 0.5, 0.75, 0.9, 0.95]
  # NOTE(review): presumably selects values greater than or equal to each
  # threshold - confirm.
  operator: greater equal

# The selected metrics.
# NOTE(review): the skill score presumably uses the persistence "baseline" as its
# reference forecast - confirm.
metrics:
  - mean square error skill score
  - mean error
  - sample size

# The decimal format to use when writing numeric outputs. The pattern has six
# digits after the decimal point. It is quoted because a leading # would
# otherwise start a YAML comment.
decimal_format: '#0.000000'

# The output formats to write. Each entry is either a bare format name or, where
# options are supplied, a mapping with a format key plus those options.
output_formats:
  - csv
  - pairs
  - format: png
    orientation: lead threshold  # Plot by lead time and then threshold.