Example Configuration Scripts (v4) - ovis-hpc/ovis-wiki GitHub Wiki

Table of Contents

Obtaining these scripts

  • In the downloadable source, similar configuration files are in <source-root>/ldms/etc
  • In an installed tree, similar configuration files are in <prefix>/etc/ldms

Systemd sampler environment file (ldmsd.sampler.env)

 # Environment file for the systemd-managed LDMS sampler daemon.
 # Each VAR=value line sets one variable; lines starting with '#' are comments.
 
 # LDMS transport option (sock, rdma, or ugni)
 LDMSD_XPRT=sock
 
 # LDMS Daemon service port
 LDMSD_PORT=411
 
 # LDMS memory allocation
 LDMSD_MEM=512K
 
 # Log verbosity. CRITICAL is active; the DEBUG and QUIET alternatives are
 # left commented out.
 #LDMSD_VERBOSE=DEBUG
 LDMSD_VERBOSE=CRITICAL
 #LDMSD_VERBOSE=QUIET
 
 # Log file control. The default is to log to syslog.
 LDMSD_LOG_OPTION="-l /var/log/ldmsd.log"
 
 # Authentication method
 LDMSD_AUTH_PLUGIN=munge
 
 # Authentication options. Both examples below are commented out, so
 # LDMSD_AUTH_OPTION is not set by this file.
 #LDMSD_AUTH_OPTION="-a ${LDMSD_AUTH_PLUGIN}"
 #LDMSD_AUTH_OPTION="-A conf=/opt/ovis/etc/ldms/ldmsauth.conf"
 
 # LDMS plugin configuration file, see /opt/ovis/etc/ldms/sampler.conf for an example
 LDMSD_PLUGIN_CONFIG_FILE=/opt/ovis/etc/ldms/sampler.conf
 
 # These are set by the configure script; there is normally no need to change them.
 LDMSD_PLUGIN_LIBPATH=/opt/ovis/lib64/ovis-ldms
 ZAP_LIBPATH=/opt/ovis/lib64/ovis-lib

Sampler configuration file (sampler.conf)

 # Sampler daemon plugin configuration.
 # "env NAME=value" lines define variables that later lines expand as ${NAME};
 # a value written as $(...) is taken from the output of the enclosed command.

 # Node id read from /proc/cray_xt/nid.
 env COMPONENT_ID=$(cat /proc/cray_xt/nid)
 # Producer name: "nid" followed by the node id zero-padded to 5 digits.
 env NID=$(printf nid%05d $COMPONENT_ID)
 # Second "n"-separated field of /proc/cray_xt/cname.
 # NODENUM is not referenced by any line below.
 env NODENUM=$(cat /proc/cray_xt/cname | awk -F "n" '{print $2}')
 # Sampling interval and offset passed to every "start" line below
 # (presumably microseconds, i.e. 1 s -- TODO confirm).
 env SAMPLE_INTERVAL=1000000
 env SAMPLE_OFFSET=0
 # Third ':'-separated field of the CPU modalias with ',' replaced by '_';
 # used below as the suffix of each schema name.
 env PROCTYPE=$(cat /sys/devices/system/cpu/modalias | cut -d ':' -f 3 | tr ',' '_')
 # jobinfo is loaded and configured but has no "start" line in this file; the
 # plugins below refer to its instance through job_set=${NID}/jobinfo.
 load name=jobinfo
 config name=jobinfo producer=${NID} instance=${NID}/jobinfo uid=0 gid=44476 perm=0777
  
 # Each sampler follows the same pattern: load, config, start.
 # uid/gid/perm presumably set the owner and access mode of the metric set
 # (TODO confirm); note the values differ from plugin to plugin.
 load name=vmstat
 config name=vmstat producer=${NID} instance=${NID}/vmstat component_id=${COMPONENT_ID} schema=vmstat_${PROCTYPE} job_set=${NID}/jobinfo uid=0 gid=44476 perm=0777
 start name=vmstat interval=${SAMPLE_INTERVAL} offset=${SAMPLE_OFFSET}
 
 load name=meminfo
 config name=meminfo producer=${NID} instance=${NID}/meminfo component_id=${COMPONENT_ID} schema=meminfo_${PROCTYPE} job_set=${NID}/jobinfo uid=0 gid=44476 perm=0770
 start name=meminfo interval=${SAMPLE_INTERVAL} offset=${SAMPLE_OFFSET}
 
 load name=procstat
 config name=procstat producer=${NID} instance=${NID}/procstat component_id=${COMPONENT_ID} schema=procstat_${PROCTYPE} job_set=${NID}/jobinfo uid=0 gid=0 perm=0700
 start name=procstat interval=${SAMPLE_INTERVAL} offset=${SAMPLE_OFFSET}

Systemd aggregator environment file (ldmsd.aggregator.env)

 # Environment file for the systemd-managed LDMS aggregator daemon.
 # Each VAR=value line sets one variable; lines starting with '#' are comments.
 
 # LDMS transport option (sock, rdma, or ugni)
 LDMSD_XPRT=sock
 
 # LDMS Daemon service port
 LDMSD_PORT=412
 
 # LDMS memory allocation
 LDMSD_MEM=2G
 
 # Number of event threads
 LDMSD_NUM_THREADS=8
 
 # NOTE(review): presumably the open-file-descriptor limit applied to the
 # daemon -- confirm against the systemd unit that consumes this file.
 LDMSD_ULIMIT_NOFILE=100000
 
 # Log verbosity. CRITICAL is active; the DEBUG alternative is commented out.
 #LDMSD_VERBOSE=DEBUG
 LDMSD_VERBOSE=CRITICAL
 
 # Log file control. The default is to log to syslog.
 LDMSD_LOG_OPTION="-l /var/log/ldmsd_agg.log"
 
 # Authentication method
 LDMSD_AUTH_PLUGIN=munge
 
 # Authentication options. The example below is commented out, so
 # LDMSD_AUTH_OPTION is not set by this file.
 # LDMSD_AUTH_OPTION="-A conf=/opt/ovis/etc/ldms/ldmsauth.conf"
 
 # LDMS plugin configuration file for the aggregator
 LDMSD_PLUGIN_CONFIG_FILE=/opt/ovis/etc/ldms/aggregator.conf
 
 # These are set by the configure script; there is normally no need to change them.
 LDMSD_PLUGIN_LIBPATH=/opt/ovis/lib64/ovis-ldms
 ZAP_LIBPATH=/opt/ovis/lib64/ovis-lib

Aggregator configuration file (aggregator.conf)

 # Aggregator daemon configuration: producers, one updater, and CSV storage.

 # Producers: one prdcr_add/prdcr_start pair per sampler daemon to collect from.
 # NOTE(review): xprt and port must match what each sampler listens on. The
 # sampler environment file example uses LDMSD_XPRT=sock while these lines use
 # ugni -- confirm which transport is intended for your site.
 prdcr_add name=grp1.nid00002.411 host=nid00002 port=411 xprt=ugni type=active interval=30000000
 prdcr_start name=grp1.nid00002.411
 prdcr_add name=grp1.nid00005.411 host=nid00005 port=411 xprt=ugni type=active interval=30000000
 prdcr_start name=grp1.nid00005.411
 prdcr_add name=grp1.nid00009.411 host=nid00009 port=411 xprt=ugni type=active interval=30000000
 prdcr_start name=grp1.nid00009.411
 
 # Updater: attached to every producer whose name matches the regex, i.e. all
 # of the grp1.* producers defined above.
 updtr_add name=updtr1 interval=1000000 offset=300000
 updtr_prdcr_add name=updtr1 regex=grp1\..*
 updtr_start name=updtr1
  
 # CSV store plugin used by the storage policies below.
 load name=store_csv
 config name=store_csv path=/projects/brandt/LDMS_CSV action=init altheader=1 rollover=86400 rolltype=1 buffer=1
 
 # Storage policies: one strgp_add/strgp_start pair per schema. The name given
 # to strgp_start must be the name used in the preceding strgp_add.
 # <PROCTYPE> is a placeholder: substitute the schema suffix the samplers use
 # (sampler.conf sets schema=<plugin>_${PROCTYPE}).
 strgp_add name=meminfo_<PROCTYPE>-store_csv plugin=store_csv container=metric_data schema=meminfo_<PROCTYPE>
 strgp_start name=meminfo_<PROCTYPE>-store_csv
 
 strgp_add name=vmstat_<PROCTYPE>-store_csv plugin=store_csv container=metric_data schema=vmstat_<PROCTYPE>
 strgp_start name=vmstat_<PROCTYPE>-store_csv
 
 strgp_add name=procstat_<PROCTYPE>-store_csv plugin=store_csv container=metric_data schema=procstat_<PROCTYPE>
 strgp_start name=procstat_<PROCTYPE>-store_csv
⚠️ **GitHub.com Fallback** ⚠️