Data Checks - statnett/Talk2PowerSystem GitHub Wiki

This page lists checks for data quality and consistency.

URIs used as objects but never as subjects

PREFIX quantitykind: <http://qudt.org/vocab/quantitykind/>
PREFIX unit: <http://qudt.org/vocab/unit/>
# Dangling references: URIs that occur in the object position of at least one
# triple but never occur as the subject of any triple.
SELECT DISTINCT ?s
WHERE {
    ?referrer ?viaProperty ?s .
    FILTER (isURI(?s))
    # URIs containing the QUDT unit or quantity-kind namespace are left out.
    # NOTE(review): presumably because those vocabularies are not loaded
    # into the repository - confirm.
    FILTER (!(CONTAINS(STR(?s), STR(unit:)) || CONTAINS(STR(?s), STR(quantitykind:))))
    FILTER NOT EXISTS { ?s ?anyProperty ?anyValue }
}

58 results!

mRID presence and correctness

see #52

Check that every object has an mRID and that the mRID conforms to the object's URI (the mRID is the suffix of the URI).

All with a mRID match the URI

PREFIX cim: <https://cim.ucaiug.io/ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
# Lists every object whose mRID differs from the part of its URI that follows
# the dataset namespace prefix. An empty result means all mRIDs match.
#
# The test is written with "!=" on purpose: a leading "!" binds tighter than
# "=", so "!strafter(...) = ?id" negates the string first and then compares a
# boolean with ?id, which can never report a mismatch.
# str(?id) keeps the comparison string-to-string whatever datatype the mRID
# literal carries.
# NOTE(review): "?x a cim:IdentifiedObject" presumably relies on inferred
# types - confirm that inference is enabled on the repository.
SELECT * {
    ?x a cim:IdentifiedObject ;
       cim:IdentifiedObject.mRID ?id .
    FILTER (STRAFTER(STR(?x), "http://www.Statnett.no/IGM/Nordic44_CGM#_") != STR(?id))
}

241 objects have no mRID

PREFIX cim: <https://cim.ucaiug.io/ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
# Objects typed cim:IdentifiedObject that carry no cim:IdentifiedObject.mRID.
SELECT ?x
WHERE {
    ?x a cim:IdentifiedObject .
    FILTER NOT EXISTS { ?x cim:IdentifiedObject.mRID ?anyId }
}

Discrepancy source:

PREFIX dct: <http://purl.org/dc/terms/>
PREFIX cim: <https://cim.ucaiug.io/ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
# Objects without an mRID that are, in addition, not typed inside any named
# graph that states dct:conformsTo about itself.
SELECT ?x
WHERE {
    ?x a cim:IdentifiedObject .
    # Drop objects that have a type statement in a graph which also holds
    # a dct:conformsTo statement whose subject is that same graph.
    FILTER NOT EXISTS {
        GRAPH ?g {
            ?g dct:conformsTo ?profile .
            ?x a ?type .
        }
    }
    # Drop objects that do have an mRID.
    FILTER NOT EXISTS { ?x cim:IdentifiedObject.mRID ?anyId }
}

Check that names are normalized/canonical strings

see #53

Names should be canonical strings, i.e. no leading, trailing or consecutive spaces. Otherwise when you print them, you can't tell apart two names that differ only in spacing.

450 names are not canonical strings

PREFIX cim: <https://cim.ucaiug.io/ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
# Names that are not canonical strings: a leading space, two consecutive
# spaces, or a trailing space.
#
# One regular-expression alternation replaces a UNION of three branches.
# UNION keeps duplicates, so a name that broke several rules used to be
# listed once per rule and inflated the row count; here every (?x, ?name)
# pair is listed once.
SELECT * {
    ?x cim:IdentifiedObject.name ?name .
    # "^ "   leading space
    # " {2}" two consecutive spaces (quantifier avoids an easy-to-miss "  ")
    # " $"   trailing space
    FILTER (REGEX(?name, "^ | {2}| $"))
}

40 groups of names differ only by spaces

PREFIX cim: <https://cim.ucaiug.io/ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
# Groups of names that become identical once all spaces are removed.
# ?nospace is the name with every space stripped; ?c is the number of
# distinct original spellings that collapse to it.
SELECT ?nospace (COUNT(DISTINCT ?name) AS ?c) {
    ?x cim:IdentifiedObject.name ?name .
    BIND (REPLACE(?name, " ", "") AS ?nospace)
}
GROUP BY ?nospace
# The aggregate is repeated here instead of referring to ?c: in the SPARQL 1.1
# algebra HAVING is evaluated before the SELECT expression binds ?c, so the
# alias is only usable on engines that extend the standard.
HAVING (COUNT(DISTINCT ?name) > 1)

Example

PREFIX sesame: <http://www.openrdf.org/schema/sesame#>
PREFIX cim: <https://cim.ucaiug.io/ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
# Example: every object whose name equals "OSKARSHAMN" once spaces are
# removed, together with its sesame:directType value.
SELECT ?x ?name ?type
WHERE {
    ?x sesame:directType ?type ;
       cim:IdentifiedObject.name ?name .
    FILTER (REPLACE(?name, " ", "") = "OSKARSHAMN")
}

Check uniqueness of names

see #14

PREFIX cim: <https://cim.ucaiug.io/ns#>
PREFIX sesame: <http://www.openrdf.org/schema/sesame#>
# Number of matches for each name, largest count first; a count above 1
# means the name is not unique.
SELECT ?name (COUNT(*) AS ?c)
WHERE {
    ?x cim:IdentifiedObject.name ?name .
}
GROUP BY ?name
ORDER BY DESC(?c)