Data Checks - statnett/Talk2PowerSystem GitHub Wiki
This page lists checks for data quality and consistency.
URIs without description (used as an object but never as a subject)
# Lists every IRI that occurs as the object of some triple but is never the
# subject of any triple. IRIs whose string contains the QUDT unit: or
# quantitykind: namespace are left out of the result.
PREFIX unit: <http://qudt.org/vocab/unit/>
PREFIX quantitykind: <http://qudt.org/vocab/quantitykind/>
SELECT DISTINCT ?s
WHERE {
  [] ?p ?s .
  FILTER (
    isURI(?s)
    && !contains(str(?s), str(unit:))
    && !contains(str(?s), str(quantitykind:))
  )
  # Keep only IRIs that have no outgoing triples at all.
  FILTER NOT EXISTS { ?s ?p1 [] }
}
58 results!
MRID presence and correctness
see #52
Check that mRID exists for all objects and conforms to the URIs (id is suffix of URI)
All with a mRID match the URI
# Lists cim:IdentifiedObject resources whose mRID differs from the part of
# their IRI that follows "http://www.Statnett.no/IGM/Nordic44_CGM#_".
# An empty result means every mRID agrees with its IRI.
PREFIX cim: <https://cim.ucaiug.io/ns#>
SELECT ?x ?id
WHERE {
  ?x a cim:IdentifiedObject ;
     cim:IdentifiedObject.mRID ?id .
  # "!" binds tighter than "=", so the earlier form "!strafter(...) = ?id"
  # negated the string first and then compared a boolean with ?id, which can
  # never hold; the query therefore always came back empty. Results obtained
  # with that form should be re-checked.
  # strafter() returns "" when the IRI does not contain the prefix, so such
  # objects are reported as mismatches as well (unless their mRID is empty).
  FILTER (strafter(str(?x), "http://www.Statnett.no/IGM/Nordic44_CGM#_") != str(?id))
}
241 objects have no mRID
# Lists every cim:IdentifiedObject that has no cim:IdentifiedObject.mRID value.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX cim: <https://cim.ucaiug.io/ns#>
SELECT ?x
WHERE {
  ?x a cim:IdentifiedObject .
  FILTER NOT EXISTS { ?x cim:IdentifiedObject.mRID ?anyId }
}
Discrepancy source:
# Narrows the "no mRID" check: lists cim:IdentifiedObject resources that have
# no mRID and for which there is no named graph that both states a type for
# the resource and carries a dct:conformsTo statement about the graph itself.
PREFIX cim: <https://cim.ucaiug.io/ns#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?x
WHERE {
  ?x a cim:IdentifiedObject .
  FILTER NOT EXISTS { ?x cim:IdentifiedObject.mRID ?anyId }
  FILTER NOT EXISTS {
    GRAPH ?g {
      ?g dct:conformsTo ?c .
      ?x a ?type .
    }
  }
}
Check that names are normalized/canonical strings
see #53
Names should be canonical strings, i.e. no leading, trailing or consecutive spaces. Otherwise when you print them, you can't tell apart two names that differ only in spacing.
450 have non canonical strings
# Lists names that are not canonical strings: a leading space, a trailing
# space, or two or more consecutive spaces.
PREFIX cim: <https://cim.ucaiug.io/ns#>
SELECT ?x ?name
WHERE {
  ?x cim:IdentifiedObject.name ?name .
  # One alternation replaces the earlier three-way UNION, so a name that
  # breaks several rules is listed once instead of once per rule.
  # The consecutive-space case is written " {2,}" on purpose: the earlier
  # single-space pattern matched every name that contained any space at all.
  FILTER (regex(?name, "^ | $| {2,}"))
}
40 differ only by spaces
# Finds names that collide once all spaces are removed: for each
# space-stripped form, counts the distinct original spellings and keeps the
# forms that have more than one spelling.
PREFIX cim: <https://cim.ucaiug.io/ns#>
SELECT ?nospace (COUNT(DISTINCT ?name) AS ?c)
WHERE {
  ?x cim:IdentifiedObject.name ?name .
  BIND (replace(?name, " ", "") AS ?nospace)
}
GROUP BY ?nospace
# The aggregate is repeated here instead of referring to ?c: in the SPARQL 1.1
# algebra HAVING is evaluated before the SELECT expressions are bound, so the
# alias is only usable in HAVING on engines that extend the standard.
HAVING (COUNT(DISTINCT ?name) > 1)
Example
# Example drill-down: shows the resource, its original name and its
# sesame:directType value for every name that equals "OSKARSHAMN" once
# spaces are removed.
PREFIX cim: <https://cim.ucaiug.io/ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sesame: <http://www.openrdf.org/schema/sesame#>
SELECT ?x ?name ?type
WHERE {
  ?x cim:IdentifiedObject.name ?name .
  ?x sesame:directType ?type .
  FILTER (replace(?name, " ", "") = "OSKARSHAMN")
}
Check uniqueness of names
see #14
# Counts how many times each name value is used and lists the most
# frequently used names first; names with a count above 1 are not unique.
PREFIX sesame: <http://www.openrdf.org/schema/sesame#>
PREFIX cim: <https://cim.ucaiug.io/ns#>
SELECT ?name (COUNT(*) AS ?c)
WHERE {
  ?x cim:IdentifiedObject.name ?name .
}
GROUP BY ?name
ORDER BY DESC(?c)