Internal data model - Gapminder/waffle-server GitHub Wiki

The part of the WS (Waffle Server) data model that maps to DDF consists of the following schemas:

Table of Contents

Types

Types whose names are not self-explanatory:

  • Mixed - can contain any value
  • ObjectId - contains a reference to a document in another collection
  • {} - contains a set of key-value pairs as a single value

Concepts

{
  // 1.
  // Unique identifier of the concept inside of WS (unique per dataset).
  // Corresponds to the cell under the concepts' `primaryKey` column in the DDF csv file.
  // May contain only lowercase latin letters, digits and underscores.
  gid: {type: String, match: /^[a-z0-9_]*$/, required: true},

  // 2.
  // This is an internal WS property that is used in versioning as a link between different versions of the same document.
  originId: {type: ObjectId},

  // 3.
  // Type corresponds to the `concept_type` column in the DDF concepts csv file.
  // Only the values listed in `enum` are accepted; 'string' is used when no type is given.
  type: {
    type: String,
    enum: ['entity_domain', 'entity_set', 'string', 'measure', 'time', 'year', 'week', 'month', 'day', 'quarter'],
    default: 'string',
    required: true
  },

  // 4.
  // Files from which the concept was imported.
  sources: [{type: String, required: true}],

  // 5.
  // All the columns of the concepts file, with their corresponding values, are stored here.
  properties: {},

  // 6.
  // Found translations are stored here in a { [LANGUAGE]: { TRANSLATIONS_DICTIONARY } } format.
  languages: {},

  // 7.
  // This property is used to indicate the domain of the current concept (if there is one).
  domain: {type: ObjectId, ref: 'Concepts'},

  // 8.
  // This property is used to indicate the version of the dataset in which this concept was introduced.
  // Corresponds to TRANSACTION creation time.
  from: {type: Number, required: true},

  // 9.
  // This property is used to indicate the version of the dataset in which
  // this concept does NOT exist anymore (was deleted) OR exists as a newer version (was updated).
  // Corresponds to TRANSACTION creation time.
  // MAX_VERSION is defined outside of this snippet.
  to: {type: Number, required: true, default: MAX_VERSION},

  // 10.
  // Dataset to which the concept belongs.
  dataset: {type: ObjectId, ref: 'Datasets', required: true},
};

Datapoints

{
  // 1.
  // Value of the datapoint.
  value: {type: Mixed, required: true},

  // 2.
  // Files from which the datapoint was imported.
  sources: [{type: String, required: true}],

  // 3.
  // This property contains a reference to the concept to which the datapoint belongs.
  // `measure` is a slightly misleading name, because technically datapoints might belong not only to measures.
  measure: {type: ObjectId, ref: 'Concepts', required: true},

  // 4.
  // Set of references to the entities which form the dimensions of the datapoint.
  dimensions: [{type: ObjectId, ref: 'Entities'}],

  // 5.
  // Set of references to the concepts (entity_sets and entity_domains) to which the entities from `dimensions` belong.
  dimensionsConcepts: [{type: ObjectId, ref: 'Concepts'}],

  // 6.
  // This field contains the row from which the datapoint was imported, in the form of a JavaScript object (a simple dictionary).
  properties: {},

  // 7.
  // The languages field contains translations for the datapoint.
  languages: {},

  // 8.
  // This property is used to indicate the version of the dataset in which this datapoint was introduced.
  // Corresponds to TRANSACTION creation time.
  from: {type: Number, required: true},

  // 9.
  // This property is used to indicate the version of the dataset in which
  // this datapoint does NOT exist anymore (was deleted) OR exists as a newer version (was updated).
  // Corresponds to TRANSACTION creation time.
  // MAX_VERSION is defined outside of this snippet.
  to: {type: Number, required: true, 'default': MAX_VERSION},

  // 10.
  // Dataset to which the datapoint belongs.
  dataset: {type: ObjectId, ref: 'Datasets', required: true},

  // 11.
  // This is an internal WS property that is used in versioning as a link between different versions of the same document.
  originId: {type: ObjectId},
}

Entities

{
  // 1.
  // Unique identifier of the entity inside of WS (unique per dataset).
  // Corresponds to the cell under the entities' `primaryKey` column in the DDF csv file.
  // NOTE(review): `match` is declared on a Mixed-typed field here, whereas the Concepts gid is a String;
  // confirm that the pattern is actually enforced for this type.
  gid: {type: Mixed, match: /^[a-z0-9_]*$/, required: true},

  // 2.
  // This is an internal WS property that is used in versioning as a link between different versions of the same document.
  originId: {type: ObjectId},

  // 3.
  // Files from which the entity was imported.
  sources: [{type: String, required: true}],

  // 4.
  // This field contains the row from which the entity was imported, in the form of a JavaScript object (a simple dictionary).
  properties: {},

  // 5.
  // The languages field contains translations for the entity.
  languages: {},

  // 6.
  // Domain to which the current entity belongs.
  domain: {type: ObjectId, ref: 'Concepts', required: true},
  
  // 7.
  // Entity sets to which the current entity belongs.
  sets: [{type: ObjectId, ref: 'Concepts'}],

  // 8.
  // This property is used to indicate the version of the dataset in which this entity was introduced.
  // Corresponds to TRANSACTION creation time.
  from: {type: Number, required: true},
  
  // 9.
  // This property is used to indicate the version of the dataset in which
  // this entity does NOT exist anymore (was deleted) OR exists as a newer version (was updated).
  // Corresponds to TRANSACTION creation time.
  // MAX_VERSION is defined outside of this snippet.
  to: {type: Number, required: true, 'default': MAX_VERSION},

  // 10.
  // Dataset to which the entity belongs.
  dataset: {type: ObjectId, ref: 'Datasets', required: true},
}

Schemas (model for supporting availability queries)

{
  // 1.
  // If the object represented by this model contains 2 or more elements in the key field, then it describes a datapoint.
  // If key contains only one element and this element equals the string 'concept', then it describes a concept.
  // Otherwise it describes an entity.
  key: [{type: String, required: true}],
  
  // 2.
  // Contains the concept that this schema describes (this value is not unique, because the same concept might be described in multiple files).
  value: {type: Mixed, required: true},

  // 3.
  // Contains the files from which the current schema instance was generated.
  source: [{type: String, required: true}],

  // 4.
  // This is an internal WS field for faster searching of the concepts corresponding to the schema's key.
  keyOriginIds: [{type: ObjectId, ref: 'Concepts'}],

  // 5.
  // This is an internal WS field for faster searching of the concept corresponding to the schema's value.
  valueOriginId: {type: ObjectId, ref: 'Concepts'},

  // 6.
  // Type indicates which kind of data the current schema describes.
  // It is one of: 'concepts', 'entities' or 'datapoints'.
  type: {type: String, 'enum': ['concepts', 'entities', 'datapoints']},

  // 7.
  // Dataset whose data this schema describes.
  dataset: {type: ObjectId, ref: 'Datasets', required: true},

  // 8.
  // Transaction in which the schema was generated.
  transaction: {type: ObjectId, ref: 'DatasetTransactions', required: true}
}