Internal data model - Gapminder/waffle-server GitHub Wiki
The WS (Waffle Server) data model - the part of it that maps to DDF - consists of the following schemas:
Table of Contents
Types
Types whose names are not entirely intuitive:
- Mixed - can contain any value.
- ObjectId - contains a reference to a document in another document collection.
- {} - contains a set of key-value pairs as a single value.
Concepts
{
// 1.
// Unique identifier of the concept inside WS, scoped per dataset.
// Corresponds to the cell under the `primaryKey` column of the DDF concepts csv file.
// The value must match /^[a-z0-9_]*$/ (lowercase latin letters, digits and underscores).
gid: {type: String, match: /^[a-z0-9_]*$/, required: true},
// 2.
// Internal WS property used in versioning: it links different versions of the same document.
originId: {type: ObjectId},
// 3.
// Corresponds to the concept_type column in the DDF concepts csv file.
// Restricted to the values listed in `enum`; defaults to 'string'.
type: {
type: String,
enum: ['entity_domain', 'entity_set', 'string', 'measure', 'time', 'year', 'week', 'month', 'day', 'quarter'],
default: 'string',
required: true
},
// 4.
// Files from which the concept was imported.
sources: [{type: String, required: true}],
// 5.
// All columns of the concepts file, with their corresponding values, are stored here.
properties: {},
// 6.
// Translations that were found are stored here in a { [LANGUAGE]: { TRANSLATIONS_DICTIONARY } } format.
languages: {},
// 7.
// Domain of the current concept (if there is one); references another document in 'Concepts'.
domain: {type: ObjectId, ref: 'Concepts'},
// 8.
// Version of the dataset in which this concept was introduced.
// Corresponds to the TRANSACTION creation time.
from: {type: Number, required: true},
// 9.
// Version of the dataset in which this concept does NOT exist anymore (was deleted)
// OR exists as a newer version (was updated).
// Corresponds to the TRANSACTION creation time.
// Defaults to MAX_VERSION (a constant not shown on this page - presumably it means "still current"; confirm).
to: {type: Number, required: true, default: MAX_VERSION},
// 10.
// Dataset to which the concept belongs.
dataset: {type: ObjectId, ref: 'Datasets', required: true},
};
Datapoints
{
// 1.
// Value of the datapoint.
value: {type: Mixed, required: true},
// 2.
// Files from which the datapoint was imported.
sources: [{type: String, required: true}],
// 3.
// Reference to the concept to which the datapoint belongs.
// `measure` is a slightly misleading name, because technically datapoints might belong not only to measures.
measure: {type: ObjectId, ref: 'Concepts', required: true},
// 4.
// Set of references to the entities which form the dimensions of the datapoint.
dimensions: [{type: ObjectId, ref: 'Entities'}],
// 5.
// Set of references to the concepts (entity_set's and entity_domain's) to which the entities from `dimensions` belong.
dimensionsConcepts: [{type: ObjectId, ref: 'Concepts'}],
// 6.
// The row from which the datapoint was imported, in the form of a Javascript object (a simple dictionary).
properties: {},
// 7.
// Translations for the datapoint.
languages: {},
// 8.
// Version of the dataset in which this datapoint was introduced.
// Corresponds to the TRANSACTION creation time.
from: {type: Number, required: true},
// 9.
// Version of the dataset in which this datapoint does NOT exist anymore (was deleted)
// OR exists as a newer version (was updated).
// Corresponds to the TRANSACTION creation time.
// Defaults to MAX_VERSION (a constant not shown on this page - presumably it means "still current"; confirm).
to: {type: Number, required: true, 'default': MAX_VERSION},
// 10.
// Dataset to which the datapoint belongs.
dataset: {type: ObjectId, ref: 'Datasets', required: true},
// 11.
// Internal WS property used in versioning: it links different versions of the same document.
originId: {type: ObjectId},
}
Entities
{
// 1.
// Unique identifier of the entity inside WS, scoped per dataset.
// Corresponds to the cell under the `primaryKey` column of the DDF entities csv file.
// NOTE(review): `match` is declared together with `type: Mixed` here, while Concepts declares
// gid as String - confirm whether the pattern is actually enforced for Mixed values.
gid: {type: Mixed, match: /^[a-z0-9_]*$/, required: true},
// 2.
// Internal WS property used in versioning: it links different versions of the same document.
originId: {type: ObjectId},
// 3.
// Files from which the entity was imported.
sources: [{type: String, required: true}],
// 4.
// The row from which the entity was imported, in the form of a Javascript object (a simple dictionary).
properties: {},
// 5.
// Translations for the entity.
languages: {},
// 6.
// Domain (a document in 'Concepts') to which the current entity belongs.
domain: {type: ObjectId, ref: 'Concepts', required: true},
// 7.
// Entity sets (documents in 'Concepts') to which the current entity belongs.
sets: [{type: ObjectId, ref: 'Concepts'}],
// 8.
// Version of the dataset in which this entity was introduced.
// Corresponds to the TRANSACTION creation time.
from: {type: Number, required: true},
// 9.
// Version of the dataset in which this entity does NOT exist anymore (was deleted)
// OR exists as a newer version (was updated).
// Corresponds to the TRANSACTION creation time.
// Defaults to MAX_VERSION (a constant not shown on this page - presumably it means "still current"; confirm).
to: {type: Number, required: true, 'default': MAX_VERSION},
// 10.
// Dataset to which the entity belongs.
dataset: {type: ObjectId, ref: 'Datasets', required: true},
}
Schemas (model for supporting availability queries)
{
// 1.
// If `key` contains 2 or more elements, the object represented by this model describes a datapoint.
// If `key` contains exactly one element and that element equals the string 'concept', it describes a concept.
// Otherwise it describes an entity.
// NOTE(review): this says 'concept' (singular) while the `type` enum below uses 'concepts' - confirm the literal.
key: [{type: String, required: true}],
// 2.
// The concept which this schema describes (this value is not unique, because the same concept
// might be described in multiple files).
value: {type: Mixed, required: true},
// 3.
// Files from which the current schema instance was generated.
// NOTE(review): named `source` (singular) here, while the other models on this page use `sources` - confirm.
source: [{type: String, required: true}],
// 4.
// Internal WS field for faster searching of the concepts that correspond to the schema's key.
keyOriginIds: [{type: ObjectId, ref: 'Concepts'}],
// 5.
// Internal WS field for faster searching of the concept that corresponds to the schema's value.
valueOriginId: {type: ObjectId, ref: 'Concepts'},
// 6.
// Indicates which type of data the current schema describes.
// One of: 'concepts', 'entities' or 'datapoints'.
type: {type: String, 'enum': ['concepts', 'entities', 'datapoints']},
// 7.
// Dataset whose data this schema describes.
dataset: {type: ObjectId, ref: 'Datasets', required: true},
// 8.
// Transaction in which the schema was generated.
transaction: {type: ObjectId, ref: 'DatasetTransactions', required: true}
}