note:2024‐06‐13 - ddbj/rdf GitHub Wiki
ゴール
contextが定義されたBioProject, BioSampleのJSON-LDをNII-RCOSに提供する
方針
- DDBJ SearchのJSON(-LD)を見直して、正しいJSON-LDを準備する。合わせて、context.jsonld を配置する。
- 語彙がなければBioSchemasに提案していく。(2024-05-08)
TODO
- JSON-LDのcontextファイルを設計し、開発のための小規模データドラフト版を準備する。 (2024年6月)
- Resourcelistを作成する (2024年7~8月目標) (例)https://www.hi.u-tokyo.ac.jp/di/resource/resourcesync/resourcelist_hi-T50.xml
やったこと
JSON2LD mapperを使ってJSONをJSON-LD化
メモ:現在のJSONのトップ階層はschema.orgでほぼOK、properties以下などXMLタグがそのままなので、contextを定義するときに一通りURIにマッピングする必要がある(現在json2ld名前空間のもの)
schema.org/bioschemasマッピングの検討
- Project
- Submitter
- Organization
- Grant
  - Thing > Intangible > Grant
- Publication
  - Thing > CreativeWork > Article
- dbXref
  - Thing > CreativeWork > WebPage
- Sample
- BioSample Attribute
  - Thing > Property :: additionalProperty で記述 (例) SAMEA104383111_jsonld.json
- Organism
  - Thing > Taxon (2023-05-01)
  - Taxon Thing > CreativeWork > TaxonName (2023-05-01)
メモ:
- Taxon (v1.0-RELEASE), TaxonName (v1.0-RELEASE) は2023年5月に1.0がリリースされたが、BioSample (v0.1-RELEASE-2019_06_19) は古いまま。
- Bioschemas BioSampleのpropertyには、genderなどメディカル寄りのプロパティが定義されている。
- BioProjectは Schema.orgで、BioSampleはvalueReferenceのオントロジーマッピング以外はbioschemasの例のようなモデルで表現できそう(藤澤)
- OrganismはINSDCの語彙を使うべき、IDが重要(有田)
- Taxonomy.owlはDDBJで10年ホストしている。taxonのIDはidentifiers.orgを使っている。
- 4年前の例
Togothon関連
- https://docs.google.com/presentation/d/1XC44UlxWTXmamA5XxzBRNDFCYBTMhubOSjp9Hr2QBt4/edit#slide=id.g2713719021e_1_12
- https://docs.google.com/presentation/d/1TdJ6glw7szOYZe_OUaqZ6Zv59VCyak8UdLIzjUJoCPc/edit#slide=id.g2e4d8fdf8cd_19_0
今後の予定
6/24-28のBH24.6でDDBJ SearchのJSONから検討したJSON-LDに変換する
## JSON2LD Mapperで生成したcontext.jsonld
BioProject
{
  "@context": [
    "https://schema.org/docs/jsonldcontext.json",
    {
      "@vocab": "http://json2ld.mapper.tokyo/ns/",
      "json2ld": "http://json2ld.mapper.tokyo/ns/",
      "identifier": { "@id": "schema:identifier" },
      "title": { "@id": "schema:title" },
      "description": { "@id": "schema:description" },
      "name": { "@id": "schema:name" },
      "type": { "@id": "json2ld:type" },
      "url": { "@id": "schema:url" },
      "sameAs": { "@id": "schema:sameAs" },
      "isPartOf": { "@id": "schema:isPartOf" },
      "organism": { "@id": "json2ld:organism" },
      "dbXrefs": { "@id": "json2ld:dbXrefs" },
      "dbXrefsStatistics": { "@id": "json2ld:dbXrefsStatistics" },
      "count": { "@id": "json2ld:count" },
      "properties": { "@id": "json2ld:properties" },
      "Project": { "@id": "schema:Project" },
      "Submission": { "@id": "json2ld:Submission" },
      "ProjectID": { "@id": "json2ld:ProjectID" },
      "ProjectDescr": { "@id": "json2ld:ProjectDescr" },
      "ProjectType": { "@id": "json2ld:ProjectType" },
      "ArchiveID": { "@id": "json2ld:ArchiveID" },
      "accession": { "@id": "json2ld:accession" },
      "archive": { "@id": "json2ld:archive" },
      "id": { "@id": "json2ld:id" },
      "Name": { "@id": "json2ld:Name" },
      "Title": { "@id": "json2ld:Title" },
      "Description": { "@id": "json2ld:Description" },
      "ExternalLink": { "@id": "json2ld:ExternalLink" },
      "Publication": { "@id": "json2ld:Publication" },
      "ProjectReleaseDate": { "@id": "json2ld:ProjectReleaseDate" },
      "LocusTagPrefix": { "@id": "json2ld:LocusTagPrefix" },
      "label": { "@id": "json2ld:label" },
      "category": { "@id": "schema:category" },
      "URL": { "@id": "schema:URL" },
      "status": { "@id": "schema:status" },
      "Reference": { "@id": "json2ld:Reference" },
      "StructuredCitation": { "@id": "json2ld:StructuredCitation" },
      "Journal": { "@id": "json2ld:Journal" },
      "AuthorSet": { "@id": "json2ld:AuthorSet" },
      "JournalTitle": { "@id": "json2ld:JournalTitle" },
      "Year": { "@id": "json2ld:Year" },
      "Volume": { "@id": "json2ld:Volume" },
      "Issue": { "@id": "json2ld:Issue" },
      "PagesFrom": { "@id": "json2ld:PagesFrom" },
      "PagesTo": { "@id": "json2ld:PagesTo" },
      "Author": { "@id": "json2ld:Author" },
      "First": { "@id": "json2ld:First" },
      "Last": { "@id": "json2ld:Last" },
      "Consortium": { "@id": "schema:Consortium" },
      "DbType": { "@id": "json2ld:DbType" },
      "biosample_id": { "@id": "json2ld:biosample_id" },
      "assembly_id": { "@id": "json2ld:assembly_id" },
      "content": { "@id": "json2ld:content" },
      "ProjectTypeSubmission": { "@id": "json2ld:ProjectTypeSubmission" },
      "Target": { "@id": "json2ld:Target" },
      "Method": { "@id": "json2ld:Method" },
      "Objectives": { "@id": "json2ld:Objectives" },
      "ProjectDataTypeSet": { "@id": "json2ld:ProjectDataTypeSet" },
      "sample_scope": { "@id": "json2ld:sample_scope" },
      "material": { "@id": "schema:material" },
      "capture": { "@id": "json2ld:capture" },
      "Organism": { "@id": "json2ld:Organism" },
      "taxID": { "@id": "schema:taxID" },
      "species": { "@id": "json2ld:species" },
      "OrganismName": { "@id": "json2ld:OrganismName" },
      "Strain": { "@id": "json2ld:Strain" },
      "Supergroup": { "@id": "json2ld:Supergroup" },
      "BiologicalProperties": { "@id": "json2ld:BiologicalProperties" },
      "RepliconSet": { "@id": "json2ld:RepliconSet" },
      "GenomeSize": { "@id": "json2ld:GenomeSize" },
      "Morphology": { "@id": "json2ld:Morphology" },
      "Environment": { "@id": "json2ld:Environment" },
      "Gram": { "@id": "json2ld:Gram" },
      "Motility": { "@id": "json2ld:Motility" },
      "OxygenReq": { "@id": "json2ld:OxygenReq" },
      "TemperatureRange": { "@id": "json2ld:TemperatureRange" },
      "Habitat": { "@id": "json2ld:Habitat" },
      "Replicon": { "@id": "json2ld:Replicon" },
      "Count": { "@id": "json2ld:Count" },
      "order": { "@id": "json2ld:order" },
      "Type": { "@id": "json2ld:Type" },
      "location": { "@id": "schema:location" },
      "Size": { "@id": "json2ld:Size" },
      "units": { "@id": "json2ld:units" },
      "repliconType": { "@id": "json2ld:repliconType" },
      "method_type": { "@id": "json2ld:method_type" },
      "Data": { "@id": "json2ld:Data" },
      "data_type": { "@id": "json2ld:data_type" },
      "DataType": { "@id": "schema:DataType" },
      "submitted": { "@id": "json2ld:submitted" },
      "Organization": { "@id": "schema:Organization" },
      "Access": { "@id": "json2ld:Access" },
      "role": { "@id": "json2ld:role" },
      "abbr": { "@id": "json2ld:abbr" },
      "search": { "@id": "json2ld:search" },
      "distribution": { "@id": "schema:distribution" },
      "encodingFormat": { "@id": "schema:encodingFormat" },
      "contentUrl": { "@id": "schema:contentUrl" },
      "downloadUrl": { "@id": "schema:downloadUrl" },
      "ftpUrl": { "@id": "json2ld:ftpUrl" },
      "visibility": { "@id": "json2ld:visibility" },
      "dateCreated": { "@id": "schema:dateCreated" },
      "dateModified": { "@id": "schema:dateModified" },
      "datePublished": { "@id": "schema:datePublished" }
    }
  ]
}
BioSample
{
  "@context": [
    "https://schema.org/docs/jsonldcontext.json",
    {
      "@vocab": "http://json2ld.mapper.tokyo/ns/",
      "json2ld": "http://json2ld.mapper.tokyo/ns/",
      "identifier": { "@id": "schema:identifier" },
      "title": { "@id": "schema:title" },
      "description": { "@id": "schema:description" },
      "name": { "@id": "schema:name" },
      "type": { "@id": "json2ld:type" },
      "url": { "@id": "schema:url" },
      "sameAs": { "@id": "schema:sameAs" },
      "isPartOf": { "@id": "schema:isPartOf" },
      "organism": { "@id": "json2ld:organism" },
      "dbXrefs": { "@id": "json2ld:dbXrefs" },
      "dbXrefsStatistics": { "@id": "json2ld:dbXrefsStatistics" },
      "properties": { "@id": "json2ld:properties" },
      "access": { "@id": "json2ld:access" },
      "publication_date": { "@id": "json2ld:publication_date" },
      "last_update": { "@id": "json2ld:last_update" },
      "Ids": { "@id": "json2ld:Ids" },
      "Description": { "@id": "json2ld:Description" },
      "Owner": { "@id": "json2ld:Owner" },
      "Models": { "@id": "json2ld:Models" },
      "Attributes": { "@id": "json2ld:Attributes" },
      "Id": { "@id": "json2ld:Id" },
      "is_primary": { "@id": "json2ld:is_primary" },
      "namespace": { "@id": "json2ld:namespace" },
      "content": { "@id": "json2ld:content" },
      "SampleName": { "@id": "json2ld:SampleName" },
      "Title": { "@id": "json2ld:Title" },
      "Organism": { "@id": "json2ld:Organism" },
      "taxonomy_id": { "@id": "json2ld:taxonomy_id" },
      "OrganismName": { "@id": "json2ld:OrganismName" },
      "Name": { "@id": "json2ld:Name" },
      "Model": { "@id": "json2ld:Model" },
      "Attribute": { "@id": "json2ld:Attribute" },
      "attribute_name": { "@id": "json2ld:attribute_name" },
      "search": { "@id": "json2ld:search" },
      "distribution": { "@id": "schema:distribution" },
      "encodingFormat": { "@id": "schema:encodingFormat" },
      "contentUrl": { "@id": "schema:contentUrl" },
      "downloadUrl": { "@id": "schema:downloadUrl" },
      "ftpUrl": { "@id": "json2ld:ftpUrl" },
      "status": { "@id": "schema:status" },
      "visibility": { "@id": "json2ld:visibility" },
      "dateCreated": { "@id": "schema:dateCreated" },
      "dateModified": { "@id": "schema:dateModified" },
      "datePublished": { "@id": "schema:datePublished" }
    }
  ]
}