note:2024‐06‐13 - ddbj/rdf GitHub Wiki

ゴール

contextが定義されたBioProject, BioSampleのJSON-LDをNII-RCOSに提供する

方針

  • DDBJ SearchのJSON(-LD)を見直して、正しいJSON-LDを準備する。合わせて、context.jsonld を配置する。
  • 語彙がなければBioSchemasに提案していく。(2024-05-08)

TODO

やったこと

JSON2LD mapperを使ってJSONをJSON-LD化

メモ:現在のJSONのトップ階層はschema.orgでほぼOK、properties以下などXMLタグがそのままなので、contextを定義するときに一通りURIにマッピングする必要がある(現在json2ld名前空間のもの)

schema.org/bioschemasマッピングの検討

メモ:

  • Taxon (v1.0-RELEASE), TaxonName (v1.0-RELEASE) は2023年5月に1.0がリリースされたが、BioSample (v0.1-RELEASE-2019_06_19) は古いまま。

  • Bioschemas BioSampleのpropertyには、genderなどメディカル寄りのプロパティが定義されている。

  • BioProjectは Schema.orgで、BioSampleはvalueReferenceのオントロジーマッピング以外はbioschemasの例のようなモデルで表現できそう(藤澤)

  • OrganismはINSDCの語彙を使うべき、IDが重要(有田)

  • Taxonomy.owlはDDBJで10年ホストしている。taxonのIDはidentifiers.orgを使っている。

  • 4年前の例

Togothon関連

今後の予定

6/24-28のBH24.6で、DDBJ SearchのJSONを、上記で検討したJSON-LDに変換する


## JSON2LD Mapperで生成したcontext.jsonld

BioProject

{
	"@context": [
		"https://schema.org/docs/jsonldcontext.json",
		{
			"@vocab": "http://json2ld.mapper.tokyo/ns/",
			"json2ld": "http://json2ld.mapper.tokyo/ns/",
			"identifier": {
				"@id": "schema:identifier"
			},
			"title": {
				"@id": "schema:title"
			},
			"description": {
				"@id": "schema:description"
			},
			"name": {
				"@id": "schema:name"
			},
			"type": {
				"@id": "json2ld:type"
			},
			"url": {
				"@id": "schema:url"
			},
			"sameAs": {
				"@id": "schema:sameAs"
			},
			"isPartOf": {
				"@id": "schema:isPartOf"
			},
			"organism": {
				"@id": "json2ld:organism"
			},
			"dbXrefs": {
				"@id": "json2ld:dbXrefs"
			},
			"dbXrefsStatistics": {
				"@id": "json2ld:dbXrefsStatistics"
			},
			"count": {
				"@id": "json2ld:count"
			},
			"properties": {
				"@id": "json2ld:properties"
			},
			"Project": {
				"@id": "schema:Project"
			},
			"Submission": {
				"@id": "json2ld:Submission"
			},
			"ProjectID": {
				"@id": "json2ld:ProjectID"
			},
			"ProjectDescr": {
				"@id": "json2ld:ProjectDescr"
			},
			"ProjectType": {
				"@id": "json2ld:ProjectType"
			},
			"ArchiveID": {
				"@id": "json2ld:ArchiveID"
			},
			"accession": {
				"@id": "json2ld:accession"
			},
			"archive": {
				"@id": "json2ld:archive"
			},
			"id": {
				"@id": "json2ld:id"
			},
			"Name": {
				"@id": "json2ld:Name"
			},
			"Title": {
				"@id": "json2ld:Title"
			},
			"Description": {
				"@id": "json2ld:Description"
			},
			"ExternalLink": {
				"@id": "json2ld:ExternalLink"
			},
			"Publication": {
				"@id": "json2ld:Publication"
			},
			"ProjectReleaseDate": {
				"@id": "json2ld:ProjectReleaseDate"
			},
			"LocusTagPrefix": {
				"@id": "json2ld:LocusTagPrefix"
			},
			"label": {
				"@id": "json2ld:label"
			},
			"category": {
				"@id": "schema:category"
			},
			"URL": {
				"@id": "schema:URL"
			},
			"status": {
				"@id": "schema:status"
			},
			"Reference": {
				"@id": "json2ld:Reference"
			},
			"StructuredCitation": {
				"@id": "json2ld:StructuredCitation"
			},
			"Journal": {
				"@id": "json2ld:Journal"
			},
			"AuthorSet": {
				"@id": "json2ld:AuthorSet"
			},
			"JournalTitle": {
				"@id": "json2ld:JournalTitle"
			},
			"Year": {
				"@id": "json2ld:Year"
			},
			"Volume": {
				"@id": "json2ld:Volume"
			},
			"Issue": {
				"@id": "json2ld:Issue"
			},
			"PagesFrom": {
				"@id": "json2ld:PagesFrom"
			},
			"PagesTo": {
				"@id": "json2ld:PagesTo"
			},
			"Author": {
				"@id": "json2ld:Author"
			},
			"First": {
				"@id": "json2ld:First"
			},
			"Last": {
				"@id": "json2ld:Last"
			},
			"Consortium": {
				"@id": "schema:Consortium"
			},
			"DbType": {
				"@id": "json2ld:DbType"
			},
			"biosample_id": {
				"@id": "json2ld:biosample_id"
			},
			"assembly_id": {
				"@id": "json2ld:assembly_id"
			},
			"content": {
				"@id": "json2ld:content"
			},
			"ProjectTypeSubmission": {
				"@id": "json2ld:ProjectTypeSubmission"
			},
			"Target": {
				"@id": "json2ld:Target"
			},
			"Method": {
				"@id": "json2ld:Method"
			},
			"Objectives": {
				"@id": "json2ld:Objectives"
			},
			"ProjectDataTypeSet": {
				"@id": "json2ld:ProjectDataTypeSet"
			},
			"sample_scope": {
				"@id": "json2ld:sample_scope"
			},
			"material": {
				"@id": "schema:material"
			},
			"capture": {
				"@id": "json2ld:capture"
			},
			"Organism": {
				"@id": "json2ld:Organism"
			},
			"taxID": {
				"@id": "schema:taxID"
			},
			"species": {
				"@id": "json2ld:species"
			},
			"OrganismName": {
				"@id": "json2ld:OrganismName"
			},
			"Strain": {
				"@id": "json2ld:Strain"
			},
			"Supergroup": {
				"@id": "json2ld:Supergroup"
			},
			"BiologicalProperties": {
				"@id": "json2ld:BiologicalProperties"
			},
			"RepliconSet": {
				"@id": "json2ld:RepliconSet"
			},
			"GenomeSize": {
				"@id": "json2ld:GenomeSize"
			},
			"Morphology": {
				"@id": "json2ld:Morphology"
			},
			"Environment": {
				"@id": "json2ld:Environment"
			},
			"Gram": {
				"@id": "json2ld:Gram"
			},
			"Motility": {
				"@id": "json2ld:Motility"
			},
			"OxygenReq": {
				"@id": "json2ld:OxygenReq"
			},
			"TemperatureRange": {
				"@id": "json2ld:TemperatureRange"
			},
			"Habitat": {
				"@id": "json2ld:Habitat"
			},
			"Replicon": {
				"@id": "json2ld:Replicon"
			},
			"Count": {
				"@id": "json2ld:Count"
			},
			"order": {
				"@id": "json2ld:order"
			},
			"Type": {
				"@id": "json2ld:Type"
			},
			"location": {
				"@id": "schema:location"
			},
			"Size": {
				"@id": "json2ld:Size"
			},
			"units": {
				"@id": "json2ld:units"
			},
			"repliconType": {
				"@id": "json2ld:repliconType"
			},
			"method_type": {
				"@id": "json2ld:method_type"
			},
			"Data": {
				"@id": "json2ld:Data"
			},
			"data_type": {
				"@id": "json2ld:data_type"
			},
			"DataType": {
				"@id": "schema:DataType"
			},
			"submitted": {
				"@id": "json2ld:submitted"
			},
			"Organization": {
				"@id": "schema:Organization"
			},
			"Access": {
				"@id": "json2ld:Access"
			},
			"role": {
				"@id": "json2ld:role"
			},
			"abbr": {
				"@id": "json2ld:abbr"
			},
			"search": {
				"@id": "json2ld:search"
			},
			"distribution": {
				"@id": "schema:distribution"
			},
			"encodingFormat": {
				"@id": "schema:encodingFormat"
			},
			"contentUrl": {
				"@id": "schema:contentUrl"
			},
			"downloadUrl": {
				"@id": "schema:downloadUrl"
			},
			"ftpUrl": {
				"@id": "json2ld:ftpUrl"
			},
			"visibility": {
				"@id": "json2ld:visibility"
			},
			"dateCreated": {
				"@id": "schema:dateCreated"
			},
			"dateModified": {
				"@id": "schema:dateModified"
			},
			"datePublished": {
				"@id": "schema:datePublished"
			}
		}
	]
}

BioSample

{
	"@context": [
		"https://schema.org/docs/jsonldcontext.json",
		{
			"@vocab": "http://json2ld.mapper.tokyo/ns/",
			"json2ld": "http://json2ld.mapper.tokyo/ns/",
			"identifier": {
				"@id": "schema:identifier"
			},
			"title": {
				"@id": "schema:title"
			},
			"description": {
				"@id": "schema:description"
			},
			"name": {
				"@id": "schema:name"
			},
			"type": {
				"@id": "json2ld:type"
			},
			"url": {
				"@id": "schema:url"
			},
			"sameAs": {
				"@id": "schema:sameAs"
			},
			"isPartOf": {
				"@id": "schema:isPartOf"
			},
			"organism": {
				"@id": "json2ld:organism"
			},
			"dbXrefs": {
				"@id": "json2ld:dbXrefs"
			},
			"dbXrefsStatistics": {
				"@id": "json2ld:dbXrefsStatistics"
			},
			"properties": {
				"@id": "json2ld:properties"
			},
			"access": {
				"@id": "json2ld:access"
			},
			"publication_date": {
				"@id": "json2ld:publication_date"
			},
			"last_update": {
				"@id": "json2ld:last_update"
			},
			"Ids": {
				"@id": "json2ld:Ids"
			},
			"Description": {
				"@id": "json2ld:Description"
			},
			"Owner": {
				"@id": "json2ld:Owner"
			},
			"Models": {
				"@id": "json2ld:Models"
			},
			"Attributes": {
				"@id": "json2ld:Attributes"
			},
			"Id": {
				"@id": "json2ld:Id"
			},
			"is_primary": {
				"@id": "json2ld:is_primary"
			},
			"namespace": {
				"@id": "json2ld:namespace"
			},
			"content": {
				"@id": "json2ld:content"
			},
			"SampleName": {
				"@id": "json2ld:SampleName"
			},
			"Title": {
				"@id": "json2ld:Title"
			},
			"Organism": {
				"@id": "json2ld:Organism"
			},
			"taxonomy_id": {
				"@id": "json2ld:taxonomy_id"
			},
			"OrganismName": {
				"@id": "json2ld:OrganismName"
			},
			"Name": {
				"@id": "json2ld:Name"
			},
			"Model": {
				"@id": "json2ld:Model"
			},
			"Attribute": {
				"@id": "json2ld:Attribute"
			},
			"attribute_name": {
				"@id": "json2ld:attribute_name"
			},
			"search": {
				"@id": "json2ld:search"
			},
			"distribution": {
				"@id": "schema:distribution"
			},
			"encodingFormat": {
				"@id": "schema:encodingFormat"
			},
			"contentUrl": {
				"@id": "schema:contentUrl"
			},
			"downloadUrl": {
				"@id": "schema:downloadUrl"
			},
			"ftpUrl": {
				"@id": "json2ld:ftpUrl"
			},
			"status": {
				"@id": "schema:status"
			},
			"visibility": {
				"@id": "json2ld:visibility"
			},
			"dateCreated": {
				"@id": "schema:dateCreated"
			},
			"dateModified": {
				"@id": "schema:dateModified"
			},
			"datePublished": {
				"@id": "schema:datePublished"
			}
		}
	]
}