Skip to content

DDBJ Annotated Sequence Validator

Takatomo Fujisawa edited this page Apr 11, 2016 · 19 revisions

DDBJ Annotated Sequence Validator開発

Validator 仕様

入力JSON形式仕様

JSON data model for DDBJ submission format

{
  "COMMON": {
    "COMMON#feature::DIVISION": {
      "location": null,
      "qualifiers": [
        {
          "key": "division",
          "value": "HTG"
        }
      ],
      "feature": "DIVISION"
    },
    "COMMON#feature::SUBMITTER": {
      "location": null,
      "qualifiers": [
        {
          "key": "ab_name",
          "value": "Fujisawa,T."
        },
        {
          "key": "contact",
          "value": "Takatomo Fujisawa"
        },
        {
          "key": "email",
          "value": "[email protected]"
        },
        {
          "key": "url",
          "value": "http://www.bio.nite.go.jp/"
        },
        {
          "key": "phone",
          "value": "81-438-52-3951"
        },
        {
          "key": "fax",
          "value": "81-438-52-3918"
        },
        {
          "key": "institute",
          "value": "National Institute of Technology and Evaluation, NITE"
        },
        {
          "key": "department",
          "value": "Bioresource Information Center, Department of Biotechnology"
        },
        {
          "key": "country",
          "value": "Japan"
        },
        {
          "key": "state",
          "value": "Tokyo"
        },
        {
          "key": "city",
          "value": "Shibuya"
        },
        {
          "key": "street",
          "value": "2-49-10 Nishihara"
        },
        {
          "key": "zip",
          "value": "151-0066"
        }
      ],
      "feature": "SUBMITTER"
    },
.
.
.
  "Chromosome": {
    "Chromosome#feature:1..6788435:source": {
      "location": "1..6788435",
      "qualifiers": [
        {
          "key": "organism",
          "value": "Arthrospira platensis NIES-39"
        },
        {
          "key": "mol_type",
          "value": "genomic DNA"
        },
        {
          "key": "strain",
          "value": "NIES-39"
        },
        {
          "key": "ff_definition",
          "value": "Arthrospira platensis NIES-39 DNA, nearly complete genome"
        }
      ],
      "feature": "source"
    },
    "Chromosome#feature:1..6788435:TOPOLOGY": {
      "location": "1..6788435",
      "qualifiers": [
        {
          "key": "circular",
          "value": null
        }
      ],
      "feature": "TOPOLOGY"
    },
    "Chromosome#feature:870293..877493:gap": {
      "location": "870293..877493",
      "qualifiers": [
        {
          "key": "estimated_length",
          "value": "known"
        }
      ],
      "feature": "gap"
    },
    "Chromosome#feature:1018568..1021302:gap": {
      "location": "1018568..1021302",
      "qualifiers": [
        {
          "key": "estimated_length",
          "value": "known"
        }
      ],
      "feature": "gap"
    },
    "Chromosome#feature:1582613..1585836:gap": {
      "location": "1582613..1585836",
      "qualifiers": [
        {
          "key": "estimated_length",
          "value": "known"
        }
      ],
      "feature": "gap"
    },
.
.
.
    "Chromosome#feature:152..412:CDS": {
      "location": "152..412",
      "qualifiers": [
        {
          "key": "locus_tag",
          "value": "NIES39_A00010"
        },
        {
          "key": "codon_start",
          "value": "1"
        },
        {
          "key": "transl_table",
          "value": "11"
        },
        {
          "key": "product",
          "value": "hypothetical protein"
        }
      ],
      "feature": "CDS"
    },
    "Chromosome#feature:complement(377..724):CDS": {
      "location": "complement(377..724)",
      "qualifiers": [
        {
          "key": "locus_tag",
          "value": "NIES39_A00020"
        },
        {
          "key": "codon_start",
          "value": "1"
        },
        {
          "key": "transl_table",
          "value": "11"
        },
        {
          "key": "product",
          "value": "hypothetical protein"
        }
      ],
      "feature": "CDS"
    },
.
.
.
}

実装

  • 変換スクリプトを作成し以下の操作でJSONを出力
[火  2 23 17:58] tf@~/github/ddbj_validator/misc/tf
%ruby submission_tsv2json ~/Dropbox/apl/DDBJ/NIES39_v17.ddbj > test.json
[月  4 11 14:51] tf@~/github/ddbj_validator/misc/tf
%ruby submission_tsv2json ../../kyulee/sample01_WGS_PRJDB4174.ann 
  • 設計した入力JSONを利用して、バリデーションスクリプトをperlで実装 ddbj_annotated_sequence_validator.pl
[火  2 23 18:00] tf@~/github/ddbj_validator/misc/tf
%perl ddbj_annotated_sequence_validator.pl 

ddbj_annotated_sequence_validator.pl

#!/usr/bin/env perl
#
use strict;
use warnings;
use JSON::XS;
use Data::Dumper;


# Path of the JSON file to validate; falls back to 'test.json' when no
# command-line argument is given.
my $input_file = shift || 'test.json';
# Path of this script; exec_togoannotator reports it as the "method".
my $validator  = __FILE__;

open my $fh, '<', $input_file
    or die "failed to open '$input_file': $!";
# Slurp the whole file in one read (raw bytes, which is what decode_json
# expects) instead of concatenating line by line.
my $input_json = do { local $/; <$fh> };
close $fh
    or die "failed to close '$input_file': $!";

my $struct = decode_json($input_json);
die "unexpected JSON in '$input_file': top level must be an object\n"
    unless ref($struct) eq 'HASH';

# Walk entry -> feature -> qualifier and hand every "product" qualifier
# value to exec_togoannotator.  Keys are sorted so the order of the emitted
# messages is the same on every run (hash order is otherwise arbitrary).
# Nodes that do not have the expected shape are skipped rather than
# aborting the run with a "Not a HASH reference" error.
for my $entry (sort keys %$struct) {
    my $features = $struct->{$entry};
    next unless ref($features) eq 'HASH';

    for my $feature_id (sort keys %$features) {
        my $feature = $features->{$feature_id};
        next unless ref($feature) eq 'HASH';

        my $qualifiers = $feature->{'qualifiers'};
        next unless ref($qualifiers) eq 'ARRAY';

        for my $qualifier (@$qualifiers) {
            next unless ref($qualifier) eq 'HASH';

            my $key = $qualifier->{'key'};
            next unless defined $key && $key eq 'product';

            exec_togoannotator($qualifier->{'value'});
        }
    }
}


# Emit one validation message for a "product" qualifier value.
#
# The message is printed to STDOUT as a single, well-formed JSON object
# (UTF-8 encoded, pretty-printed, keys in sorted order).  It is built as a
# Perl data structure and serialized with JSON::XS so that quotes,
# backslashes and non-ASCII characters in the product name are escaped
# correctly; the previous hand-written template produced invalid JSON
# (unquoted "method" value, unquoted "annotation" key, "location" without
# a value).
#
# Arguments:
#   $product  - product string taken from the qualifier's "value"
#               (may be undef when the JSON value was null).
#   $location - optional; where the value was found.  Emitted as JSON null
#               when omitted, because the location notation is not decided
#               yet.
#
# Reads the file-level variables $validator and $input_file.
sub exec_togoannotator {
    my ($product, $location) = @_;

    # TODO: exec togoannotator
    # Placeholder suggestion until TogoAnnotator is wired in.
    my $suggest_product
        = defined $product ? $product . '_curation' : undef;

    my %message = (
        id         => 'error_code_id',
        message    => '',
        message_ja => '',
        reference  => 'http://hoge.org',    # placeholder URL (typo fixed)
        level      => 'warning',
        method     => $validator,
        annotation => [
            {
                key      => 'biosample package',
                source   => $input_file,
                location => $location,
                value    => [ $product, $suggest_product ],
            },
        ],
    );

    # canonical: deterministic key order; pretty: readable and ends with a
    # newline; utf8: encode decoded JSON text back to bytes for STDOUT.
    print JSON::XS->new->utf8->canonical->pretty->encode(\%message);
}

動作確認2016.04.11

DDBJ登録形式ファイルからJSONに変換

ruby misc/tf/submission_tsv2json kyulee/sample01_WGS_PRJDB4174.ann > /tmp/sample01_WGS_PRJDB4174.json

JSONを入力してバリデーターを実行

perl src/annotated_sequence_validator/ddbj_annotated_sequence_validator.pl /tmp/sample01_WGS_PRJDB4174.json

課題

  • DDBJ (Annotated Sequence) 登録形式から変換されたJSON形式仕様を確定
  • validatorをTogoAnnotator実装に合わせてperlで実装
  • validatorにTogoAnnotator組み込み【Todo】
  • bitbucketレポジトリを配置し、パスを通してTogoAnnotatorモジュールを呼び出す
  • validation対象(location)の記法
  • 候補、JSONPath - XPath for JSON http://goessner.net/articles/JsonPath/
  • 李さん査定バリデーションルールの追加【Todo】
  • product qualifierのvalueにホワイトリスト登録以外の単語が使用されていた場合のwarning
  • 小菅さん作成の査定用Table形式の入力拡張【Todo】

H28年度以降でも可

  • ルールJSONの取得
  • ルールとメソッドorサブルーチンを対応づけるメタプログラミングな仕組み

H28年度以降

submission/validation api

Clone this wiki locally