-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path05_fetch_files.py
109 lines (98 loc) · 3.05 KB
/
05_fetch_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import argparse
from gdcutil import GDCClient
# Root URL of the GDC API.  It is not referenced in the visible part of this
# script -- presumably kept for reference; TODO confirm whether it is needed.
BASE_URL = "https://api.gdc.cancer.gov"

# Endpoint path handed to GDCClient for this script's query.
ENDPOINT = "/files"
# Field names handed to GDCClient, grouped by the entity they belong to.
# Entries that are commented out are currently not requested; they are kept
# in place so the omission stays visible.

# Attributes of the file record itself.
_file_fields = [
    "file_id",
    "submitter_id",
    "file_name",
    "file_size",
    "md5sum",
    "type",
    "data_category",
    "data_type",
    "data_format",
    "experimental_strategy",
    "platform",
    "access",
    # "tags",
    # "revision",
    "created_datetime",
    "updated_datetime",
    "state",
]

# Entities associated with the file.
_associated_entity_fields = [
    "associated_entities.entity_submitter_id",
    "associated_entities.entity_type",
    "associated_entities.case_id",
    "associated_entities.entity_id",
]

# Cases the file is linked to.
_case_fields = [
    "cases.case_id",
    "cases.submitter_id",
]

# Analysis attached to the file, including its input files and workflow.
_analysis_fields = [
    "analysis.analysis_id",
    "analysis.analysis_type",
    "analysis.submitter_id",
    "analysis.input_files.file_id",
    "analysis.input_files.file_name",
    "analysis.workflow_type",
    "analysis.workflow_version",
    "analysis.workflow_link",
    "analysis.state",
]

# Index files belonging to the file.
_index_file_fields = [
    "index_files.file_id",
    "index_files.file_name",
]

# Not requested at the moment:
#   "downstream_analyses.analysis_id",
#   "downstream_analyses.analysis_type",
#   "downstream_analyses.submitter_id",
#   "downstream_analyses.output_files.file_id",
#   "downstream_analyses.output_files.file_name",
#   "downstream_analyses.workflow_type",
#   "downstream_analyses.workflow_version",
#   "downstream_analyses.workflow_link",
#   "downstream_analyses.state",
#   "center.center_id",  # This field is not available ???
#   "center.center_type",
#   "center.code",
#   "center.name",
#   "center.namespace",
#   "center.short_name",
#   "metadata_files.file_id",  # This field is not available ???
#   "metadata_files.file_name",

# Final list, in the same order as the groups above.
fields = (
    _file_fields
    + _associated_entity_fields
    + _case_fields
    + _analysis_fields
    + _index_file_fields
)
# Clause 1: the case's project must belong to the TCGA program.
_program_clause = {
    "op": "=",
    "content": {
        "field": "cases.project.program.name",
        "value": ["TCGA"],
    },
}

# Data types a file may have in order to be selected.
_selected_data_types = [
    "Annotated Somatic Mutation",
    "Raw Simple Somatic Mutation",
    "Aggregated Somatic Mutation",
    "Copy Number Segment",
    "Allele-specific Copy Number Segment",
    "Masked Copy Number Segment",
    "Masked Somatic Mutation",
    "Structural Rearrangement",
    "Simple Germline Variation",
    "Methylation Beta Value",
    "Gene Expression Quantification",
    "miRNA Expression Quantification",
    "Isoform Expression Quantification",
    "Splice Junction Quantification",
    "Transcript Fusion",
]

# Clause 2: the file's data type must be one of the selected types.
_data_type_clause = {
    "op": "in",
    "content": {
        "field": "files.data_type",
        "value": _selected_data_types,
    },
}

# Filter expression handed to GDCClient: both clauses combined with "and".
filters = {
    "op": "and",
    "content": [_program_clause, _data_type_clause],
}
def main():
    """Run the command-line entry point of this script.

    Parses the required ``-o/--output`` argument, builds a ``GDCClient`` from
    the module-level ``ENDPOINT``, ``fields`` and ``filters``, and calls its
    ``to_json`` method with the output path (presumably writing the query
    result there -- see ``gdcutil`` for the exact behavior).
    """
    # This script queries the /files endpoint, so the help text describes
    # file metadata rather than annotation metadata.
    parser = argparse.ArgumentParser(description='Fetch file metadata from GDC API')
    parser.add_argument('-o', '--output', type=str, required=True, help='Output file path')
    args = parser.parse_args()

    client = GDCClient(ENDPOINT, fields, filters)
    client.to_json(args.output)


if __name__ == '__main__':
    main()