|
2 | 2 | "cells": [ |
3 | 3 | { |
4 | 4 | "cell_type": "code", |
5 | | - "execution_count": 2, |
| 5 | + "execution_count": null, |
6 | 6 | "metadata": {}, |
7 | 7 | "outputs": [], |
8 | 8 | "source": [ |
|
24 | 24 | "# Configuration data\n", |
25 | 25 | "# ---------------\n", |
26 | 26 | "\n", |
27 | | - "graph_name = 'https://github.com/HeardLibrary/linked-data/blob/29e5d02aaf00cb890792d7dee73707603a506b3e/json_schema/bluffton_presidents.csv'\n", |
| 27 | + "graph_name = 'https://raw.githubusercontent.com/HeardLibrary/linked-data/54bd94c609e9c5af6c558cd926939ded67cba2ae/json_schema/bluffton_presidents.csv'\n", |
28 | 28 | "accept_media_type = 'text/turtle'\n", |
29 | 29 | "sparql_endpoint = \"https://sparql.vanderbilt.edu/sparql\"\n", |
30 | 30 | "request_header_dictionary = {\n", |
|
59 | 59 | " exit()\n", |
60 | 60 | " return(cred)\n", |
61 | 61 | "\n", |
62 | | - "def retrieve_direct_statements(sparql_endpoint):\n", |
| 62 | + "def retrieve_direct_statements(sparql_endpoint, graph_name):\n", |
63 | 63 | " query = '''\n", |
64 | 64 | "construct {?item ?directProp ?value.}\n", |
65 | 65 | "from <''' + graph_name + '''>\n", |
|
76 | 76 | " r = requests.get(sparql_endpoint, params={'query' : query}, headers=request_header_dictionary)\n", |
77 | 77 | " return r.text\n", |
78 | 78 | "\n", |
def retrieve_time_statements(sparql_endpoint, graph_name, subject_type):
    """Run a SPARQL CONSTRUCT that derives direct time-valued triples for one subject type.

    The query looks in the named graph for triples whose predicate starts with
    ``http://www.wikidata.org/prop/<subject_type>/value/`` and whose object has a
    ``wikibase:timeValue``. For each match it constructs a triple from the same
    subject to that time value, using the predicate
    ``http://www.wikidata.org/prop/<subject_type>/<id>``, where ``<id>`` is whatever
    followed the ``/value/`` prefix in the original predicate.

    Parameters
    ----------
    sparql_endpoint : str
        URL the GET request is sent to.
    graph_name : str
        IRI of the graph to query; inserted unescaped between ``<`` and ``>``.
    subject_type : str
        Path segment of the property namespace. The notebook calls this with
        "statement", "reference", and "qualifier".

    Returns
    -------
    str
        The response body (``r.text``) exactly as returned by the endpoint.

    Notes
    -----
    Uses the module-level ``request_header_dictionary`` for the request headers.
    NOTE(review): the HTTP status is not checked, so an error page would be
    returned as if it were graph data -- confirm whether callers rely on that.
    """
    value_prefix = 'http://www.wikidata.org/prop/' + subject_type + '/value/'
    direct_prefix = 'http://www.wikidata.org/prop/' + subject_type + '/'

    # SPARQL substr() is 1-based. The offsets are derived from the actual prefix
    # length rather than hard-coded, so the query no longer depends on every
    # subject type happening to be 9 characters long (which gave 45 and 46).
    prefix_length = len(value_prefix)

    query = '''
prefix wikibase: <http://wikiba.se/ontology#>
construct {?subject ?directProp ?timeValue.}
from <''' + graph_name + '''>
where {
  ?subject ?valueProperty ?value.
  ?value wikibase:timeValue ?timeValue.
  filter(substr(str(?valueProperty),1,''' + str(prefix_length) + ''')="''' + value_prefix + '''")
  bind(substr(str(?valueProperty),''' + str(prefix_length + 1) + ''') as ?id)
  bind(iri(concat("''' + direct_prefix + '''", ?id)) as ?directProp)
  }
'''
    r = requests.get(sparql_endpoint, params={'query' : query}, headers=request_header_dictionary)
    return r.text
| 97 | + "\n", |
def perform_sparql_update(sparql_endpoint, pwd, update_command, username='admin'):
    """POST a SPARQL Update command to the endpoint and print the outcome.

    Parameters
    ----------
    sparql_endpoint : str
        URL the update is posted to.
    pwd : str
        Password used for HTTP authentication.
    update_command : str or bytes
        The SPARQL Update text to send as the request body.
    username : str, optional
        User name used for HTTP authentication; defaults to 'admin', the value
        that was previously hard-coded.

    Returns
    -------
    None
        The status code, final URL, and response text are printed, not returned.
    """
    # SPARQL Update requires HTTP POST
    hdr = {'Content-Type' : 'application/sparql-update; charset=utf-8'}

    # A str body is encoded as ISO-8859-1 by the underlying HTTP client, which
    # fails or corrupts any literal containing characters outside Latin-1.
    # Encode explicitly so the bytes on the wire match the declared charset.
    if isinstance(update_command, str):
        payload = update_command.encode('utf-8')
    else:
        payload = update_command

    r = requests.post(sparql_endpoint, auth=(username, pwd), headers=hdr, data=payload)
    print(str(r.status_code) + ' ' + r.url)
    print(r.text)
| 104 | + "\n", |
def prep_and_update(sparql_endpoint, pwd, graph_name, graph_text):
    """Strip Turtle directives from ``graph_text`` and insert what remains into a graph.

    Parameters
    ----------
    sparql_endpoint : str
        Passed through to ``perform_sparql_update``.
    pwd : str
        Passed through to ``perform_sparql_update``.
    graph_name : str
        IRI of the target graph; inserted unescaped between ``<`` and ``>``.
    graph_text : str
        Turtle text, typically the response of one of the CONSTRUCT queries.

    Returns
    -------
    None
        Either delegates to ``perform_sparql_update`` or prints
        'no triples to write' when no lines survive the filtering.
    """
    # Remove prefixes from the response Turtle; they are not necessary since the
    # IRIs are unabbreviated. Empty lines are dropped as well. The explicit
    # emptiness test replaces a bare except that existed only to survive
    # indexing into an empty line.
    kept_lines = [line for line in graph_text.split('\n') if line and line[0] != '@']

    # Don't perform an update if there aren't any triples to add.
    if not kept_lines:
        print('no triples to write')
        return

    # Every kept line is terminated with a newline, as before.
    triples = ''.join(line + '\n' for line in kept_lines)

    # Send SPARQL 1.1 UPDATE to endpoint to add the constructed triples into the graph
    update_command = '''INSERT DATA
{ GRAPH <''' + graph_name + '''> { 
''' + triples + '''
}}'''

    perform_sparql_update(sparql_endpoint, pwd, update_command)
85 | 130 | ] |
86 | 131 | }, |
87 | 132 | { |
88 | 133 | "cell_type": "code", |
89 | | - "execution_count": 3, |
| 134 | + "execution_count": null, |
90 | 135 | "metadata": {}, |
91 | | - "outputs": [ |
92 | | - { |
93 | | - "name": "stdout", |
94 | | - "output_type": "stream", |
95 | | - "text": [ |
96 | | - "constructed triples retrieved\n" |
97 | | - ] |
98 | | - } |
99 | | - ], |
| 136 | + "outputs": [], |
100 | 137 | "source": [ |
# ---------------
# Build the direct-property triples implied by the Wikibase model and write them back to the graph
# ---------------
pwd = load_credential(filename, directory)

# Step 1: plain direct statements.
graph_text = retrieve_direct_statements(sparql_endpoint, graph_name)
print('constructed direct triples retrieved')
prep_and_update(sparql_endpoint, pwd, graph_name, graph_text)
print()

# Step 2: time-valued direct triples, one pass per kind of subject node.
for subject_type in ('statement', 'reference', 'qualifier'):
    graph_text = retrieve_time_statements(sparql_endpoint, graph_name, subject_type)
    print('constructed direct', subject_type, 'time triples retrieved')
    prep_and_update(sparql_endpoint, pwd, graph_name, graph_text)
    print()

# Blank separator line followed by the completion marker.
print('\ndone')
|
0 commit comments