package org.deepamehta.plugins.wikidata;

import de.deepamehta.core.Association;
import de.deepamehta.core.AssociationType;
import de.deepamehta.core.RelatedAssociation;
import de.deepamehta.core.Topic;
import de.deepamehta.core.model.*;
import de.deepamehta.core.osgi.PluginActivator;
import de.deepamehta.core.service.Inject;
import de.deepamehta.core.service.ResultList;
import de.deepamehta.core.service.Transactional;
import de.deepamehta.core.storage.spi.DeepaMehtaTransaction;
import de.deepamehta.plugins.accesscontrol.service.AccessControlService;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.ws.rs.*;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response.Status;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.deepamehta.plugins.wikidata.service.WikidataSearchService;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;


/**
 * A very basic plugin to search and explore Wikidata.
 * Allows turning a "Wikidata Search Result Entity" (of type=property) into a DeepaMehta 4 Association Type.
 *
 * @author Malte Reißig (<malte@mikromedia.de>)
 * @website https://github.com/mukil/dm4-wikidata
 * @version 0.0.5-SNAPSHOT
 */
@Path("/wikidata")
@Consumes("application/json")
@Produces("application/json")
public class WikidataSearchPlugin extends PluginActivator implements WikidataSearchService {

    private Logger log = Logger.getLogger(getClass().getName());

    private final String DEEPAMEHTA_VERSION = "DeepaMehta 4.4";
    private final String WIKIDATA_TYPE_SEARCH_VERSION = "0.0.5-SNAPSHOT";
    private final String CHARSET = "UTF-8";

    // --- DeepaMehta 4 URIs

    private final String DM_WEBBROWSER_URL = "dm4.webbrowser.url";

    // --- Wikidata DeepaMehta URIs

    private final String WS_WIKIDATA_URI = "org.deepamehta.workspaces.wikidata";

    private final String WD_SEARCH_BUCKET_URI = "org.deepamehta.wikidata.search_bucket";
    private final String WD_SEARCH_QUERY_URI = "org.deepamehta.wikidata.search_query";

    private final String WD_LANGUAGE_URI = "org.deepamehta.wikidata.language";
    // private final String WD_LANGUAGE_NAME_URI = "org.deepamehta.wikidata.language_name";
    // private final String WD_LANGUAGE_ISO_CODE_URI = "org.deepamehta.wikidata.language_code_iso";
    private final String WD_LANGUAGE_DATA_URI_PREFIX = "org.deepamehta.wikidata.lang_";

    private final String WD_SEARCH_ENTITY_URI = "org.deepamehta.wikidata.search_entity";
    private final String WD_SEARCH_ENTITY_LABEL_URI = "org.deepamehta.wikidata.search_entity_label";
    private final String WD_SEARCH_ENTITY_TYPE_URI = "org.deepamehta.wikidata.search_entity_type";
    private final String WD_SEARCH_ENTITY_ORDINAL_NR = "org.deepamehta.wikidata.search_ordinal_nr";
    private final String WD_SEARCH_ENTITY_DESCR_URI = "org.deepamehta.wikidata.search_entity_description";
    private final String WD_SEARCH_ENTITY_ALIAS_URI = "org.deepamehta.wikidata.search_entity_alias";
    private final String WD_SEARCH_ENTITIY_DATA_URI_PREFIX = "org.deepamehta.wikidata.entity_";

    private final String WD_TEXT_TYPE_URI = "org.deepamehta.wikidata.text";

    private final String WD_COMMONS_MEDIA_TYPE_URI = "org.deepamehta.wikidata.commons_media";
    private final String WD_COMMONS_MEDIA_NAME_TYPE_URI = "org.deepamehta.wikidata.commons_media_name";
    private final String WD_COMMONS_MEDIA_PATH_TYPE_URI = "org.deepamehta.wikidata.commons_media_path";
    // private final String WD_COMMONS_MEDIA_TYPE_TYPE_URI = "org.deepamehta.wikidata.commons_media_type";
    private final String WD_COMMONS_MEDIA_DESCR_TYPE_URI = "org.deepamehta.wikidata.commons_media_descr";
    private final String WD_COMMONS_AUTHOR_HTML_URI = "org.deepamehta.wikidata.commons_author_html";
    private final String WD_COMMONS_LICENSE_HTML_URI = "org.deepamehta.wikidata.commons_license_html";
    // private final String WD_GLOBE_COORDINATE_TYPE_URI = "org.deepamehta.wikidata.globe_coordinate";

    private final String WD_ENTITY_CLAIM_EDGE = "org.deepamehta.wikidata.claim_edge";

    // --- Wikidata Service URIs

    private final String WD_SEARCH_ENTITIES_ENDPOINT =
            "http://www.wikidata.org/w/api.php?action=wbsearchentities&format=json&limit=50";
    private final String WD_CHECK_ENTITY_CLAIMS_ENDPOINT =
            "http://www.wikidata.org/w/api.php?action=wbgetclaims&format=json"; // &ungroupedlist=0
    private final String WD_GET_ENTITY_ENDPOINT = "http://www.wikidata.org/w/api.php?action=wbgetentities"
            + "&props=info%7Caliases%7Clabels%7Cdescriptions&format=json"; // sitelinks%2Furls%7C
    private final String WD_SEARCH_ENTITY_TYPE_PROPERTY = "property";
    private final String WD_SEARCH_ENTITY_TYPE_ITEM = "item";
    private final String WD_ENTITY_BASE_URI = "org.wikidata.entity.";

    private final String LANG_EN = "en";

    private final String WIKIDATA_ENTITY_URL_PREFIX = "//www.wikidata.org/wiki/";
    private final String WIKIDATA_PROPERTY_ENTITY_URL_PREFIX = "Property:";
    // private final String WIKIMEDIA_COMMONS_MEDIA_FILE_URL_PREFIX = "//commons.wikimedia.org/wiki/File:";

    @Inject
    private AccessControlService acService = null;



    // --
    // --- Public REST API Endpoints
    // --

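    /**
     * Searches Wikidata for entities matching the given query and stores the results as a
     * "Wikidata Search Bucket" topic with one "Wikidata Search Result Entity" child per hit.
     *
     * For example, a request to <code>/wikidata/search/item/Berlin/en</code> issues roughly
     * <code>...action=wbsearchentities&format=json&limit=50&search=Berlin&language=en&type=item</code>
     * against the Wikidata API (note: the query value is concatenated as-is, not URL-encoded here).
     *
     * @param query free-text search string
     * @param lang  ISO language code (falls back to "en" if missing or "undefined")
     * @param type  Wikidata entity type, either "item" or "property"
     * @return the newly created search bucket topic with all child topics loaded
     */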
    @GET
    @Path("/search/{entity}/{query}/{language_code}")
    @Produces(MediaType.APPLICATION_JSON)
    @Override
    @Transactional
    public Topic searchWikidataEntity(@PathParam("query") String query, @PathParam("language_code") String lang,
                                      @PathParam("entity") String type) {
        String json_result = "";
        StringBuffer resultBody = new StringBuffer();
        URL requestUri = null;
        Topic search_bucket = null;
        // sanity check (fall back to "en" if no language was provided by an un-initialized language widget)
        if (lang == null || lang.equals("undefined")) {
            log.warning("Wikidata Language Search Option was not provided, now requesting data in EN");
            lang = LANG_EN;
        }
        // start search operation
        try {
            // 1) fixme: Authorize request
            requestUri = new URL(WD_SEARCH_ENTITIES_ENDPOINT + "&search="+ query +"&language="+ lang +"&type=" + type);
            log.fine("Wikidata Search Entities Request: " + requestUri.toString());
            // 2) initiate request
            HttpURLConnection connection = (HttpURLConnection) requestUri.openConnection();
            connection.setRequestMethod("GET");
            connection.setRequestProperty("User-Agent", "DeepaMehta "+DEEPAMEHTA_VERSION+" - "
                    + "Wikidata Search " + WIKIDATA_TYPE_SEARCH_VERSION);
            // 3) check the response
            int httpStatusCode = connection.getResponseCode();
            if (httpStatusCode != HttpURLConnection.HTTP_OK) {
                throw new WebApplicationException(new Throwable("Error with HTTPConnection."),
                        Status.INTERNAL_SERVER_ERROR);
            }
            // 4) read in the response
            BufferedReader rd = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET));
            for (String input; (input = rd.readLine()) != null;) {
                resultBody.append(input);
            }
            rd.close();
            // 5) process response
            if (resultBody.toString().isEmpty()) {
                throw new WebApplicationException(new RuntimeException("Wikidata was silent."),
                        Status.NO_CONTENT);
            } else {
                // 6) create the "Wikidata Search Bucket" grouping all result entities
                ChildTopicsModel bucket_model = new ChildTopicsModel();
                bucket_model.put(WD_SEARCH_QUERY_URI, query);
                bucket_model.putRef(WD_LANGUAGE_URI, WD_LANGUAGE_DATA_URI_PREFIX + lang);
                json_result = resultBody.toString();
                log.fine("Wikidata Search Request Response: " + json_result);
                processWikidataEntitySearch(json_result, bucket_model, type, lang);
                search_bucket = dms.createTopic(new TopicModel(WD_SEARCH_BUCKET_URI, bucket_model));
                // workaround: addRef does not (yet) fetchComposite, so re-fetch the bucket topic
                search_bucket = dms.getTopic(search_bucket.getId());
                log.info("Wikidata Search Bucket for \""+ query +"\" ("+ lang +") was CREATED");
            }
            search_bucket.loadChildTopics(); // load all child topics
        } catch (MalformedURLException e) {
            log.warning("Wikidata Plugin: MalformedURLException ... " + e.getMessage());
            throw new RuntimeException("Could not find wikidata endpoint.", e);
        } catch (IOException ioe) {
            throw new WebApplicationException(new Throwable(ioe), Status.BAD_REQUEST);
        } catch (Exception e) {
            throw new WebApplicationException(new Throwable(e), Status.INTERNAL_SERVER_ERROR);
        }
        // no return from a finally block here - that would silently swallow the exceptions thrown above
        return search_bucket;
    }

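    /**
     * Fetches a single Wikidata entity via the wbgetentities module and creates a
     * "Wikidata Search Result Entity" topic for it, or updates the existing topic if one with the
     * URI prefix "org.deepamehta.wikidata.entity_" + entityId is already stored.
     *
     * @param entityId        Wikidata entity id, e.g. "Q42" (item) or "P31" (property)
     * @param language_code   ISO language code for labels, descriptions and aliases
     * @return the created or updated entity topic with all child topics loaded
     */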
    @GET
    @Path("/{entityId}/{language_code}")
    @Produces(MediaType.APPLICATION_JSON)
    @Override
    public Topic getOrCreateWikidataEntity(@PathParam("entityId") String entityId,
                                           @PathParam("language_code") String language_code) {
        String json_result = "";
        StringBuffer resultBody = new StringBuffer();
        URL requestUri = null;
        Topic entity = null;
        // sanity check (fall back to "en" if no language was provided by an un-initialized language widget)
        if (language_code == null || language_code.equals("undefined")) {
            log.warning("Wikidata Language Search Option was not provided, now requesting data in EN");
            language_code = LANG_EN;
        }
        try {
            // 1) fixme: Authorize request
            // &sites=dewiki&&languages=de
            requestUri = new URL(WD_GET_ENTITY_ENDPOINT + "&ids="+ entityId + "&languages=" + language_code);
            log.fine("Requesting Wikidata Entity Details " + requestUri.toString());
            // 2) initiate request
            HttpURLConnection connection = (HttpURLConnection) requestUri.openConnection();
            connection.setRequestMethod("GET");
            connection.setRequestProperty("User-Agent", "DeepaMehta "+DEEPAMEHTA_VERSION+" - "
                    + "Wikidata Search " + WIKIDATA_TYPE_SEARCH_VERSION);
            // 3) check the response
            int httpStatusCode = connection.getResponseCode();
            if (httpStatusCode != HttpURLConnection.HTTP_OK) {
                throw new WebApplicationException(new Throwable("Error with HTTPConnection."),
                        Status.INTERNAL_SERVER_ERROR);
            }
            // 4) read in the response
            BufferedReader rd = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET));
            for (String input; (input = rd.readLine()) != null;) {
                resultBody.append(input);
            }
            rd.close();
            // 5) process response
            if (resultBody.toString().isEmpty()) {
                throw new WebApplicationException(new RuntimeException("Wikidata was silent."),
                        Status.NO_CONTENT);
            } else {
                // 6) create or update the Wikidata Search Entity
                json_result = resultBody.toString();
                log.fine("Wikidata Entity Request Response: " + json_result);
                JSONObject response = new JSONObject(json_result);
                JSONObject entities = response.getJSONObject("entities");
                JSONObject response_entity = entities.getJSONObject(entityId);
                // check if we need to CREATE or UPDATE our search result entity item
                Topic existingEntity = dms.getTopic("uri",
                        new SimpleValue(WD_SEARCH_ENTITIY_DATA_URI_PREFIX + entityId));
                if (existingEntity == null) {
                    entity = createWikidataSearchEntity(response_entity, language_code);
                } else {
                    // updates labels, descriptions, aliases, url and (query) language
                    entity = updateWikidataEntity(existingEntity, response_entity, language_code);
                }
                entity.loadChildTopics(); // load all child topics
            }
        } catch (MalformedURLException e) {
            log.warning("Wikidata Plugin: MalformedURLException ... " + e.getMessage());
            throw new RuntimeException("Could not find wikidata endpoint.", e);
        } catch (IOException ioe) {
            throw new WebApplicationException(new Throwable(ioe), Status.BAD_REQUEST);
        } catch (JSONException je) {
            throw new WebApplicationException(new Throwable(je), Status.INTERNAL_SERVER_ERROR);
        } catch (Exception e) {
            throw new WebApplicationException(new Throwable(e), Status.INTERNAL_SERVER_ERROR);
        }
        // no return from a finally block here - that would silently swallow the exceptions thrown above
        return entity;
    }

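    /**
     * Loads all claims (statements) of the Wikidata item behind the given topic via the wbgetclaims
     * module and materializes each supported claim value as a topic connected through a
     * "Wikidata Claim" edge. Wikibase items, Commons media files and plain string values are
     * supported; URL, globe-coordinate and other value types are currently skipped.
     *
     * @param topicId           id of an already imported "Wikidata Search Result Entity" topic
     * @param language_option   ISO language code used when fetching referenced entities
     * @return the given topic with all child topics loaded
     */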
    @GET
    @Path("/check/claims/{id}/{language_code}")
    @Produces(MediaType.APPLICATION_JSON)
    @Override
    @Transactional
    public Topic loadClaimsAndRelatedWikidataItems(@PathParam("id") long topicId,
                                                   @PathParam("language_code") String language_option) {

        String json_result = "";
        StringBuffer resultBody = new StringBuffer();
        URL requestUri = null;
        Topic wikidataItem = dms.getTopic(topicId);
        // 0) sanity check (fall back to "en" if no language was provided by an un-initialized language widget)
        if (language_option == null || language_option.equals("undefined")) {
            log.warning("Wikidata Language Search Option was not provided, now requesting data in EN.");
            language_option = LANG_EN;
        }
        String wikidataId = wikidataItem.getUri().replaceAll(WD_SEARCH_ENTITIY_DATA_URI_PREFIX, "");
        try {
            // 1) ### Authorize request
            // 2) ### be explicit and add "&rank=normal" to the wbgetclaims-call, ### add "&props=references" some day
            requestUri = new URL(WD_CHECK_ENTITY_CLAIMS_ENDPOINT + "&entity=" + wikidataId);
            log.fine("Requesting Wikidata Entity Claims: " + requestUri.toString());
            // 2) initiate request
            HttpURLConnection connection = (HttpURLConnection) requestUri.openConnection();
            connection.setRequestMethod("GET");
            connection.setRequestProperty("User-Agent", "DeepaMehta "+DEEPAMEHTA_VERSION+" - "
                    + "Wikidata Search " + WIKIDATA_TYPE_SEARCH_VERSION);
            // 3) check the response
            int httpStatusCode = connection.getResponseCode();
            if (httpStatusCode != HttpURLConnection.HTTP_OK) {
                throw new WebApplicationException(new Throwable("Error with HTTPConnection."),
                        Status.INTERNAL_SERVER_ERROR);
            }
            // 4) read in the response
            BufferedReader rd = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET));
            for (String input; (input = rd.readLine()) != null;) {
                resultBody.append(input);
            }
            rd.close();
            // 5) process response
            if (resultBody.toString().isEmpty()) {
                throw new WebApplicationException(new RuntimeException("Wikidata was silent."),
                        Status.NO_CONTENT);
            } else {
                json_result = resultBody.toString();
                log.fine("Wikidata Claim Request Response: " + json_result);
                processWikidataClaims(json_result, wikidataItem, language_option);
                log.info("Wikidata Claim Response is FINE");
            }
            wikidataItem.loadChildTopics(); // load all child topics
        } catch (MalformedURLException e) {
            log.warning("Wikidata Plugin: MalformedURLException ... " + e.getMessage());
            throw new RuntimeException("Could not find wikidata endpoint.", e);
        } catch (IOException ioe) {
            throw new WebApplicationException(new Throwable(ioe), Status.BAD_REQUEST);
        } catch (Exception e) {
            throw new WebApplicationException(new Throwable(e), Status.INTERNAL_SERVER_ERROR);
        }
        // no return from a finally block here - that would silently swallow the exceptions thrown above
        return wikidataItem;
    }

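    /**
     * Turns a "Wikidata Search Result Entity" of type property into a DeepaMehta 4 Association Type.
     * The new association type gets the URI "org.deepamehta.wikidata.assoctype_" + &lt;propertyId&gt;,
     * is assigned to the "Wikidata" workspace and associated with the originating property topic so
     * the mapping can be tracked.
     *
     * @param id    topic id of the wikidata property entity to convert
     * @return the created Association Type topic, or null if the conversion failed
     */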
    @GET
    @Path("/property/turn/{id}")
    @Produces(MediaType.APPLICATION_JSON)
    @Override
    @Transactional
    public Topic createWikidataAssociationType(@PathParam("id") long id) {
        AssociationType association_type = null;
        try {
            Topic property_entity = dms.getTopic(id);
            // 1) create the new Association Type model
            String property_name = property_entity.getSimpleValue().toString();
            AssociationTypeModel assoc_type_model = new AssociationTypeModel("org.deepamehta.wikidata.assoctype_"
                    + property_entity.getUri().replaceAll(WD_SEARCH_ENTITIY_DATA_URI_PREFIX, ""),
                    property_name, "dm4.core.text");
            association_type = dms.createAssociationType(assoc_type_model);
            // 2) assign it to the "Wikidata" workspace
            assignToWikidataWorkspace(association_type);
            // 3) associate the search-result-entity with the new assoc-type (to keep track of it)
            dms.createAssociation(new AssociationModel("dm4.core.association",
                    new TopicRoleModel(property_entity.getUri(), "dm4.core.default"),
                    new TopicRoleModel(association_type.getUri(), "dm4.core.default")
            ));
            log.info("Turned wikidata property \""+ property_entity.getUri() +"\" into DM Association Type!");
        } catch (Exception e) {
            log.warning("OH: The Wikidata Plugin experienced an unforeseen error! "+ e.getMessage());
        }
        return association_type;
    }

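    /**
     * Returns all "Wikidata Claim" edges the given topic (typically a wikidata property entity) is
     * aggregated into, with their child topics loaded.
     */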
    @GET
    @Path("/property/related/claims/{id}")
    @Produces(MediaType.APPLICATION_JSON)
    @Override
    public ResultList<RelatedAssociation> getTopicRelatedAssociations(@PathParam("id") long topicId) {
        Topic topic = dms.getTopic(topicId);
        ResultList<RelatedAssociation> associations = topic.getRelatedAssociations("dm4.core.aggregation",
                "dm4.core.child", "dm4.core.parent", "org.deepamehta.wikidata.claim_edge");
        return associations.loadChildTopics();
    }

    // --
    // --- Wikidata Search (Application Specific) Private Methods
    // --

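    /**
     * Parses a wbsearchentities response and adds one "Wikidata Search Result Entity" per hit to the
     * given search bucket model, either as a new composite or as a reference to an already existing
     * entity topic (identified via its URI).
     *
     * The relevant part of the response looks roughly like this (abridged, illustrative values):
     * <pre>
     * { "search": [ { "id": "Q64", "label": "Berlin", "url": "//www.wikidata.org/wiki/Q64",
     *                 "description": "capital city of Germany", "aliases": ["Berlin, Germany"] }, ... ] }
     * </pre>
     */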
    private void processWikidataEntitySearch(String json_result, ChildTopicsModel search_bucket,
                                             String type, String lang) {
        try {
            JSONObject response = new JSONObject(json_result);
            JSONArray result = response.getJSONArray("search");
            if (result.length() > 0) {
                for (int i = 0; i < result.length(); i++) {
                    JSONObject entity_response = result.getJSONObject(i);
                    // check if the entity already exists
                    String id = entity_response.getString("id");
                    Topic existing_entity = dms.getTopic("uri",
                            new SimpleValue(WD_SEARCH_ENTITIY_DATA_URI_PREFIX + id));
                    if (existing_entity == null) {
                        // create a new search entity composite
                        String name = entity_response.getString("label");
                        String url = entity_response.getString("url");
                        //
                        ChildTopicsModel entity_composite = new ChildTopicsModel();
                        entity_composite.put(WD_SEARCH_ENTITY_LABEL_URI, name);
                        if (entity_response.has("description")) {
                            String description = entity_response.getString("description");
                            entity_composite.put(WD_SEARCH_ENTITY_DESCR_URI, description);
                        }
                        entity_composite.put(DM_WEBBROWSER_URL, url);
                        // ### fix: aliases add up
                        if (entity_response.has("aliases")) {
                            JSONArray aliases = entity_response.getJSONArray("aliases");
                            for (int a=0; a < aliases.length(); a++) {
                                String alias = aliases.getString(a);
                                entity_composite.add(WD_SEARCH_ENTITY_ALIAS_URI,
                                        new TopicModel(WD_SEARCH_ENTITY_ALIAS_URI, new SimpleValue(alias)));
                            }
                        }
                        // set entity place in the result set
                        entity_composite.put(WD_SEARCH_ENTITY_ORDINAL_NR, i);
                        // set entity-type
                        entity_composite.put(WD_SEARCH_ENTITY_TYPE_URI, type);
                        // set language-value on entity-result
                        entity_composite.putRef(WD_LANGUAGE_URI, WD_LANGUAGE_DATA_URI_PREFIX + lang);
                        TopicModel entity_model = new TopicModel(WD_SEARCH_ENTITIY_DATA_URI_PREFIX + id,
                                WD_SEARCH_ENTITY_URI, entity_composite);
                        // create and reference the entity in the wikidata search bucket
                        search_bucket.add(WD_SEARCH_ENTITY_URI, entity_model);
                    } else {
                        // reference the existing entity in the wikidata search bucket by URI
                        search_bucket.addRef(WD_SEARCH_ENTITY_URI, WD_SEARCH_ENTITIY_DATA_URI_PREFIX + id);
                    }
                }
            }
        } catch (JSONException ex) {
            log.warning("Wikidata Plugin: JSONException while processing a wikidata entity search response. "
                    + ex.getMessage());
        }
    }

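    /**
     * Creates a new "Wikidata Search Result Entity" topic from a wbgetentities response fragment
     * inside its own transaction. Returns null if the creation fails.
     */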
    private Topic createWikidataSearchEntity(JSONObject entity_response, String lang) {
        Topic entity = null;
        DeepaMehtaTransaction tx = dms.beginTx();
        try {
            String id = entity_response.getString("id");
            // create the new search entity composite
            ChildTopicsModel entity_composite = buildWikidataEntityModel(entity_response, lang);
            TopicModel entity_model = new TopicModel(WD_SEARCH_ENTITIY_DATA_URI_PREFIX + id,
                    WD_SEARCH_ENTITY_URI, entity_composite);
            entity = dms.createTopic(entity_model);
            log.fine("Wikidata Search Entity Created (" +
                    entity_composite.getString(WD_SEARCH_ENTITY_TYPE_URI) + "): \"" + entity.getSimpleValue() + "\" - FINE!");
            tx.success();
        } catch (Exception ex) {
            log.warning("FAILED to create a \"Wikidata Search Entity\" caused by " + ex.getMessage());
            tx.failure();
        } finally {
            tx.finish();
        }
        return entity;
    }

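    /**
     * Updates an existing "Wikidata Search Result Entity" topic (labels, descriptions, aliases, url
     * and query language) from a wbgetentities response fragment inside its own transaction.
     * Returns null if the update fails.
     */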
    private Topic updateWikidataEntity(Topic entity, JSONObject entity_response, String lang) {
        DeepaMehtaTransaction tx = dms.beginTx();
        try {
            // update the existing search entity topic
            ChildTopicsModel entity_composite = buildWikidataEntityModel(entity_response, lang);
            TopicModel entity_model = new TopicModel(entity.getId(), entity_composite);
            dms.updateTopic(entity_model);
            log.fine("Wikidata Search Entity Updated (" +
                    entity_composite.getString(WD_SEARCH_ENTITY_TYPE_URI) + "): \"" + entity.getSimpleValue() + "\" - FINE!");
            tx.success();
            return entity;
        } catch (Exception ex) {
            log.warning("FAILED to UPDATE \"Wikidata Search Entity\" caused by " + ex.getMessage());
            tx.failure();
        } finally {
            tx.finish();
        }
        return null;
    }

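    /**
     * Builds the child topics model (label, description, aliases, web browser URL, query language
     * and entity type) for one entity out of a wbgetentities response fragment. The fragment is
     * expected to look roughly like this (abridged, illustrative values):
     * <pre>
     * { "id": "Q64", "type": "item",
     *   "labels": { "en": { "language": "en", "value": "Berlin" } },
     *   "descriptions": { "en": { "language": "en", "value": "capital city of Germany" } },
     *   "aliases": { "en": [ { "language": "en", "value": "Berlin, Germany" } ] } }
     * </pre>
     */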
    private ChildTopicsModel buildWikidataEntityModel(JSONObject entity_response, String lang) {
        ChildTopicsModel entity_composite = new ChildTopicsModel();
        try {
            String id = entity_response.getString("id");
            String type = entity_response.getString("type");
            // main label
            if (entity_response.has("labels")) {
                JSONObject labels = entity_response.getJSONObject("labels");
                JSONObject languaged_label = labels.getJSONObject(lang);
                String label = languaged_label.getString("value");
                entity_composite.put(WD_SEARCH_ENTITY_LABEL_URI, label);
            }
            // main description
            if (entity_response.has("descriptions")) {
                JSONObject descriptions = entity_response.getJSONObject("descriptions");
                JSONObject languaged_descr = descriptions.getJSONObject(lang);
                String description = languaged_descr.getString("value");
                entity_composite.put(WD_SEARCH_ENTITY_DESCR_URI, description);
            }
            // aliases
            if (entity_response.has("aliases")) {
                JSONObject aliases = entity_response.getJSONObject("aliases");
                JSONArray languaged_aliases = aliases.getJSONArray(lang);
                for (int a=0; a < languaged_aliases.length(); a++) {
                    JSONObject alias_object = languaged_aliases.getJSONObject(a);
                    String alias = alias_object.getString("value");
                    entity_composite.add(WD_SEARCH_ENTITY_ALIAS_URI,
                            new TopicModel(WD_SEARCH_ENTITY_ALIAS_URI, new SimpleValue(alias)));
                }
            }
            // set wikidata url
            if (type.equals(WD_SEARCH_ENTITY_TYPE_PROPERTY)) {
                entity_composite.put(DM_WEBBROWSER_URL, WIKIDATA_ENTITY_URL_PREFIX
                        + WIKIDATA_PROPERTY_ENTITY_URL_PREFIX + id);
            } else {
                entity_composite.put(DM_WEBBROWSER_URL, WIKIDATA_ENTITY_URL_PREFIX + id);
            }
            // set language-value on entity-result
            entity_composite.putRef(WD_LANGUAGE_URI, WD_LANGUAGE_DATA_URI_PREFIX + lang);
            // ### sitelinks
            /** if (entity_response.has("sitelinks")) {
                JSONObject sitelinks = entity_response.getJSONObject("sitelinks");
                if (sitelinks.has(lang + "wiki")) {
                    JSONObject sitelink = sitelinks.getJSONObject(lang + "wiki");
                    entity_composite.put(DM_WEBBROWSER_URL, sitelink.getString("url"));
                } else {
                    log.warning("There is no sitelink for this item in this language/wiki: " + lang + "wiki");
                }
            } **/
            entity_composite.put(WD_SEARCH_ENTITY_TYPE_URI, type);
            return entity_composite;
        } catch (JSONException jex) {
            log.warning("JSONException during build-up of the search-entities composite model");
            throw new RuntimeException(jex);
        }
    }

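    /**
     * Walks over a wbgetclaims response and creates (or re-uses) one topic plus one "Wikidata Claim"
     * edge per supported claim value. The response groups claims by property id, roughly like this
     * (abridged, illustrative values):
     * <pre>
     * { "claims": { "P31": [ { "id": "Q64$...", "mainsnak": { "datatype": "wikibase-item",
     *     "datavalue": { "value": { "numeric-id": 515 } } } } ] } }
     * </pre>
     * Handled datatypes: "wikibase-item", "commonsMedia" and "string"; "url", "globe-coordinate" and
     * everything else is logged and skipped.
     */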
    private void processWikidataClaims(String json_result, Topic wikidataItem, String language_code) {
        try {
            JSONObject response = new JSONObject(json_result);
            JSONObject result = response.getJSONObject("claims");
            // ### Needs to identify if claims (already imported in DM4) are not yet part of the current wikidata-data
            Iterator properties = result.keys();
            log.info("Wikidata Plugin is processing the claims grouped under " + result.length() + " properties");
            Topic propertyEntity = null;
            while (properties.hasNext()) {
                String property_id = properties.next().toString();
                // 1) load the related property-entity
                propertyEntity = getOrCreateWikidataEntity(property_id, language_code);
                // HashMap<String, List<Topic>> all_entities = new HashMap<String, List<Topic>>();
                JSONArray property_listing = result.getJSONArray(property_id);
                for (int i=0; i < property_listing.length(); i++) {
                    // 2) fetch the related wikidata entity
                    Topic referencedItemEntity = null;
                    JSONObject entity_response = property_listing.getJSONObject(i);
                    JSONObject mainsnak = entity_response.getJSONObject("mainsnak");
                    String claim_guid = entity_response.getString("id");
                    // 3) build up the item as part of the claim (if there is one)
                    String itemId = "";
                    String snakDataType = mainsnak.getString("datatype");
                    // log.info("SnakDataType=" + snakDataType + " MainSnak=" + mainsnak.toString());
                    JSONObject snakDataValue = mainsnak.getJSONObject("datavalue");
                    // ..) depending on the various (claimed/related) value-types
                    if (snakDataType.equals("wikibase-item")) {
                        // log.info("Wikibase Item claimed via \"" + propertyEntity.getSimpleValue() + "\"");
                        JSONObject snakDataValueValue = snakDataValue.getJSONObject("value");
                        long numericId = snakDataValueValue.getLong("numeric-id");
                        itemId = "Q" + numericId; // is this always of entity-type "item"? the responses look like it.
                        referencedItemEntity = getOrCreateWikidataEntity(itemId, language_code);
                    } else if (snakDataType.equals("commonsMedia")) {
                        // relate a wikidata.commons_media topic
                        log.info(" --------- Commons Media Item! ------------");
                        if (snakDataValue.has("value")) {
                            String fileName = snakDataValue.getString("value");
                            referencedItemEntity = getOrCreateWikimediaCommonsMediaTopic(fileName);
                            log.info(" --- FINE! --- Related Wikimedia Commons File to Wikidata Item!");
                        }
                        // ### make use of WIKIMEDIA_COMMONS_MEDIA_FILE_URL_PREFIX and implement a page-renderer
                    } else if (snakDataType.equals("globe-coordinate")) {
                        // ### relate a wikidata.globe_coordinate topic
                        // log.fine("Globe Coordinate claimed via \"" + propertyEntity.getSimpleValue()
                        //         + "\" ("+language_code+") DEBUG:");
                        // log.fine(" " + snakDataValue.toString());
                    } else if (snakDataType.equals("url")) {
                        if (snakDataValue.has("value")) {
                            // ### getOrCreateWebResource()
                            String value = snakDataValue.getString("value");
                            log.warning("### SKIPPING URL => " + value);
                        }
                    } else if (snakDataType.equals("string")) {
                        if (snakDataValue.has("value")) {
                            String value = snakDataValue.getString("value");
                            referencedItemEntity = getOrCreateWikidataText(value, language_code);
                        } else {
                            log.warning("Could not access wikidata-text value - json-response EMPTY!");
                        }
                    } else {
                        log.warning("Value claimed as " + propertyEntity.getSimpleValue() + " is not of any known "
                                + "(handled) type but \"" + snakDataType + "\" (" + snakDataValue + ")");
                        // e.g. snakDataType.equals("quantity")
                    }
                    // store a topic reference to the (new or already existing) wikidata-entity resp. -value topic
                    if (referencedItemEntity != null) {
                        createWikidataClaimEdge(claim_guid, wikidataItem, referencedItemEntity,
                                propertyEntity);
                    } else {
                        log.warning("SKIPPED creating claim of type \"" + snakDataType + "\" value for "
                                + "\"" + propertyEntity.getSimpleValue() + "\"");
                    }
                }
                /** Iterator entity_iterator = all_entities.keySet().iterator();
                StringBuffer requesting_ids = new StringBuffer();
                while (entity_iterator.hasNext()) {
                    String entity_id = entity_iterator.next().toString();
                    requesting_ids.append(entity_id + "|");
                }
                log.info("Requesting ALL ITEMS for " + property_id + ": " + requesting_ids.toString());
                omitting this solution because: "*": "Too many values supplied for parameter 'ids': the limit is 50" **/
            }
        } catch (JSONException ex) {
            log.warning("JSONException during processing a wikidata claim. " + ex.getMessage());
            throw new RuntimeException(ex);
        }
    }

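    /**
     * Creates a "Wikidata Claim" association between the two given topics (unless one already
     * exists), sets the claim GUID as the association URI and references the wikidata property
     * (a "Wikidata Search Result Entity") as child of the new edge.
     */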
    private Association createWikidataClaimEdge(String claim_guid, Topic one, Topic two, Topic property) {
        Association claim = null;
        try {
            if (!associationExists(WD_ENTITY_CLAIM_EDGE, one, two)) {
                // 1) create the "Wikidata Claim"-edge with its GUID
                claim = dms.createAssociation(new AssociationModel(WD_ENTITY_CLAIM_EDGE,
                        new TopicRoleModel(one.getId(), "dm4.core.default"),
                        new TopicRoleModel(two.getId(), "dm4.core.default")));
                claim.setUri(claim_guid);
                /** log.info("Created \"Wikidata Claim\" with GUID: " + claim.getUri() + " for \"" + two.getSimpleValue() +
                        " (property: " + property.getSimpleValue() +
                        "\") for \"" + one.getSimpleValue() + "\" - FINE"); **/
                // 2) assign the wikidata property (= Wikidata Search Entity) to this claim-edge
                claim.setChildTopics(new ChildTopicsModel().putRef(WD_SEARCH_ENTITY_URI,
                        property.getUri()));
                // ### problems with missing aggregated childs for composite assocTypes to be investigated ..
                dms.updateAssociation(claim.getModel());
                claim.loadChildTopics();
            }
            return claim;
        } catch (Exception e) {
            log.severe("FAILED to create a \"Claim\" between \"" + one.getSimpleValue() + "\" - \""
                    + two.getSimpleValue() + "\"");
            throw new RuntimeException(e);
        }
    }

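    /**
     * Fetches the "Wikidata Text" topic holding the given string value, or creates it if it does not
     * exist yet, so identical string values are stored just once.
     */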
    private Topic getOrCreateWikidataText(String value, String lang) {
        Topic textValue = null;
        // 1) query for the text-value
        try {
            textValue = dms.getTopic(WD_TEXT_TYPE_URI, new SimpleValue(value));
        } catch (Exception ex) {
            // log.info("Could not find a wikidata-text value topic for \"" + value + "\" " + ex.getMessage());
        }
        // 2) re-use or create
        DeepaMehtaTransaction tx = dms.beginTx();
        try {
            if (textValue == null) {
                textValue = dms.createTopic(new TopicModel(WD_TEXT_TYPE_URI, new SimpleValue(value)));
                log.info("CREATED \"Wikidata Text\" - \"" + value + "\" (" + lang + ") - OK!");
            } /** else {
                log.info("FETCHED \"Wikidata Text\" - \"" + textValue.getSimpleValue() + "\" "
                        + "(" + lang + ") - Re-using it!");
            } **/
            tx.success();
            return textValue;
        } catch (Exception ex) {
            log.warning("FAILURE during creating a wikidata value topic: " + ex.getLocalizedMessage());
            throw new RuntimeException(ex);
        } finally {
            tx.finish();
        }
    }

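    /**
     * Fetches the "Wikidata Commons Media" topic for the given file name, or creates it and enriches
     * the new topic with file path, author and license information fetched from the Wikimedia
     * Commons API wrapper (see enrichAboutWikimediaCommonsMetaData).
     */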
    private Topic getOrCreateWikimediaCommonsMediaTopic(String fileName) {
        Topic mediaTopic = dms.getTopic(WD_COMMONS_MEDIA_NAME_TYPE_URI, new SimpleValue(fileName));
        if (mediaTopic == null) { // create a new media topic
            ChildTopicsModel mediaCompositeModel = new ChildTopicsModel()
                    .put(WD_COMMONS_MEDIA_NAME_TYPE_URI, fileName);
            enrichAboutWikimediaCommonsMetaData(mediaCompositeModel, fileName);
            TopicModel mediaTopicModel = new TopicModel(WD_COMMONS_MEDIA_TYPE_URI, mediaCompositeModel);
            mediaTopic = dms.createTopic(mediaTopicModel).loadChildTopics();
            log.info("Created new Wikimedia Commons Media Topic \"" + mediaTopic.getSimpleValue().toString() + "\"");
        } else {
            // reference the existing media topic (### there is no update mechanism yet)
            mediaTopic = mediaTopic.getRelatedTopic("dm4.core.composition",
                    "dm4.core.child", "dm4.core.parent", WD_COMMONS_MEDIA_TYPE_URI);
        }
        return mediaTopic;
    }

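    /**
     * Fetches file path, author and license information for a Wikimedia Commons file via
     * http://tools.wmflabs.org/magnus-toolserver/commonsapi.php and puts it into the given media
     * composite model. The XML response is navigated by fixed child-node positions (urls at index 2,
     * author HTML at 10, permission HTML at 12), which is fragile if the API changes its layout.
     */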
    private void enrichAboutWikimediaCommonsMetaData(ChildTopicsModel model, String fileName) {
        // 1) fetch data by name from http://tools.wmflabs.org/magnus-toolserver/commonsapi.php?image=
        URL requestUri;
        StringBuffer resultBody = new StringBuffer();
        String xml_result = "";
        try {
            requestUri = new URL("http://tools.wmflabs.org/magnus-toolserver/commonsapi.php?image="
                    + URLEncoder.encode(fileName, CHARSET));
            log.fine("Requesting Wikimedia Commons Item Details: " + requestUri.toString());
            // 2) initiate request
            HttpURLConnection connection = (HttpURLConnection) requestUri.openConnection();
            connection.setRequestMethod("GET");
            connection.setRequestProperty("User-Agent", "DeepaMehta "+DEEPAMEHTA_VERSION+" - "
                    + "Wikidata Search " + WIKIDATA_TYPE_SEARCH_VERSION);
            // 3) check the response
            int httpStatusCode = connection.getResponseCode();
            if (httpStatusCode != HttpURLConnection.HTTP_OK) {
                throw new WebApplicationException(new Throwable("Error with HTTPConnection."),
                        Status.INTERNAL_SERVER_ERROR);
            }
            // 4) read in the response
            BufferedReader rd = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET));
            for (String input; (input = rd.readLine()) != null;) {
                resultBody.append(input);
            }
            rd.close();
            // 5) process response
            if (resultBody.toString().isEmpty()) {
                throw new WebApplicationException(new RuntimeException("Wikimedia Commons was silent."),
                        Status.NO_CONTENT);
            } else {
                DocumentBuilder builder;
                Document document;
                xml_result = resultBody.toString();
                builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
                document = builder.parse(new InputSource(new ByteArrayInputStream(xml_result.getBytes("UTF-8"))));
                NodeList responses = document.getElementsByTagName("response");
                // Node defaultLanguageDescr = responses.item(1).getFirstChild();
                Node fileElement = responses.item(0).getFirstChild();
                //
                Node resourceUrls = fileElement.getChildNodes().item(2);
                NodeList resourceElements = resourceUrls.getChildNodes(); // file and description as children
                Node filePath = resourceElements.item(0); // file at 0
                Node authorUrl = fileElement.getChildNodes().item(10); // authorUrl HTML at 10
                Node permission = fileElement.getChildNodes().item(12); // permission HTML at 12
                //
                String authorText = (authorUrl != null) ? authorUrl.getTextContent() : "No author information available.";
                String permissionText = (permission != null) ? permission.getTextContent() : "No license information available.";
                model.put(WD_COMMONS_MEDIA_PATH_TYPE_URI, filePath.getTextContent());
                // model.put(WD_COMMONS_MEDIA_DESCR_TYPE_URI, defaultLanguageDescr.getTextContent());
                model.put(WD_COMMONS_AUTHOR_HTML_URI, authorText);
                model.put(WD_COMMONS_LICENSE_HTML_URI, permissionText);
                log.fine(" --- Wikimedia Commons Response is FINE ---");
            }
        } catch (MalformedURLException e) {
            log.log(Level.SEVERE, "Wikidata Plugin: MalformedURLException ...", e);
        } catch (ParserConfigurationException e) {
            log.log(Level.SEVERE, "Wikidata Plugin: ParserConfigurationException ...", e);
        } catch (IOException ioe) {
            log.log(Level.SEVERE, "Wikidata Plugin: IOException ...", ioe);
        } catch (SAXException ex) {
            log.log(Level.SEVERE, null, ex);
        } catch (Exception e) {
            log.log(Level.SEVERE, null, e);
        }
    }

    // --
    // --- DeepaMehta 4 Plugin Related Private Methods
    // --

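    /**
     * Assigns the given topic to the "Wikidata" workspace (via a "dm4.core.aggregation" association)
     * unless such an assignment already exists. Does nothing if the topic is null.
     */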
    @Override
    public void assignToWikidataWorkspace(Topic topic) {
        if (topic == null) return;
        Topic wikidataWorkspace = dms.getTopic("uri", new SimpleValue(WS_WIKIDATA_URI));
        if (!associationExists("dm4.core.aggregation", topic, wikidataWorkspace)) {
            dms.createAssociation(new AssociationModel("dm4.core.aggregation",
                    new TopicRoleModel(topic.getId(), "dm4.core.parent"),
                    new TopicRoleModel(wikidataWorkspace.getId(), "dm4.core.child")
            ));
        }
    }

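    /**
     * Checks whether an association of the given type already exists between the two given topics.
     */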
    private boolean associationExists(String edge_type, Topic item, Topic user) {
        List<Association> results = dms.getAssociations(item.getId(), user.getId(), edge_type);
        return results.size() > 0;
    }

}