001 002 package org.deepamehta.plugins.wikidata; 003 004 import de.deepamehta.core.Association; 005 import de.deepamehta.core.AssociationType; 006 import de.deepamehta.core.RelatedAssociation; 007 import de.deepamehta.core.Topic; 008 import de.deepamehta.core.model.*; 009 import de.deepamehta.core.osgi.PluginActivator; 010 import de.deepamehta.core.service.Inject; 011 import de.deepamehta.core.service.ResultList; 012 import de.deepamehta.core.service.Transactional; 013 import de.deepamehta.core.storage.spi.DeepaMehtaTransaction; 014 import de.deepamehta.plugins.accesscontrol.service.AccessControlService; 015 016 import java.io.BufferedReader; 017 import java.io.ByteArrayInputStream; 018 import java.io.IOException; 019 import java.io.InputStreamReader; 020 import java.net.HttpURLConnection; 021 import java.net.MalformedURLException; 022 import java.net.URL; 023 import java.net.URLEncoder; 024 import java.util.Iterator; 025 import java.util.List; 026 import java.util.logging.Level; 027 import java.util.logging.Logger; 028 029 import javax.ws.rs.*; 030 import javax.ws.rs.core.MediaType; 031 import javax.ws.rs.core.Response.Status; 032 import javax.xml.parsers.DocumentBuilder; 033 import javax.xml.parsers.DocumentBuilderFactory; 034 import javax.xml.parsers.ParserConfigurationException; 035 036 import org.codehaus.jettison.json.JSONArray; 037 import org.codehaus.jettison.json.JSONException; 038 import org.codehaus.jettison.json.JSONObject; 039 import org.deepamehta.plugins.wikidata.service.WikidataSearchService; 040 import org.w3c.dom.Document; 041 import org.w3c.dom.Node; 042 import org.w3c.dom.NodeList; 043 import org.xml.sax.InputSource; 044 import org.xml.sax.SAXException; 045 046 047 048 /** 049 * A very basic plugin to search and explore wikidata. 050 * Allows to turn a \"Wikidata Search Result Entity\" (of type=property) into DeepaMehta 4 AssociationTypes. 051 * 052 * @author Malte Reißig (<malte@mikromedia.de>) 053 * @website https://github.com/mukil/dm4-wikidata 054 * @version 0.0.5-SNAPSHOT 055 */ 056 057 @Path("/wikidata") 058 @Consumes("application/json") 059 @Produces("application/json") 060 public class WikidataSearchPlugin extends PluginActivator implements WikidataSearchService { 061 062 private Logger log = Logger.getLogger(getClass().getName()); 063 064 private final String DEEPAMEHTA_VERSION = "DeepaMehta 4.4"; 065 private final String WIKIDATA_TYPE_SEARCH_VERSION = "0.0.5-SNAPSHOT"; 066 private final String CHARSET = "UTF-8"; 067 068 // --- DeepaMehta 4 URIs 069 070 private final String DM_WEBBROWSER_URL = "dm4.webbrowser.url"; 071 072 // --- Wikidata DeepaMehta URIs 073 074 private final String WS_WIKIDATA_URI = "org.deepamehta.workspaces.wikidata"; 075 076 private final String WD_SEARCH_BUCKET_URI = "org.deepamehta.wikidata.search_bucket"; 077 private final String WD_SEARCH_QUERY_URI = "org.deepamehta.wikidata.search_query"; 078 079 private final String WD_LANGUAGE_URI = "org.deepamehta.wikidata.language"; 080 // private final String WD_LANGUAGE_NAME_URI = "org.deepamehta.wikidata.language_name"; 081 // private final String WD_LANGUAGE_ISO_CODE_URI = "org.deepamehta.wikidata.language_code_iso"; 082 private final String WD_LANGUAGE_DATA_URI_PREFIX = "org.deepamehta.wikidata.lang_"; 083 084 private final String WD_SEARCH_ENTITY_URI = "org.deepamehta.wikidata.search_entity"; 085 private final String WD_SEARCH_ENTITY_LABEL_URI = "org.deepamehta.wikidata.search_entity_label"; 086 private final String WD_SEARCH_ENTITY_TYPE_URI = "org.deepamehta.wikidata.search_entity_type"; 087 private final String WD_SEARCH_ENTITY_ORDINAL_NR = "org.deepamehta.wikidata.search_ordinal_nr"; 088 private final String WD_SEARCH_ENTITY_DESCR_URI = "org.deepamehta.wikidata.search_entity_description"; 089 private final String WD_SEARCH_ENTITY_ALIAS_URI = "org.deepamehta.wikidata.search_entity_alias"; 090 private final String WD_SEARCH_ENTITIY_DATA_URI_PREFIX = "org.deepamehta.wikidata.entity_"; 091 092 private final String WD_TEXT_TYPE_URI = "org.deepamehta.wikidata.text"; 093 094 private final String WD_COMMONS_MEDIA_TYPE_URI = "org.deepamehta.wikidata.commons_media"; 095 private final String WD_COMMONS_MEDIA_NAME_TYPE_URI = "org.deepamehta.wikidata.commons_media_name"; 096 private final String WD_COMMONS_MEDIA_PATH_TYPE_URI = "org.deepamehta.wikidata.commons_media_path"; 097 // private final String WD_COMMONS_MEDIA_TYPE_TYPE_URI = "org.deepamehta.wikidata.commons_media_type"; 098 private final String WD_COMMONS_MEDIA_DESCR_TYPE_URI = "org.deepamehta.wikidata.commons_media_descr"; 099 private final String WD_COMMONS_AUTHOR_HTML_URI = "org.deepamehta.wikidata.commons_author_html"; 100 private final String WD_COMMONS_LICENSE_HTML_URI = "org.deepamehta.wikidata.commons_license_html"; 101 // private final String WD_GLOBE_COORDINATE_TYPE_URI = "org.deepamehta.wikidata.globe_coordinate"; 102 103 private final String WD_ENTITY_CLAIM_EDGE = "org.deepamehta.wikidata.claim_edge"; 104 105 // --- Wikidata Service URIs 106 107 private final String WD_SEARCH_ENTITIES_ENDPOINT = 108 "http://www.wikidata.org/w/api.php?action=wbsearchentities&format=json&limit=50"; 109 private final String WD_CHECK_ENTITY_CLAIMS_ENDPOINT = 110 "http://www.wikidata.org/w/api.php?action=wbgetclaims&format=json"; // &ungroupedlist=0 111 private final String WD_GET_ENTITY_ENDPOINT = "http://www.wikidata.org/w/api.php?action=wbgetentities" 112 + "&props=info%7Caliases%7Clabels%7Cdescriptions&format=json"; // sitelinks%2Furls%7C 113 private final String WD_SEARCH_ENTITY_TYPE_PROPERTY = "property"; 114 private final String WD_SEARCH_ENTITY_TYPE_ITEM = "item"; 115 private final String WD_ENTITY_BASE_URI = "org.wikidata.entity."; 116 117 private final String LANG_EN = "en"; 118 119 private final String WIKIDATA_ENTITY_URL_PREFIX = "//www.wikidata.org/wiki/"; 120 private final String WIKIDATA_PROPERTY_ENTITY_URL_PREFIX = "Property:"; 121 // private final String WIKIMEDIA_COMMONS_MEDIA_FILE_URL_PREFIX = "//commons.wikimedia.org/wiki/File:"; 122 123 @Inject 124 private AccessControlService acService = null; 125 126 127 128 // -- 129 // --- Public REST API Endpoints 130 // -- 131 132 @GET 133 @Path("/search/{entity}/{query}/{language_code}") 134 @Produces(MediaType.APPLICATION_JSON) 135 @Override 136 @Transactional 137 public Topic searchWikidataEntity(@PathParam("query") String query, @PathParam("language_code") String lang, 138 @PathParam("entity") String type) { 139 String json_result = ""; 140 StringBuffer resultBody = new StringBuffer(); 141 URL requestUri = null; 142 Topic search_bucket = null; 143 // sanity check (set en as default-language if nothing was provided by un-initialized language widget) 144 if (lang == null || lang.equals("undefined")) { 145 log.warning("Wikidata Language Search Option was not provided, now requesting data in EN"); 146 lang = LANG_EN; 147 } 148 // start search operation 149 try { 150 // 1) fixme: Authorize request 151 requestUri = new URL(WD_SEARCH_ENTITIES_ENDPOINT + "&search="+ query +"&language="+ lang +"&type=" + type); 152 log.fine("Wikidata Search Entities Request: " + requestUri.toString()); 153 // 2) initiate request 154 HttpURLConnection connection = (HttpURLConnection) requestUri.openConnection(); 155 connection.setRequestMethod("GET"); 156 connection.setRequestProperty("User-Agent", "DeepaMehta "+DEEPAMEHTA_VERSION+" - " 157 + "Wikidata Search " + WIKIDATA_TYPE_SEARCH_VERSION); 158 // 3) check the response 159 int httpStatusCode = connection.getResponseCode(); 160 if (httpStatusCode != HttpURLConnection.HTTP_OK) { 161 throw new WebApplicationException(new Throwable("Error with HTTPConnection."), 162 Status.INTERNAL_SERVER_ERROR); 163 } 164 // 4) read in the response 165 BufferedReader rd = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET)); 166 for (String input; (input = rd.readLine()) != null;) { 167 resultBody.append(input); 168 } 169 rd.close(); 170 // 5) process response 171 if (resultBody.toString().isEmpty()) { 172 throw new WebApplicationException(new RuntimeException("Wikidata was silent."), 173 Status.NO_CONTENT); 174 } else { 175 // ..) Create Wikidata Search Bucket 176 ChildTopicsModel bucket_model = new ChildTopicsModel(); 177 bucket_model.put(WD_SEARCH_QUERY_URI, query); 178 bucket_model.putRef(WD_LANGUAGE_URI, WD_LANGUAGE_DATA_URI_PREFIX + lang); 179 json_result = resultBody.toString(); 180 log.fine("Wikidata Search Request Response: " + json_result); 181 processWikidataEntitySearch(json_result, bucket_model, type, lang); 182 search_bucket = dms.createTopic(new TopicModel(WD_SEARCH_BUCKET_URI, bucket_model)); 183 // workaround: addRef does not (yet) fetchComposite, so fetchComposite=true 184 search_bucket = dms.getTopic(search_bucket.getId()); 185 log.info("Wikidata Search Bucket for "+ query +" in ("+ lang +") was CREATED"); 186 } 187 search_bucket.loadChildTopics(); // load all child topics 188 } catch (MalformedURLException e) { 189 log.warning("Wikidata Plugin: MalformedURLException ..." + e.getMessage()); 190 throw new RuntimeException("Could not find wikidata endpoint.", e); 191 } catch (IOException ioe) { 192 throw new WebApplicationException(new Throwable(ioe), Status.BAD_REQUEST); 193 } catch (Exception e) { 194 throw new WebApplicationException(new Throwable(e), Status.INTERNAL_SERVER_ERROR); 195 } finally { 196 return search_bucket; 197 } 198 } 199 200 @GET 201 @Path("/{entityId}/{language_code}") 202 @Produces(MediaType.APPLICATION_JSON) 203 @Override 204 public Topic getOrCreateWikidataEntity(@PathParam("entityId") String entityId, 205 @PathParam("language_code") String language_code) { 206 String json_result = ""; 207 StringBuffer resultBody = new StringBuffer(); 208 URL requestUri = null; 209 Topic entity = null; 210 // sanity check (set en as default-language if nothing was provided by un-initialized language widget) 211 if (language_code == null || language_code.equals("undefined")) { 212 log.warning("Wikidata Language Search Option was not provided, now requesting data in EN"); 213 language_code = LANG_EN; 214 } 215 try { 216 // 1) fixme: Authorize request 217 // &sites=dewiki&&languages=de 218 requestUri = new URL(WD_GET_ENTITY_ENDPOINT + "&ids="+ entityId + "&languages=" + language_code); 219 log.fine("Requesting Wikidata Entity Details " + requestUri.toString()); 220 // 2) initiate request 221 HttpURLConnection connection = (HttpURLConnection) requestUri.openConnection(); 222 connection.setRequestMethod("GET"); 223 connection.setRequestProperty("User-Agent", "DeepaMehta "+DEEPAMEHTA_VERSION+" - " 224 + "Wikidata Search " + WIKIDATA_TYPE_SEARCH_VERSION); 225 // 3) check the response 226 int httpStatusCode = connection.getResponseCode(); 227 if (httpStatusCode != HttpURLConnection.HTTP_OK) { 228 throw new WebApplicationException(new Throwable("Error with HTTPConnection."), 229 Status.INTERNAL_SERVER_ERROR); 230 } 231 // 4) read in the response 232 BufferedReader rd = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET)); 233 for (String input; (input = rd.readLine()) != null;) { 234 resultBody.append(input); 235 } 236 rd.close(); 237 // 5) process response 238 if (resultBody.toString().isEmpty()) { 239 throw new WebApplicationException(new RuntimeException("Wikidata was silent."), 240 Status.NO_CONTENT); 241 } else { 242 // 6) Create or Update Wikidata Search Entity 243 json_result = resultBody.toString(); 244 log.fine("Wikidata Entity Request Response: " + json_result); 245 JSONObject response = new JSONObject(json_result); 246 JSONObject entities = response.getJSONObject("entities"); 247 JSONObject response_entity = entities.getJSONObject(entityId); 248 // 0) Check if we need to CREATE or UPDATE our search result entity item 249 Topic existingEntity = dms.getTopic("uri", 250 new SimpleValue(WD_SEARCH_ENTITIY_DATA_URI_PREFIX + entityId)); 251 if (existingEntity == null) { 252 entity = createWikidataSearchEntity(response_entity, language_code); 253 } else { 254 // Updates labels, descriptions, aliases, url and (query) language 255 entity = updateWikidataEntity(existingEntity, response_entity, language_code); 256 } 257 entity.loadChildTopics(); // load all child topics 258 } 259 } catch (MalformedURLException e) { 260 log.warning("Wikidata Plugin: MalformedURLException ..." + e.getMessage()); 261 throw new RuntimeException("Could not find wikidata endpoint.", e); 262 } catch (IOException ioe) { 263 throw new WebApplicationException(new Throwable(ioe), Status.BAD_REQUEST); 264 } catch (JSONException je) { 265 throw new WebApplicationException(new Throwable(je), Status.INTERNAL_SERVER_ERROR); 266 } catch (Exception e) { 267 throw new WebApplicationException(new Throwable(e), Status.INTERNAL_SERVER_ERROR); 268 } finally { 269 return entity; 270 } 271 } 272 273 @GET 274 @Path("/check/claims/{id}/{language_code}") 275 @Produces(MediaType.APPLICATION_JSON) 276 @Override 277 @Transactional 278 public Topic loadClaimsAndRelatedWikidataItems(@PathParam("id") long topicId, 279 @PathParam("language_code") String language_option) { 280 281 String json_result = ""; 282 StringBuffer resultBody = new StringBuffer(); 283 URL requestUri = null; 284 Topic wikidataItem = dms.getTopic(topicId); 285 // 0) sanity check (set en as default-language if nothing was provided by un-initialized language widget) 286 if (language_option == null || language_option.equals("undefined")) { 287 log.warning("Wikidata Language Search Option was not provided, now requesting data in EN."); 288 language_option = LANG_EN; 289 } 290 String wikidataId = wikidataItem.getUri().replaceAll(WD_SEARCH_ENTITIY_DATA_URI_PREFIX, ""); 291 try { 292 // 1) ### Authorize request 293 // 2) ### be explicit and add "&rank=normal" to wbgetclaims-call, ### add "&props=references" somewhen 294 requestUri = new URL(WD_CHECK_ENTITY_CLAIMS_ENDPOINT + "&entity=" + wikidataId); 295 log.fine("Requesting Wikidata Entity Claims: " + requestUri.toString()); 296 // 2) initiate request 297 HttpURLConnection connection = (HttpURLConnection) requestUri.openConnection(); 298 connection.setRequestMethod("GET"); 299 connection.setRequestProperty("User-Agent", "DeepaMehta "+DEEPAMEHTA_VERSION+" - " 300 + "Wikidata Search " + WIKIDATA_TYPE_SEARCH_VERSION); 301 // 3) check the response 302 int httpStatusCode = connection.getResponseCode(); 303 if (httpStatusCode != HttpURLConnection.HTTP_OK) { 304 throw new WebApplicationException(new Throwable("Error with HTTPConnection."), 305 Status.INTERNAL_SERVER_ERROR); 306 } 307 // 4) read in the response 308 BufferedReader rd = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET)); 309 for (String input; (input = rd.readLine()) != null;) { 310 resultBody.append(input); 311 } 312 rd.close(); 313 // 5) process response 314 if (resultBody.toString().isEmpty()) { 315 throw new WebApplicationException(new RuntimeException("Wikidata was silent."), 316 Status.NO_CONTENT); 317 } else { 318 json_result = resultBody.toString(); 319 log.fine("Wikidata Claim Request Response: " + json_result); 320 processWikidataClaims(json_result, wikidataItem, language_option); 321 log.info("Wikidata Claim Response is FINE"); 322 } 323 wikidataItem.loadChildTopics(); // load all child topics 324 } catch (MalformedURLException e) { 325 log.warning("Wikidata Plugin: MalformedURLException ..." + e.getMessage()); 326 throw new RuntimeException("Could not find wikidata endpoint.", e); 327 } catch (IOException ioe) { 328 throw new WebApplicationException(new Throwable(ioe), Status.BAD_REQUEST); 329 } catch (Exception e) { 330 throw new WebApplicationException(new Throwable(e), Status.INTERNAL_SERVER_ERROR); 331 } finally { 332 return wikidataItem; 333 } 334 } 335 336 @GET 337 @Path("/property/turn/{id}") 338 @Produces(MediaType.APPLICATION_JSON) 339 @Override 340 @Transactional 341 public Topic createWikidataAssociationType(@PathParam("id") long id) { 342 AssociationType association_type = null; 343 try { 344 Topic property_entity = dms.getTopic(id); 345 // 1) Create new Association Type model 346 String property_name = property_entity.getSimpleValue().toString(); 347 AssociationTypeModel assoc_type_model = new AssociationTypeModel("org.deepamehta.wikidata.assoctype_" 348 + property_entity.getUri().replaceAll(WD_SEARCH_ENTITIY_DATA_URI_PREFIX, ""), 349 property_name, "dm4.core.text"); 350 association_type = dms.createAssociationType(assoc_type_model); 351 // 2) Assign to "Wikidata" Workspace 352 assignToWikidataWorkspace(association_type); 353 // 3) Associated search-result-entity to new assoc-type (to keep track) 354 dms.createAssociation(new AssociationModel("dm4.core.association", 355 new TopicRoleModel(property_entity.getUri(), "dm4.core.default"), 356 new TopicRoleModel(association_type.getUri(), "dm4.core.default") 357 )); 358 log.info("Turned wikidata property \""+ property_entity.getUri() +"\" into DM Association Type!"); 359 } catch (Error e) { 360 log.warning("OH: The Wikidata Plugin experienced an unforeseen error! "+ e.getMessage()); 361 } finally { 362 return association_type; 363 } 364 } 365 366 @GET 367 @Path("/property/related/claims/{id}") 368 @Produces(MediaType.APPLICATION_JSON) 369 @Override 370 public ResultList<RelatedAssociation> getTopicRelatedAssociations (@PathParam("id") long topicId) { 371 Topic topic = dms.getTopic(topicId); 372 ResultList<RelatedAssociation> associations = topic.getRelatedAssociations("dm4.core.aggregation", 373 "dm4.core.child", "dm4.core.parent", "org.deepamehta.wikidata.claim_edge"); 374 return associations.loadChildTopics(); 375 } 376 377 // -- 378 // --- Wikidata Search (Application Specific) Private Methods 379 // -- 380 381 private void processWikidataEntitySearch(String json_result, ChildTopicsModel search_bucket, 382 String type, String lang) { 383 try { 384 JSONObject response = new JSONObject(json_result); 385 JSONArray result = response.getJSONArray("search"); 386 if (result.length() > 0) { 387 for (int i = 0; i < result.length(); i++) { 388 JSONObject entity_response = result.getJSONObject(i); 389 // Check if entity already exists 390 String id = entity_response.getString("id"); 391 Topic existing_entity = dms.getTopic("uri", 392 new SimpleValue(WD_SEARCH_ENTITIY_DATA_URI_PREFIX + id)); 393 if (existing_entity == null) { 394 // Create new search entity composite 395 String name = entity_response.getString("label"); 396 String url = entity_response.getString("url"); 397 // 398 ChildTopicsModel entity_composite = new ChildTopicsModel(); 399 entity_composite.put(WD_SEARCH_ENTITY_LABEL_URI, name); 400 if (entity_response.has("description")) { 401 String description = entity_response.getString("description"); 402 entity_composite.put(WD_SEARCH_ENTITY_DESCR_URI, description); 403 } 404 entity_composite.put(DM_WEBBROWSER_URL, url); 405 // ### fix. aliases add up 406 if (entity_response.has("aliases")) { 407 JSONArray aliases = entity_response.getJSONArray("aliases"); 408 for (int a=0; a < aliases.length(); a++) { 409 String alias = aliases.getString(a); 410 entity_composite.add(WD_SEARCH_ENTITY_ALIAS_URI, 411 new TopicModel(WD_SEARCH_ENTITY_ALIAS_URI, new SimpleValue(alias))); 412 } 413 } 414 // set enity place in resultset 415 entity_composite.put(WD_SEARCH_ENTITY_ORDINAL_NR, i); 416 // set entity-type 417 entity_composite.put(WD_SEARCH_ENTITY_TYPE_URI, type); 418 // set language-value on entity-result 419 entity_composite.putRef(WD_LANGUAGE_URI, WD_LANGUAGE_DATA_URI_PREFIX + lang); 420 TopicModel entity_model = new TopicModel(WD_SEARCH_ENTITIY_DATA_URI_PREFIX + id, 421 WD_SEARCH_ENTITY_URI, entity_composite); 422 // create and reference entity in wikidata search bucket 423 search_bucket.add(WD_SEARCH_ENTITY_URI, entity_model); 424 } else { 425 // reference existing entity in wikidata search bucket by URI 426 search_bucket.addRef(WD_SEARCH_ENTITY_URI, WD_SEARCH_ENTITIY_DATA_URI_PREFIX + id); 427 } 428 } 429 } 430 } catch (JSONException ex) { 431 log.warning("Wikidata Plugin: JSONException during processing a wikidata entity search response. " 432 + ex.getMessage()); 433 } 434 } 435 436 private Topic createWikidataSearchEntity(JSONObject entity_response, String lang) { 437 Topic entity = null; 438 DeepaMehtaTransaction tx = dms.beginTx(); 439 try { 440 String id = entity_response.getString("id"); 441 // Create new search entity composite 442 ChildTopicsModel entity_composite = buildWikidataEntityModel(entity_response, lang); 443 TopicModel entity_model = new TopicModel(WD_SEARCH_ENTITIY_DATA_URI_PREFIX + id, 444 WD_SEARCH_ENTITY_URI, entity_composite); 445 entity = dms.createTopic(entity_model); 446 log.fine("Wikidata Search Entity Created (" + 447 entity_composite.getString(WD_SEARCH_ENTITY_TYPE_URI)+ "): \"" + entity.getSimpleValue() +"\" - FINE!"); 448 tx.success(); 449 } catch (Exception ex) { 450 log.warning("FAILED to create a \"Wikidata Search Entity\" caused by " + ex.getMessage()); 451 tx.failure(); 452 } finally { 453 tx.finish(); 454 return entity; 455 } 456 } 457 458 private Topic updateWikidataEntity(Topic entity, JSONObject entity_response, String lang) { 459 DeepaMehtaTransaction tx = dms.beginTx(); 460 try { 461 // Update existing search entity topic 462 ChildTopicsModel entity_composite = buildWikidataEntityModel(entity_response, lang); 463 TopicModel entity_model = new TopicModel(entity.getId(), entity_composite); 464 dms.updateTopic(entity_model); 465 log.fine("Wikidata Search Entity Updated (" + 466 entity_composite.getString(WD_SEARCH_ENTITY_TYPE_URI)+ "): \"" + entity.getSimpleValue() +"\" - FINE!"); 467 tx.success(); 468 return entity; 469 } catch (Exception ex) { 470 log.warning("FAILED to UPDATE \"Wikidata Search Entity\" caused by " + ex.getMessage()); 471 tx.failure(); 472 } finally { 473 tx.finish(); 474 } 475 return null; 476 } 477 478 private ChildTopicsModel buildWikidataEntityModel(JSONObject entity_response, String lang) { 479 ChildTopicsModel entity_composite = new ChildTopicsModel(); 480 try { 481 String id = entity_response.getString("id"); 482 String type = entity_response.getString("type"); 483 entity_composite = new ChildTopicsModel(); 484 // main label 485 if (entity_response.has("labels")) { 486 JSONObject labels = entity_response.getJSONObject("labels"); 487 JSONObject languaged_label = labels.getJSONObject(lang); 488 String label = languaged_label.getString("value"); 489 entity_composite.put(WD_SEARCH_ENTITY_LABEL_URI, label); 490 } 491 // main description 492 if (entity_response.has("descriptions")) { 493 JSONObject descriptions = entity_response.getJSONObject("descriptions"); 494 JSONObject languaged_descr = descriptions.getJSONObject(lang); 495 String description = languaged_descr.getString("value"); 496 entity_composite.put(WD_SEARCH_ENTITY_DESCR_URI, description); 497 } 498 // aliases 499 if (entity_response.has("aliases")) { 500 JSONObject aliases = entity_response.getJSONObject("aliases"); 501 JSONArray languaged_aliases = aliases.getJSONArray(lang); 502 for (int a=0; a < languaged_aliases.length(); a++) { 503 JSONObject alias_object = languaged_aliases.getJSONObject(a); 504 String alias = alias_object.getString("value"); 505 entity_composite.add(WD_SEARCH_ENTITY_ALIAS_URI, 506 new TopicModel(WD_SEARCH_ENTITY_ALIAS_URI, new SimpleValue(alias))); 507 } 508 } 509 // set wikidata url 510 if (type.equals(WD_SEARCH_ENTITY_TYPE_PROPERTY)) { 511 entity_composite.put(DM_WEBBROWSER_URL, WIKIDATA_ENTITY_URL_PREFIX 512 + WIKIDATA_PROPERTY_ENTITY_URL_PREFIX + id); 513 } else { 514 entity_composite.put(DM_WEBBROWSER_URL, WIKIDATA_ENTITY_URL_PREFIX + id); 515 } 516 // set language-value on entity-result 517 entity_composite.putRef(WD_LANGUAGE_URI, WD_LANGUAGE_DATA_URI_PREFIX + lang); 518 // ### sitelinks 519 /** if (entity_response.has("sitelinks")) { 520 JSONObject sitelinks = entity_response.getJSONObject("sitelinks"); 521 if (sitelinks.has(lang + "wiki")) { 522 JSONObject sitelink = sitelinks.getJSONObject(lang + "wiki"); 523 entity_composite.put(DM_WEBBROWSER_URL, sitelink.getString("url")); 524 } else { 525 log.warning("There is no sitelink for this item in this language/wiki: " + lang + "wiki"); 526 } 527 } **/ 528 entity_composite.put(WD_SEARCH_ENTITY_TYPE_URI, type); 529 return entity_composite; 530 } catch (JSONException jex) { 531 log.warning("JSONException during build up of the search-entities composite model"); 532 throw new RuntimeException(jex); 533 } 534 } 535 536 private void processWikidataClaims(String json_result, Topic wikidataItem, String language_code) { 537 try { 538 JSONObject response = new JSONObject(json_result); 539 JSONObject result = response.getJSONObject("claims"); 540 // ### Needs to identify if claims (already imported in DM4) are not yet part of the current wikidata-data 541 Iterator properties = result.keys(); 542 log.info("Wikidata Plugin is processing all properties part of related " + result.length() + " CLAIMS"); 543 Topic propertyEntity = null; 544 while (properties.hasNext()) { 545 String property_id = properties.next().toString(); 546 // 1) Load related property-entity 547 propertyEntity = getOrCreateWikidataEntity(property_id, language_code); 548 // HashMap<String, List<Topic>> all_entities = new HashMap<String, List<Topic>>(); 549 JSONArray property_listing = result.getJSONArray(property_id); 550 for (int i=0; i < property_listing.length(); i++) { 551 // 2) fetch related wikidata entity 552 Topic referencedItemEntity = null; 553 JSONObject entity_response = property_listing.getJSONObject(i); 554 JSONObject mainsnak = entity_response.getJSONObject("mainsnak"); 555 String claim_guid = entity_response.getString("id"); 556 // 3) build up item as part of the claim (if so) 557 String itemId = ""; 558 String snakDataType = mainsnak.getString("datatype"); 559 // log.info("SNakDataType=" + snakDataType + "MainSnak" + mainsnak.toString()); 560 JSONObject snakDataValue = mainsnak.getJSONObject("datavalue"); 561 // ..) depending on the various (claimed/realted) value-types 562 if (snakDataType.equals("wikibase-item")) { 563 // log.info("Wikibase Item claimed via \"" + propertyEntity.getSimpleValue() + "\""); 564 JSONObject snakDataValueValue = snakDataValue.getJSONObject("value"); 565 long numericId = snakDataValueValue.getLong("numeric-id"); 566 itemId = "Q" + numericId; // is this always of entity-type "item"? responses looks like. 567 referencedItemEntity = getOrCreateWikidataEntity(itemId, language_code); 568 } else if (snakDataType.equals("commonsMedia")) { 569 // do relate wikidata.commons_media 570 log.info(" --------- Commons Media Item! ------------"); 571 if (snakDataValue.has("value")) { 572 String fileName = snakDataValue.getString("value"); 573 referencedItemEntity = getOrCreateWikimediaCommonsMediaTopic(fileName); 574 log.info(" --- FINE! --- Related Wikimedia Commons File to Wikidata Item!"); 575 } 576 /** **/ 577 // ### make use of WIKIMEDIA_COMMONS_MEDIA_FILE_URL_PREFIX and implement page-renderer 578 } else if (snakDataType.equals("globe-coordinate")) { 579 // do relate wikidata.globe_coordinate 580 // log.fine("Globe Coordinate claimed via \"" + propertyEntity.getSimpleValue() 581 // + "\" ("+language_code+") DEBUG:"); 582 // log.fine(" " + snakDataValue.toString()); 583 } else if (snakDataType.equals("url")) { 584 if (snakDataValue.has("value")) { 585 // ### getOrCreateWebResource() 586 String value = snakDataValue.getString("value"); 587 log.warning("### SKIPPING URL => " + value); 588 } 589 } else if (snakDataType.equals("string")) { 590 if (snakDataValue.has("value")) { 591 String value = snakDataValue.getString("value"); 592 referencedItemEntity = getOrCreateWikidataText(value, language_code); 593 } else { 594 log.warning("Could not access wikidata-text value - json-response EMPTY!"); 595 } 596 } else { 597 log.warning("Value claimed as " + propertyEntity.getSimpleValue() + " is not of any known type" 598 + " wikibase-item but \"" + snakDataType +"\" ("+snakDataValue+")"); 599 // e.g. snakDataType.equals("quantity") 600 } 601 // store topic reference to (new or already existing) wikidata-entity/ resp. -value topic 602 if (referencedItemEntity != null) { 603 createWikidataClaimEdge(claim_guid, wikidataItem, referencedItemEntity, 604 propertyEntity); 605 } else { 606 log.warning("SKIPPED creating claim of type \""+snakDataType+"\" value for " 607 + "\""+propertyEntity.getSimpleValue()+"\""); 608 } 609 } 610 /** Iterator entity_iterator = all_entities.keySet().iterator(); 611 StringBuffer requesting_ids = new StringBuffer(); 612 while (entity_iterator.hasNext()) { 613 String entity_id = entity_iterator.next().toString(); 614 requesting_ids.append(entity_id + "|"); 615 } 616 log.info("Requesting ALL ITEMS for " +property_id+ ": " + requesting_ids.toString()); 617 omitting this solution bcause: "*": "Too many values supplied for parameter 'ids': the limit is 50" **/ 618 } 619 } catch (JSONException ex) { 620 log.warning("JSONException during processing a wikidata claim. " + ex.getMessage()); 621 throw new RuntimeException(ex); 622 } 623 } 624 625 private Association createWikidataClaimEdge (String claim_guid, Topic one, Topic two, Topic property) { 626 Association claim = null; 627 try { 628 if (!associationExists(WD_ENTITY_CLAIM_EDGE, one, two)) { 629 // 1) Create \"Wikidata Claim\"-Edge with GUID 630 claim = dms.createAssociation(new AssociationModel(WD_ENTITY_CLAIM_EDGE, 631 new TopicRoleModel(one.getId(), "dm4.core.default"), 632 new TopicRoleModel(two.getId(), "dm4.core.default"))); 633 claim.setUri(claim_guid); 634 /** log.info("Created \"Wikidata Claim\" with GUID: " + claim.getUri() +" for \"" + two.getSimpleValue() + 635 " (property: " + property.getSimpleValue() + 636 "\") for \"" + one.getSimpleValue() + "\" - FINE"); **/ 637 // 2) Assign wikidata property (=Wikidata Search Entity) to this claim-edge 638 claim.setChildTopics(new ChildTopicsModel().putRef(WD_SEARCH_ENTITY_URI, 639 property.getUri())); 640 // ### problems with missing aggregated childs for composite assocTypes to be investigated .. 641 dms.updateAssociation(claim.getModel()); 642 claim.loadChildTopics(); 643 } 644 return claim; 645 } catch (Exception e) { 646 log.severe("FAILED to create a \"Claim\" between \""+one.getSimpleValue()+"\" - \""+two.getSimpleValue()); 647 throw new RuntimeException(e); 648 } 649 } 650 651 private Topic getOrCreateWikidataText(String value, String lang) { 652 Topic textValue = null; 653 // 1) query for text-value 654 try { 655 textValue = dms.getTopic(WD_TEXT_TYPE_URI, new SimpleValue(value)); 656 } catch (Exception ex) { 657 // log.info("Could not find a wikidata-text value topic for \"" + value + ex.getMessage() + "\""); 658 } 659 // 2) re-use or create 660 DeepaMehtaTransaction tx = dms.beginTx(); 661 try { 662 if (textValue == null) { 663 textValue = dms.createTopic(new TopicModel(WD_TEXT_TYPE_URI, new SimpleValue(value))); 664 log.info("CREATED \"Wikidata Text\" - \"" + value +"\" (" + lang + ") - OK!"); 665 } /** else { 666 log.info("FETCHED \"Wikidata Text\" - \"" + textValue.getSimpleValue() +"\" " 667 + "(" + lang + ") - Re-using it!"); 668 } **/ 669 tx.success(); 670 return textValue; 671 } catch (Exception ex) { 672 log.warning("FAILURE during creating a wikidata value topic: " + ex.getLocalizedMessage()); 673 throw new RuntimeException(ex); 674 } finally { 675 tx.finish(); 676 } 677 } 678 679 private Topic getOrCreateWikimediaCommonsMediaTopic(String fileName) { 680 Topic mediaTopic = dms.getTopic(WD_COMMONS_MEDIA_NAME_TYPE_URI, new SimpleValue(fileName)); 681 if (mediaTopic == null) { // create new media topic 682 ChildTopicsModel mediaCompositeModel = new ChildTopicsModel() 683 .put(WD_COMMONS_MEDIA_NAME_TYPE_URI, fileName); 684 enrichAboutWikimediaCommonsMetaData(mediaCompositeModel, fileName); 685 TopicModel mediaTopicModel = new TopicModel(WD_COMMONS_MEDIA_TYPE_URI, mediaCompositeModel); 686 mediaTopic = dms.createTopic(mediaTopicModel).loadChildTopics(); 687 log.info("Created new Wikimedia Commons Media Topic \"" + mediaTopic.getSimpleValue().toString()); 688 } else { 689 mediaTopic = mediaTopic.getRelatedTopic("dm4.core.composition", 690 "dm4.core.child", "dm4.core.parent", WD_COMMONS_MEDIA_TYPE_URI); 691 } 692 // reference existing media topic ### here is no update mechanism yet 693 return mediaTopic; 694 } 695 696 private void enrichAboutWikimediaCommonsMetaData(ChildTopicsModel model, String fileName) { 697 // 1) fetch data by name from http://tools.wmflabs.org/magnus-toolserver/commonsapi.php?image= 698 URL requestUri; 699 StringBuffer resultBody = new StringBuffer(); 700 String xml_result = ""; 701 try { 702 requestUri = new URL("http://tools.wmflabs.org/magnus-toolserver/commonsapi.php?image=" 703 + URLEncoder.encode(fileName, CHARSET)); 704 log.fine("Requesting Wikimedia Commons Item Details: " + requestUri.toString()); 705 // 2) initiate request 706 HttpURLConnection connection = (HttpURLConnection) requestUri.openConnection(); 707 connection.setRequestMethod("GET"); 708 connection.setRequestProperty("User-Agent", "DeepaMehta "+DEEPAMEHTA_VERSION+" - " 709 + "Wikidata Search " + WIKIDATA_TYPE_SEARCH_VERSION); 710 // 3) check the response 711 int httpStatusCode = connection.getResponseCode(); 712 if (httpStatusCode != HttpURLConnection.HTTP_OK) { 713 throw new WebApplicationException(new Throwable("Error with HTTPConnection."), 714 Status.INTERNAL_SERVER_ERROR); 715 } 716 // 4) read in the response 717 BufferedReader rd = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET)); 718 for (String input; (input = rd.readLine()) != null;) { 719 resultBody.append(input); 720 } 721 rd.close(); 722 // 5) process response 723 if (resultBody.toString().isEmpty()) { 724 throw new WebApplicationException(new RuntimeException("Wikidata was silent."), 725 Status.NO_CONTENT); 726 } else { 727 DocumentBuilder builder; 728 Document document; 729 xml_result = resultBody.toString(); 730 builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); 731 document = builder.parse(new InputSource(new ByteArrayInputStream(xml_result.getBytes("utf-8")))); 732 NodeList responses = document.getElementsByTagName("response"); 733 // Node defaultLanguageDescr = responses.item(1).getFirstChild(); 734 Node fileElement = responses.item(0).getFirstChild(); 735 // 736 Node resourceUrls = fileElement.getChildNodes().item(2); 737 NodeList resourceElements = resourceUrls.getChildNodes(); // file and description as childs 738 Node filePath = resourceElements.item(0); // file at 0 739 Node authorUrl = fileElement.getChildNodes().item(10); // authorUrl HTML at 10 740 Node permission = fileElement.getChildNodes().item(12); // permission HTML at 12 741 // 742 String authorText = (authorUrl != null) ? authorUrl.getTextContent() : "No author information available."; 743 String permissionText = (permission != null) ? permission.getTextContent() : "No license information available."; 744 model.put(WD_COMMONS_MEDIA_PATH_TYPE_URI, filePath.getTextContent()); 745 // model.put(WD_COMMONS_MEDIA_DESCR_TYPE_URI, defaultLanguageDescr.getTextContent()); 746 model.put(WD_COMMONS_AUTHOR_HTML_URI, authorText); 747 model.put(WD_COMMONS_LICENSE_HTML_URI, permissionText); 748 log.fine(" --- Wikimedia Commons Response is FINE ---"); 749 } 750 } catch (MalformedURLException e) { 751 log.log(Level.SEVERE, "Wikidata Plugin: MalformedURLException ...", e); 752 } catch (ParserConfigurationException e) { 753 log.log(Level.SEVERE, "Wikidata Plugin: ParserConfigurationException ...", e); 754 } catch (IOException ioe) { 755 log.log(Level.SEVERE, "Wikidata Plugin: IOException ...", ioe); 756 } catch (SAXException ex) { 757 log.log(Level.SEVERE, null, ex); 758 } catch (Exception e) { 759 log.log(Level.SEVERE, null , e); 760 } 761 } 762 763 // -- 764 // --- DeepaMehta 4 Plugin Related Private Methods 765 // -- 766 767 @Override 768 public void assignToWikidataWorkspace(Topic topic) { 769 if (topic == null) return; 770 Topic wikidataWorkspace = dms.getTopic("uri", new SimpleValue(WS_WIKIDATA_URI)); 771 if (!associationExists("dm4.core.aggregation", topic, wikidataWorkspace)) { 772 dms.createAssociation(new AssociationModel("dm4.core.aggregation", 773 new TopicRoleModel(topic.getId(), "dm4.core.parent"), 774 new TopicRoleModel(wikidataWorkspace.getId(), "dm4.core.child") 775 )); 776 } 777 } 778 779 private boolean associationExists(String edge_type, Topic item, Topic user) { 780 List<Association> results = dms.getAssociations(item.getId(), user.getId(), edge_type); 781 return (results.size() > 0) ? true : false; 782 } 783 784 }