001 002package org.deepamehta.plugins.wikidata; 003 004import de.deepamehta.core.Association; 005import de.deepamehta.core.AssociationType; 006import de.deepamehta.core.RelatedAssociation; 007import de.deepamehta.core.Topic; 008import de.deepamehta.core.model.*; 009import de.deepamehta.core.osgi.PluginActivator; 010import de.deepamehta.core.service.Inject; 011import de.deepamehta.core.service.ResultList; 012import de.deepamehta.core.service.Transactional; 013import de.deepamehta.core.storage.spi.DeepaMehtaTransaction; 014import de.deepamehta.plugins.accesscontrol.service.AccessControlService; 015 016import java.io.BufferedReader; 017import java.io.ByteArrayInputStream; 018import java.io.IOException; 019import java.io.InputStreamReader; 020import java.net.HttpURLConnection; 021import java.net.MalformedURLException; 022import java.net.URL; 023import java.net.URLEncoder; 024import java.util.ArrayList; 025import java.util.Iterator; 026import java.util.List; 027import java.util.logging.Level; 028import java.util.logging.Logger; 029 030import javax.ws.rs.*; 031import javax.ws.rs.core.MediaType; 032import javax.ws.rs.core.Response.Status; 033import javax.xml.parsers.DocumentBuilder; 034import javax.xml.parsers.DocumentBuilderFactory; 035import javax.xml.parsers.ParserConfigurationException; 036 037import org.codehaus.jettison.json.JSONArray; 038import org.codehaus.jettison.json.JSONException; 039import org.codehaus.jettison.json.JSONObject; 040import org.deepamehta.plugins.wikidata.service.WikidataSearchService; 041import org.w3c.dom.DOMException; 042import org.w3c.dom.Document; 043import org.w3c.dom.Node; 044import org.w3c.dom.NodeList; 045import org.xml.sax.InputSource; 046import org.xml.sax.SAXException; 047 048 049 050/** 051 * A very basic plugin to search and explore wikidata. 052 * Allows to turn a \"Wikidata Search Result Entity\" (of type=property) into DeepaMehta 4 AssociationTypes. 053 * 054 * @author Malte Reißig (<malte@mikromedia.de>) 055 * @website https://github.com/mukil/dm4-wikidata 056 * @version 0.0.4.1 057 */ 058 059@Path("/wikidata") 060@Consumes("application/json") 061@Produces("application/json") 062public class WikidataSearchPlugin extends PluginActivator implements WikidataSearchService { 063 064 private Logger log = Logger.getLogger(getClass().getName()); 065 066 private final String DEEPAMEHTA_VERSION = "DeepaMehta 4.4"; 067 private final String WIKIDATA_TYPE_SEARCH_VERSION = "0.0.5-SNAPSHOT"; 068 private final String CHARSET = "UTF-8"; 069 070 // --- DeepaMehta 4 URIs 071 072 private final String DM_WEBBROWSER_URL = "dm4.webbrowser.url"; 073 074 // --- Wikidata DeepaMehta URIs 075 076 private final String WS_WIKIDATA_URI = "org.deepamehta.workspaces.wikidata"; 077 078 private final String WD_SEARCH_BUCKET_URI = "org.deepamehta.wikidata.search_bucket"; 079 private final String WD_SEARCH_QUERY_URI = "org.deepamehta.wikidata.search_query"; 080 081 private final String WD_LANGUAGE_URI = "org.deepamehta.wikidata.language"; 082 // private final String WD_LANGUAGE_NAME_URI = "org.deepamehta.wikidata.language_name"; 083 // private final String WD_LANGUAGE_ISO_CODE_URI = "org.deepamehta.wikidata.language_code_iso"; 084 private final String WD_LANGUAGE_DATA_URI_PREFIX = "org.deepamehta.wikidata.lang_"; 085 086 private final String WD_SEARCH_ENTITY_URI = "org.deepamehta.wikidata.search_entity"; 087 private final String WD_SEARCH_ENTITY_LABEL_URI = "org.deepamehta.wikidata.search_entity_label"; 088 private final String WD_SEARCH_ENTITY_TYPE_URI = "org.deepamehta.wikidata.search_entity_type"; 089 private final String WD_SEARCH_ENTITY_ORDINAL_NR = "org.deepamehta.wikidata.search_ordinal_nr"; 090 private final String WD_SEARCH_ENTITY_DESCR_URI = "org.deepamehta.wikidata.search_entity_description"; 091 private final String WD_SEARCH_ENTITY_ALIAS_URI = "org.deepamehta.wikidata.search_entity_alias"; 092 private final String WD_SEARCH_ENTITIY_DATA_URI_PREFIX = "org.deepamehta.wikidata.entity_"; 093 094 private final String WD_TEXT_TYPE_URI = "org.deepamehta.wikidata.text"; 095 096 private final String WD_COMMONS_MEDIA_TYPE_URI = "org.deepamehta.wikidata.commons_media"; 097 private final String WD_COMMONS_MEDIA_NAME_TYPE_URI = "org.deepamehta.wikidata.commons_media_name"; 098 private final String WD_COMMONS_MEDIA_PATH_TYPE_URI = "org.deepamehta.wikidata.commons_media_path"; 099 // private final String WD_COMMONS_MEDIA_TYPE_TYPE_URI = "org.deepamehta.wikidata.commons_media_type"; 100 private final String WD_COMMONS_MEDIA_DESCR_TYPE_URI = "org.deepamehta.wikidata.commons_media_descr"; 101 private final String WD_COMMONS_AUTHOR_HTML_URI = "org.deepamehta.wikidata.commons_author_html"; 102 private final String WD_COMMONS_LICENSE_HTML_URI = "org.deepamehta.wikidata.commons_license_html"; 103 // private final String WD_GLOBE_COORDINATE_TYPE_URI = "org.deepamehta.wikidata.globe_coordinate"; 104 105 private final String WD_ENTITY_CLAIM_EDGE = "org.deepamehta.wikidata.claim_edge"; 106 107 // --- Wikidata Service URIs 108 109 private final String WD_SEARCH_ENTITIES_ENDPOINT = 110 "https://www.wikidata.org/w/api.php?action=wbsearchentities&format=json&limit=50"; 111 private final String WD_CHECK_ENTITY_CLAIMS_ENDPOINT = 112 "https://www.wikidata.org/w/api.php?action=wbgetclaims&format=json"; // &ungroupedlist=0 113 private final String WD_GET_ENTITY_ENDPOINT = "https://www.wikidata.org/w/api.php?action=wbgetentities" 114 + "&props=info%7Caliases%7Clabels%7Cdescriptions&format=json"; // sitelinks%2Furls%7C 115 private final String WD_SEARCH_ENTITY_TYPE_PROPERTY = "property"; 116 private final String WD_SEARCH_ENTITY_TYPE_ITEM = "item"; 117 private final String WD_ENTITY_BASE_URI = "org.wikidata.entity."; 118 119 private final String LANG_EN = "en"; 120 121 private final String WIKIDATA_ENTITY_URL_PREFIX = "//www.wikidata.org/wiki/"; 122 private final String WIKIDATA_PROPERTY_ENTITY_URL_PREFIX = "Property:"; 123 // private final String WIKIMEDIA_COMMONS_MEDIA_FILE_URL_PREFIX = "//commons.wikimedia.org/wiki/File:"; 124 125 @Inject 126 private AccessControlService acService = null; 127 128 129 130 // -- 131 // --- Public REST API Endpoints 132 // -- 133 134 @GET 135 @Path("/search/{entity}/{query}/{language_code}") 136 @Produces(MediaType.APPLICATION_JSON) 137 @Override 138 @Transactional 139 public Topic searchWikidataEntity(@PathParam("query") String query, @PathParam("language_code") String lang, 140 @PathParam("entity") String type) { 141 String json_result = ""; 142 StringBuffer resultBody = new StringBuffer(); 143 URL requestUri = null; 144 Topic search_bucket = null; 145 // sanity check (set en as default-language if nothing was provided by un-initialized language widget) 146 if (lang == null || lang.equals("undefined")) { 147 log.warning("Wikidata Language Search Option was not provided, now requesting data in EN"); 148 lang = LANG_EN; 149 } 150 // start search operation 151 try { 152 // 1) fixme: Authorize request 153 requestUri = new URL(WD_SEARCH_ENTITIES_ENDPOINT + "&search="+ query +"&language="+ lang +"&type=" + type); 154 log.info("Wikidata Search Entities Request: " + requestUri.toString()); 155 // 2) initiate request 156 HttpURLConnection connection = (HttpURLConnection) requestUri.openConnection(); 157 connection.setRequestMethod("GET"); 158 connection.setRequestProperty("User-Agent", "DeepaMehta "+DEEPAMEHTA_VERSION+" - " 159 + "Wikidata Search " + WIKIDATA_TYPE_SEARCH_VERSION); 160 // 3) check the response 161 int httpStatusCode = connection.getResponseCode(); 162 if (httpStatusCode != HttpURLConnection.HTTP_OK) { 163 throw new RuntimeException("Error with HTTPConnection, HTTP Status: " + httpStatusCode); 164 } 165 // 4) read in the response 166 BufferedReader rd = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET)); 167 for (String input; (input = rd.readLine()) != null;) { 168 resultBody.append(input); 169 } 170 rd.close(); 171 // 5) process response 172 if (resultBody.toString().isEmpty()) { 173 throw new RuntimeException("Wikidata was silent, HTTP Response: No content!"); 174 } else { 175 log.fine("Wikidata Search Request Response: " + resultBody.toString()); 176 // ..) Create Wikidata Search Bucket 177 ChildTopicsModel bucket_model = new ChildTopicsModel(); 178 bucket_model.put(WD_SEARCH_QUERY_URI, query); 179 bucket_model.putRef(WD_LANGUAGE_URI, WD_LANGUAGE_DATA_URI_PREFIX + lang); 180 json_result = resultBody.toString(); 181 processWikidataEntitySearch(json_result, bucket_model, type, lang); 182 search_bucket = dms.createTopic(new TopicModel(WD_SEARCH_BUCKET_URI, bucket_model)); 183 // workaround: addRef does not (yet) fetchComposite, so fetchComposite=true 184 search_bucket = dms.getTopic(search_bucket.getId()); 185 log.info("Wikidata Search Bucket for "+ query +" in ("+ lang +") was CREATED"); 186 } 187 search_bucket.loadChildTopics(); // load all child topics 188 } catch (MalformedURLException e) { 189 log.warning("Wikidata Plugin: MalformedURLException ..." + e.getMessage()); 190 throw new RuntimeException("Could not find wikidata endpoint - " + requestUri.toString(), e); 191 } catch (IOException ioe) { 192 log.warning("Wikidata Plugin: IOException ..." + ioe.getMessage()); 193 throw new WebApplicationException(new Throwable(ioe), Status.BAD_REQUEST); 194 } 195 return search_bucket; 196 } 197 198 @GET 199 @Path("/{entityId}/{language_code}") 200 @Produces(MediaType.APPLICATION_JSON) 201 @Override 202 @Transactional 203 public Topic getOrCreateWikidataEntity(@PathParam("entityId") String entityId, 204 @PathParam("language_code") String language_code) { 205 String json_result = ""; 206 StringBuffer resultBody = new StringBuffer(); 207 URL requestUri = null; 208 Topic entity = null; 209 // sanity check (set en as default-language if nothing was provided by un-initialized language widget) 210 if (language_code == null || language_code.equals("undefined")) { 211 log.warning("Wikidata Language Search Option was not provided, now requesting data in EN"); 212 language_code = LANG_EN; 213 } 214 try { 215 // 1) fixme: Authorize request 216 // &sites=dewiki&&languages=de 217 requestUri = new URL(WD_GET_ENTITY_ENDPOINT + "&ids="+ entityId + "&languages=" + language_code); 218 log.fine("Requesting Wikidata Entity Details: " + requestUri.toString()); 219 // 2) initiate request 220 HttpURLConnection connection = (HttpURLConnection) requestUri.openConnection(); 221 connection.setRequestMethod("GET"); 222 connection.setRequestProperty("User-Agent", "DeepaMehta "+DEEPAMEHTA_VERSION+" - " 223 + "Wikidata Search " + WIKIDATA_TYPE_SEARCH_VERSION); 224 // 3) check the response 225 int httpStatusCode = connection.getResponseCode(); 226 if (httpStatusCode != HttpURLConnection.HTTP_OK) { 227 throw new RuntimeException("Error with HTTPConnection, HTTP Status: " + httpStatusCode); 228 } 229 // 4) read in the response 230 BufferedReader rd = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET)); 231 for (String input; (input = rd.readLine()) != null;) { 232 resultBody.append(input); 233 } 234 rd.close(); 235 // 5) process response 236 if (resultBody.toString().isEmpty()) { 237 throw new RuntimeException("Wikidata was silent, HTTP Response: No content!"); 238 } else { 239 // 6) Create or Update Wikidata Search Entity 240 json_result = resultBody.toString(); 241 log.fine("Wikidata Entity Request Response: " + json_result); 242 JSONObject response = new JSONObject(json_result); 243 JSONObject entities = response.getJSONObject("entities"); 244 JSONObject response_entity = entities.getJSONObject(entityId); 245 // 0) Check if we need to CREATE or UPDATE our search result entity item 246 Topic existingEntity = dms.getTopic("uri", 247 new SimpleValue(WD_SEARCH_ENTITIY_DATA_URI_PREFIX + entityId)); 248 if (existingEntity == null) { 249 entity = createWikidataSearchEntity(response_entity, language_code); 250 } else { 251 // Updates labels, descriptions, aliases, url and (query) language 252 entity = updateWikidataEntity(existingEntity, response_entity, language_code); 253 } 254 entity.loadChildTopics(); 255 } 256 } catch (MalformedURLException e) { 257 log.warning("Wikidata Plugin: MalformedURLException ..." + e.getMessage()); 258 throw new RuntimeException("Could not find wikidata endpoint - " + requestUri.toString(), e); 259 } catch (IOException ioe) { 260 throw new WebApplicationException(new Throwable(ioe), Status.BAD_REQUEST); 261 } catch (JSONException je) { 262 throw new WebApplicationException(new Throwable(je), Status.INTERNAL_SERVER_ERROR); 263 } 264 return entity; 265 } 266 267 /** This method handles the "Import topics" command available on all "Wikidata Search Result" topics. */ 268 269 @GET 270 @Path("/check/claims/{id}/{language_code}") 271 @Produces(MediaType.APPLICATION_JSON) 272 @Override 273 @Transactional 274 public Topic loadClaimsAndRelatedWikidataItems(@PathParam("id") long topicId, 275 @PathParam("language_code") String language_option) { 276 277 String json_result = ""; 278 StringBuffer resultBody = new StringBuffer(); 279 URL requestUri = null; 280 Topic wikidataItem = dms.getTopic(topicId); 281 // 0) sanity check (set en as default-language if nothing was provided by un-initialized language widget) 282 if (language_option == null || language_option.equals("undefined")) { 283 log.warning("Wikidata Language Search Option was not provided, now requesting data in EN."); 284 language_option = LANG_EN; 285 } 286 String wikidataId = wikidataItem.getUri().replaceAll(WD_SEARCH_ENTITIY_DATA_URI_PREFIX, ""); 287 try { 288 // 1) ### Authorize request 289 // 2) ### be explicit and add "&rank=normal" to wbgetclaims-call, ### add "&props=references" somewhen 290 requestUri = new URL(WD_CHECK_ENTITY_CLAIMS_ENDPOINT + "&entity=" + wikidataId); 291 log.fine("Requesting Wikidata Entity Claims: " + requestUri.toString()); 292 // 2) initiate request 293 HttpURLConnection connection = (HttpURLConnection) requestUri.openConnection(); 294 connection.setRequestMethod("GET"); 295 connection.setRequestProperty("User-Agent", "DeepaMehta "+DEEPAMEHTA_VERSION+" - " 296 + "Wikidata Search " + WIKIDATA_TYPE_SEARCH_VERSION); 297 // 3) check the response 298 int httpStatusCode = connection.getResponseCode(); 299 if (httpStatusCode != HttpURLConnection.HTTP_OK) { 300 throw new RuntimeException("Error with HTTPConnection, HTTP Status: " + httpStatusCode); 301 } 302 // 4) read in the response 303 BufferedReader rd = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET)); 304 for (String input; (input = rd.readLine()) != null;) { 305 resultBody.append(input); 306 } 307 rd.close(); 308 // 5) process response 309 if (resultBody.toString().isEmpty()) { 310 throw new RuntimeException("Wikidata was silent, HTTP Response: No content!"); 311 } else { 312 json_result = resultBody.toString(); 313 log.fine("Wikidata Claim Request Response: " + json_result); 314 processWikidataClaims(json_result, wikidataItem, language_option); 315 log.info("Wikidata Claim Response is FINE"); 316 } 317 wikidataItem.loadChildTopics(); // load all child topics 318 } catch (MalformedURLException e) { 319 log.warning("Wikidata Plugin: MalformedURLException ..." + e.getMessage()); 320 throw new RuntimeException("Could not find wikidata endpoint - " + requestUri.toString(), e); 321 } catch (IOException ioe) { 322 throw new WebApplicationException(new Throwable(ioe), Status.BAD_REQUEST); 323 } 324 return wikidataItem; 325 } 326 327 @GET 328 @Path("/property/turn/{id}") 329 @Produces(MediaType.APPLICATION_JSON) 330 @Override 331 @Transactional 332 public Topic createWikidataAssociationType(@PathParam("id") long id) { 333 AssociationType association_type = null; 334 try { 335 Topic property_entity = dms.getTopic(id); 336 // 1) Create new Association Type model 337 String property_name = property_entity.getSimpleValue().toString(); 338 AssociationTypeModel assoc_type_model = new AssociationTypeModel("org.deepamehta.wikidata.assoctype_" 339 + property_entity.getUri().replaceAll(WD_SEARCH_ENTITIY_DATA_URI_PREFIX, ""), 340 property_name, "dm4.core.text"); 341 association_type = dms.createAssociationType(assoc_type_model); 342 // 2) Assign to "Wikidata" Workspace 343 assignToWikidataWorkspace(association_type); 344 // 3) Associated search-result-entity to new assoc-type (to keep track) 345 dms.createAssociation(new AssociationModel("dm4.core.association", 346 new TopicRoleModel(property_entity.getUri(), "dm4.core.default"), 347 new TopicRoleModel(association_type.getUri(), "dm4.core.default") 348 )); 349 log.info("Turned wikidata property \""+ property_entity.getUri() +"\" into DM Association Type!"); 350 } catch (Error e) { 351 log.warning("OH: The Wikidata Plugin experienced an unforeseen error! "+ e.getMessage()); 352 } finally { 353 return association_type; 354 } 355 } 356 357 @GET 358 @Path("/property/related/claims/{id}") 359 @Produces(MediaType.APPLICATION_JSON) 360 @Override 361 public ResultList<RelatedAssociation> getTopicRelatedAssociations (@PathParam("id") long topicId) { 362 Topic topic = dms.getTopic(topicId); 363 ResultList<RelatedAssociation> associations = topic.getRelatedAssociations("dm4.core.aggregation", 364 "dm4.core.child", "dm4.core.parent", "org.deepamehta.wikidata.claim_edge"); 365 return associations.loadChildTopics(); 366 } 367 368 // -- 369 // --- Wikidata Search (Application Specific) Private Methods 370 // -- 371 372 private void processWikidataEntitySearch(String json_result, ChildTopicsModel search_bucket, 373 String type, String lang) { 374 try { 375 JSONObject response = new JSONObject(json_result); 376 JSONArray result = response.getJSONArray("search"); 377 if (result.length() > 0) { 378 for (int i = 0; i < result.length(); i++) { 379 JSONObject entity_response = result.getJSONObject(i); 380 // Check if entity already exists 381 String id = entity_response.getString("id"); 382 Topic existing_entity = dms.getTopic("uri", 383 new SimpleValue(WD_SEARCH_ENTITIY_DATA_URI_PREFIX + id)); 384 if (existing_entity == null) { 385 // Create new search entity composite 386 String name = entity_response.getString("label"); 387 String url = entity_response.getString("url"); 388 // 389 ChildTopicsModel entity_composite = new ChildTopicsModel(); 390 entity_composite.put(WD_SEARCH_ENTITY_LABEL_URI, name); 391 if (entity_response.has("description")) { 392 String description = entity_response.getString("description"); 393 entity_composite.put(WD_SEARCH_ENTITY_DESCR_URI, description); 394 } 395 entity_composite.put(DM_WEBBROWSER_URL, url); 396 // ### fix. aliases add up 397 if (entity_response.has("aliases")) { 398 JSONArray aliases = entity_response.getJSONArray("aliases"); 399 for (int a=0; a < aliases.length(); a++) { 400 String alias = aliases.getString(a); 401 entity_composite.add(WD_SEARCH_ENTITY_ALIAS_URI, 402 new TopicModel(WD_SEARCH_ENTITY_ALIAS_URI, new SimpleValue(alias))); 403 } 404 } 405 // set enity place in resultset 406 entity_composite.put(WD_SEARCH_ENTITY_ORDINAL_NR, i); 407 // set entity-type 408 entity_composite.put(WD_SEARCH_ENTITY_TYPE_URI, type); 409 // set language-value on entity-result 410 entity_composite.putRef(WD_LANGUAGE_URI, WD_LANGUAGE_DATA_URI_PREFIX + lang); 411 TopicModel entity_model = new TopicModel(WD_SEARCH_ENTITIY_DATA_URI_PREFIX + id, 412 WD_SEARCH_ENTITY_URI, entity_composite); 413 // create and reference entity in wikidata search bucket 414 search_bucket.add(WD_SEARCH_ENTITY_URI, entity_model); 415 } else { 416 // reference existing entity in wikidata search bucket by URI 417 search_bucket.addRef(WD_SEARCH_ENTITY_URI, WD_SEARCH_ENTITIY_DATA_URI_PREFIX + id); 418 } 419 } 420 } 421 } catch (JSONException ex) { 422 throw new RuntimeException(ex); 423 } 424 } 425 426 private Topic createWikidataSearchEntity(JSONObject entity_response, String lang) { 427 Topic entity = null; 428 DeepaMehtaTransaction tx = dms.beginTx(); 429 try { 430 String id = entity_response.getString("id"); 431 // Create new search entity composite 432 ChildTopicsModel entity_composite = buildWikidataEntityModel(entity_response, lang); 433 TopicModel entity_model = new TopicModel(WD_SEARCH_ENTITIY_DATA_URI_PREFIX + id, 434 WD_SEARCH_ENTITY_URI, entity_composite); 435 entity = dms.createTopic(entity_model); 436 log.info("Wikidata Search Entity Created (" + 437 entity_composite.getString(WD_SEARCH_ENTITY_TYPE_URI)+ "): \"" + 438 entity.getSimpleValue() +"\" "+entity.getId()+" - FINE!"); 439 tx.success(); 440 tx.finish(); 441 return entity; 442 } catch (Exception ex) { 443 tx.failure(); 444 throw new RuntimeException(ex); 445 } 446 } 447 448 private Topic updateWikidataEntity(Topic entity, JSONObject entity_response, String lang) { 449 DeepaMehtaTransaction tx = dms.beginTx(); 450 try { 451 // Update existing search entity topic 452 ChildTopicsModel entity_composite = buildWikidataEntityModel(entity_response, lang); 453 TopicModel entity_model = new TopicModel(entity.getId(), entity_composite); 454 dms.updateTopic(entity_model); 455 log.fine("Wikidata Search Entity Updated (" + 456 entity_composite.getString(WD_SEARCH_ENTITY_TYPE_URI)+ "): \"" + entity.getSimpleValue() +"\" - FINE!"); 457 tx.success(); 458 tx.finish(); 459 return entity; 460 } catch (Exception ex) { 461 tx.failure(); 462 throw new RuntimeException(ex); 463 } 464 } 465 466 private ChildTopicsModel buildWikidataEntityModel(JSONObject entity_response, String lang) { 467 ChildTopicsModel entity_composite = null; 468 try { 469 String id = entity_response.getString("id"); 470 String type = entity_response.getString("type"); 471 entity_composite = new ChildTopicsModel(); 472 // main label 473 if (entity_response.has("labels")) { 474 JSONObject labels = entity_response.getJSONObject("labels"); 475 JSONObject languaged_label = null; 476 if (labels.has(lang)) { 477 languaged_label = labels.getJSONObject(lang); 478 String label = languaged_label.getString("value"); 479 entity_composite.put(WD_SEARCH_ENTITY_LABEL_URI, label); 480 } else { 481 log.warning("No label found for language \"" + lang + "\" and id " + id); 482 } 483 } 484 // main description 485 if (entity_response.has("descriptions")) { 486 JSONObject descriptions = entity_response.getJSONObject("descriptions"); 487 JSONObject languaged_descr = null; 488 if (descriptions.has(lang)) { 489 languaged_descr = descriptions.getJSONObject(lang); 490 String description = languaged_descr.getString("value"); 491 entity_composite.put(WD_SEARCH_ENTITY_DESCR_URI, description); 492 } else { 493 log.warning("No description found for language \"" + lang + "\" and id " + id); 494 } 495 } 496 // aliases 497 if (entity_response.has("aliases")) { 498 JSONObject aliases = entity_response.getJSONObject("aliases"); 499 JSONArray languaged_aliases = null; 500 if (aliases.has(lang)) { 501 languaged_aliases = aliases.getJSONArray(lang); 502 for (int a=0; a < languaged_aliases.length(); a++) { 503 JSONObject alias_object = languaged_aliases.getJSONObject(a); 504 String alias = alias_object.getString("value"); 505 entity_composite.add(WD_SEARCH_ENTITY_ALIAS_URI, 506 new TopicModel(WD_SEARCH_ENTITY_ALIAS_URI, new SimpleValue(alias))); 507 } 508 } 509 } 510 // set wikidata url 511 if (type.equals(WD_SEARCH_ENTITY_TYPE_PROPERTY)) { 512 entity_composite.put(DM_WEBBROWSER_URL, WIKIDATA_ENTITY_URL_PREFIX 513 + WIKIDATA_PROPERTY_ENTITY_URL_PREFIX + id); 514 } else { 515 entity_composite.put(DM_WEBBROWSER_URL, WIKIDATA_ENTITY_URL_PREFIX + id); 516 } 517 // set language-value on entity-result 518 entity_composite.putRef(WD_LANGUAGE_URI, WD_LANGUAGE_DATA_URI_PREFIX + lang); 519 // ### sitelinks 520 /** if (entity_response.has("sitelinks")) { 521 JSONObject sitelinks = entity_response.getJSONObject("sitelinks"); 522 if (sitelinks.has(lang + "wiki")) { 523 JSONObject sitelink = sitelinks.getJSONObject(lang + "wiki"); 524 entity_composite.put(DM_WEBBROWSER_URL, sitelink.getString("url")); 525 } else { 526 log.warning("There is no sitelink for this item in this language/wiki: " + lang + "wiki"); 527 } 528 } **/ 529 entity_composite.put(WD_SEARCH_ENTITY_TYPE_URI, type); 530 return entity_composite; 531 } catch (JSONException jex) { 532 throw new RuntimeException(jex); 533 } 534 } 535 536 /** 537 * Fix 1: On each processing delete all outgoing claims and re-create (them) from scratch, this way we 538 * support *deletion* of claims at the remote-site without re-creating the referenced items. 539 * Fix 2: Process qualifierSnaks on each claim (extend migration for that). 540 * Fix 3: Process all references for each claim (simply as URLs?). 541 */ 542 private void processWikidataClaims(String json_result, Topic wikidataItem, String language_code) { 543 try { 544 JSONObject response = new JSONObject(json_result); 545 JSONObject result = response.getJSONObject("claims"); 546 // Delete all claims going out from this item (me) 547 removeAllClaimsFromThisItem(wikidataItem); 548 wikidataItem = dms.getTopic(wikidataItem.getId()); 549 // Then re-create all claims going out from this item (this is our "UPDATE") 550 Iterator properties = result.keys(); 551 log.info("Wikidata Plugin is processing all properties part of related " + result.length() + " CLAIMS"); 552 Topic propertyEntity = null; 553 while (properties.hasNext()) { 554 String property_id = properties.next().toString(); 555 // 1) Load related property-entity 556 propertyEntity = getOrCreateWikidataEntity(property_id, language_code); 557 // HashMap<String, List<Topic>> all_entities = new HashMap<String, List<Topic>>(); 558 JSONArray property_listing = result.getJSONArray(property_id); 559 // ### process all claims properly (delete and then create) 560 for (int i=0; i < property_listing.length(); i++) { 561 // 2) fetch related wikidata entity 562 Topic referencedItemEntity = null; 563 JSONObject entity_response = property_listing.getJSONObject(i); 564 JSONObject mainsnak = entity_response.getJSONObject("mainsnak"); 565 String claim_guid = entity_response.getString("id"); 566 // 3) build up item as part of the claim (if so) 567 String itemId = ""; 568 String snakDataType = mainsnak.getString("datatype"); 569 // log.info("SNakDataType=" + snakDataType + "MainSnak" + mainsnak.toString()); 570 JSONObject snakDataValue = mainsnak.getJSONObject("datavalue"); 571 // ..) depending on the various (claimed/realted) value-types 572 if (snakDataType.equals("wikibase-item")) { 573 // log.info("Wikibase Item claimed via \"" + propertyEntity.getSimpleValue() + "\""); 574 JSONObject snakDataValueValue = snakDataValue.getJSONObject("value"); 575 long numericId = snakDataValueValue.getLong("numeric-id"); 576 itemId = "Q" + numericId; // is this always of entity-type "item"? responses looks like. 577 referencedItemEntity = getOrCreateWikidataEntity(itemId, language_code); 578 } else if (snakDataType.equals("commonsMedia")) { 579 // do relate wikidata.commons_media 580 log.info(" --------- Commons Media Item! ------------"); 581 if (snakDataValue.has("value")) { 582 String fileName = snakDataValue.getString("value"); 583 referencedItemEntity = getOrCreateWikimediaCommonsMediaTopic(fileName); 584 log.info(" --- FINE! --- Related Wikimedia Commons File to Wikidata Item!"); 585 } 586 /** **/ 587 // ### make use of WIKIMEDIA_COMMONS_MEDIA_FILE_URL_PREFIX and implement page-renderer 588 } else if (snakDataType.equals("globe-coordinate")) { 589 // do relate wikidata.globe_coordinate 590 // log.fine("Globe Coordinate claimed via \"" + propertyEntity.getSimpleValue() 591 // + "\" ("+language_code+") DEBUG:"); 592 // log.fine(" " + snakDataValue.toString()); 593 } else if (snakDataType.equals("url")) { 594 if (snakDataValue.has("value")) { 595 // ### getOrCreateWebResource() 596 String value = snakDataValue.getString("value"); 597 log.warning("### SKIPPING URL => " + value); 598 } 599 } else if (snakDataType.equals("string")) { 600 if (snakDataValue.has("value")) { 601 String value = snakDataValue.getString("value"); 602 referencedItemEntity = getOrCreateWikidataText(value, language_code); 603 } else { 604 log.warning("Could not access wikidata-text value - json-response EMPTY!"); 605 } 606 } else if (snakDataType.equals("quantity")) { 607 if (snakDataValue.has("value")) { 608 JSONObject value = snakDataValue.getJSONObject("value"); 609 if (value.has("amount")) { 610 String amount = value.getString("amount"); 611 referencedItemEntity = getOrCreateWikidataText(amount, language_code); 612 } else { 613 log.warning("Could not access wikidata-text value - AMOUNT EMPTY!"); 614 } 615 } else { 616 log.warning("Could not access wikidata-text value - NO VALUE SET!"); 617 } 618 } else { 619 log.warning("Value claimed as " + propertyEntity.getSimpleValue() + " is not of any known type" 620 + " wikibase-item but \"" + snakDataType +"\" ("+snakDataValue+")"); 621 // e.g. snakDataType.equals("quantity") 622 } 623 // store topic reference to (new or already existing) wikidata-entity/ resp. -value topic 624 if (referencedItemEntity != null) { 625 createWikidataClaimEdge(claim_guid, wikidataItem, referencedItemEntity, 626 propertyEntity); 627 } else { 628 log.warning("SKIPPED creating claim of type \""+snakDataType+"\" value for " 629 + "\""+propertyEntity.getSimpleValue()+"\" on \"" + wikidataItem.getSimpleValue()+"\""); 630 } 631 } 632 /** Iterator entity_iterator = all_entities.keySet().iterator(); 633 StringBuffer requesting_ids = new StringBuffer(); 634 while (entity_iterator.hasNext()) { 635 String entity_id = entity_iterator.next().toString(); 636 requesting_ids.append(entity_id + "|"); 637 } 638 log.info("Requesting ALL ITEMS for " +property_id+ ": " + requesting_ids.toString()); 639 omitting this solution bcause: "*": "Too many values supplied for parameter 'ids': the limit is 50" **/ 640 } 641 } catch (JSONException ex) { 642 log.warning("JSONException during processing a wikidata claim. " + ex.getMessage()); 643 throw new RuntimeException(ex); 644 } 645 } 646 647 private void removeAllClaimsFromThisItem(Topic wikidataItem) { 648 List<Association> all_claims = wikidataItem.getAssociations(); 649 ArrayList<Association> claims_to_be_deleted = new ArrayList(); 650 for (Association claim : all_claims) { 651 if (claim.getTypeUri().equals(WD_ENTITY_CLAIM_EDGE)) { 652 if (claim.getRole1().getModel().getRoleTypeUri().equals("dm4.core.default") 653 && claim.getRole2().getModel().getRoleTypeUri().equals("dm4.core.default")) { 654 // just delete _all_ old, un-directed associations invloving me (to re-import them with direction) 655 claims_to_be_deleted.add(claim); 656 } 657 // every "claim" where i am the "parent" is to be deleted and re-created 658 if ((claim.getRole2().getModel().getRoleTypeUri().equals("dm4.core.parent") 659 && claim.getRole2().getPlayerId() == wikidataItem.getId()) 660 || (claim.getRole1().getModel().getRoleTypeUri().equals("dm4.core.parent") && 661 claim.getRole1().getPlayerId() == wikidataItem.getId())) { 662 if (!(claim.getRole2().getPlayerId() == wikidataItem.getId() && // ### cannot remove association to one-self 663 claim.getRole1().getPlayerId() == wikidataItem.getId())) { 664 claims_to_be_deleted.add(claim); 665 } else { 666 // ### log command to investigate database with a corrupt db (that is topics with 667 // self-referential associations) 668 log.warning("IDENTIFIED association to one-self, skip removal cause it would throw an Error"); 669 } 670 } 671 } 672 } 673 log.info("> " + claims_to_be_deleted.size() + " claims to be DELETED"); 674 for (Association edge : claims_to_be_deleted) { 675 DeepaMehtaTransaction dx = dms.beginTx(); 676 try { 677 log.info("> Associaton \""+edge.getSimpleValue()+"\" is deleted (" + edge.getUri() + ")" 678 + " from 1: \""+edge.getRole1().getPlayer().getSimpleValue()+"\" ==> " 679 + " to 2: \""+edge.getRole2().getPlayer().getSimpleValue() + "\""); 680 dms.deleteAssociation(edge.getId()); 681 dx.success(); 682 } catch (Exception e) { 683 throw new RuntimeException(e); 684 } finally { 685 dx.finish(); 686 } 687 } 688 } 689 690 /** 691 * From Topic plays the role of a parent and to topic plays role of a child, 692 * just like in wikidata in the semantics of a *Claim*. 693 */ 694 private Association createWikidataClaimEdge(String claim_guid, Topic from, Topic to, Topic property) { 695 Association claim = null; 696 DeepaMehtaTransaction dx = dms.beginTx(); 697 try { 698 if (!associationExists(WD_ENTITY_CLAIM_EDGE, from, to) 699 && (to.getId() != from.getId())) { // ### dm4 does not allow self-referential associations 700 // 1) Create \"Wikidata Claim\"-Edge with GUID 701 claim = dms.createAssociation(new AssociationModel(WD_ENTITY_CLAIM_EDGE, 702 new TopicRoleModel(from.getId(), "dm4.core.parent"), 703 new TopicRoleModel(to.getId(), "dm4.core.child"))); 704 claim.setUri(claim_guid); 705 /** log.info("Created \"Wikidata Claim\" with GUID: " + claim.getUri() +" for \"" + two.getSimpleValue() + 706 " (property: " + property.getSimpleValue() + 707 "\") for \"" + one.getSimpleValue() + "\" - FINE"); **/ 708 // 2) Assign wikidata property (=Wikidata Search Entity) to this claim-edge 709 claim.setChildTopics(new ChildTopicsModel().putRef(WD_SEARCH_ENTITY_URI, 710 property.getUri())); 711 // ### problems with missing aggregated childs for composite assocTypes to be investigated .. 712 dms.updateAssociation(claim.getModel()); 713 claim.loadChildTopics(); 714 } 715 dx.success(); 716 dx.finish(); 717 return claim; 718 } catch (Exception e) { 719 log.severe("FAILED to create a \"Claim\" between \""+from.getSimpleValue()+"\" - \""+to.getSimpleValue()); 720 dx.failure(); 721 throw new RuntimeException(e); 722 } 723 } 724 725 private Topic getOrCreateWikidataText(String value, String lang) { 726 Topic textValue = null; 727 // 1) query for text-value 728 try { 729 textValue = dms.getTopic(WD_TEXT_TYPE_URI, new SimpleValue(value)); 730 } catch (Exception ex) { 731 // log.info("Could not find a wikidata-text value topic for \"" + value + ex.getMessage() + "\""); 732 } 733 // 2) re-use or create 734 DeepaMehtaTransaction tx = dms.beginTx(); 735 try { 736 if (textValue == null) { 737 textValue = dms.createTopic(new TopicModel(WD_TEXT_TYPE_URI, new SimpleValue(value))); 738 log.info("CREATED \"Wikidata Text\" - \"" + value +"\" (" + lang + ") - OK!"); 739 } /** else { 740 log.info("FETCHED \"Wikidata Text\" - \"" + textValue.getSimpleValue() +"\" " 741 + "(" + lang + ") - Re-using it!"); 742 } **/ 743 tx.success(); 744 tx.finish(); 745 return textValue; 746 } catch (Exception ex) { 747 tx.failure(); 748 log.warning("FAILURE during creating a wikidata value topic: " + ex.getLocalizedMessage()); 749 throw new RuntimeException(ex); 750 } 751 } 752 753 private Topic getOrCreateWikimediaCommonsMediaTopic(String fileName) { 754 Topic mediaTopic = dms.getTopic(WD_COMMONS_MEDIA_NAME_TYPE_URI, new SimpleValue(fileName)); 755 if (mediaTopic == null) { // create new media topic 756 DeepaMehtaTransaction dx = dms.beginTx(); 757 ChildTopicsModel mediaCompositeModel = new ChildTopicsModel() 758 .put(WD_COMMONS_MEDIA_NAME_TYPE_URI, fileName); 759 enrichAboutWikimediaCommonsMetaData(mediaCompositeModel, fileName); 760 TopicModel mediaTopicModel = new TopicModel(WD_COMMONS_MEDIA_TYPE_URI, mediaCompositeModel); 761 try { 762 mediaTopic = dms.createTopic(mediaTopicModel).loadChildTopics(); 763 log.info("Created new Wikimedia Commons Media Topic \"" + mediaTopic.getSimpleValue().toString()); 764 dx.success(); 765 dx.finish(); 766 } catch (RuntimeException re) { 767 log.log(Level.SEVERE, "Could not create Wikidata Commons Media Topic", re); 768 dx.failure(); 769 } 770 } else { 771 mediaTopic = mediaTopic.getRelatedTopic("dm4.core.composition", 772 "dm4.core.child", "dm4.core.parent", WD_COMMONS_MEDIA_TYPE_URI); 773 } 774 // reference existing media topic ### here is no update mechanism yet 775 return mediaTopic; 776 } 777 778 private void enrichAboutWikimediaCommonsMetaData(ChildTopicsModel model, String fileName) { 779 // 1) fetch data by name from http://tools.wmflabs.org/magnus-toolserver/commonsapi.php?image= 780 URL requestUri; 781 StringBuffer resultBody = new StringBuffer(); 782 String xml_result = ""; 783 try { 784 requestUri = new URL("http://tools.wmflabs.org/magnus-toolserver/commonsapi.php?image=" 785 + URLEncoder.encode(fileName, CHARSET)); 786 log.fine("Requesting Wikimedia Commons Item Details: " + requestUri.toString()); 787 // 2) initiate request 788 HttpURLConnection connection = (HttpURLConnection) requestUri.openConnection(); 789 connection.setRequestMethod("GET"); 790 connection.setRequestProperty("User-Agent", "DeepaMehta "+DEEPAMEHTA_VERSION+" - " 791 + "Wikidata Search " + WIKIDATA_TYPE_SEARCH_VERSION); 792 // 3) check the response 793 int httpStatusCode = connection.getResponseCode(); 794 if (httpStatusCode != HttpURLConnection.HTTP_OK) { 795 throw new RuntimeException("Error with HTTPConnection, HTTP Status: " + httpStatusCode); 796 } 797 // 4) read in the response 798 BufferedReader rd = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET)); 799 for (String input; (input = rd.readLine()) != null;) { 800 resultBody.append(input); 801 } 802 rd.close(); 803 // 5) process response 804 if (resultBody.toString().isEmpty()) { 805 throw new RuntimeException("Wikidata was silent, HTTP Response: No content!"); 806 } else { 807 DocumentBuilder builder; 808 Document document; 809 xml_result = resultBody.toString(); 810 builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); 811 document = builder.parse(new InputSource(new ByteArrayInputStream(xml_result.getBytes("utf-8")))); 812 NodeList responses = document.getElementsByTagName("response"); 813 // Node defaultLanguageDescr = responses.item(1).getFirstChild(); 814 Node fileElement = responses.item(0).getFirstChild(); 815 // 816 Node resourceUrls = fileElement.getChildNodes().item(2); 817 NodeList resourceElements = resourceUrls.getChildNodes(); // file and description as childs 818 Node filePath = resourceElements.item(0); // file at 0 819 Node authorUrl = fileElement.getChildNodes().item(10); // authorUrl HTML at 10 820 Node permission = fileElement.getChildNodes().item(12); // permission HTML at 12 821 // 822 String authorText = (authorUrl != null) ? authorUrl.getTextContent() : "No author information available."; 823 String permissionText = (permission != null) ? permission.getTextContent() : "No license information available."; 824 model.put(WD_COMMONS_MEDIA_PATH_TYPE_URI, filePath.getTextContent()); 825 // model.put(WD_COMMONS_MEDIA_DESCR_TYPE_URI, defaultLanguageDescr.getTextContent()); 826 model.put(WD_COMMONS_AUTHOR_HTML_URI, authorText); 827 model.put(WD_COMMONS_LICENSE_HTML_URI, permissionText); 828 log.fine(" --- Wikimedia Commons Response is FINE ---"); 829 } 830 } catch (MalformedURLException e) { 831 log.log(Level.SEVERE, "Wikidata Plugin: MalformedURLException ...", e); 832 } catch (ParserConfigurationException e) { 833 log.log(Level.SEVERE, "Wikidata Plugin: ParserConfigurationException ...", e); 834 } catch (IOException ioe) { 835 log.log(Level.SEVERE, "Wikidata Plugin: IOException ...", ioe); 836 } catch (SAXException ex) { 837 log.log(Level.SEVERE, null, ex); 838 839 } catch (DOMException e) { 840 log.log(Level.SEVERE, null , e); 841 } 842 } 843 844 // -- 845 // --- DeepaMehta 4 Plugin Related Private Methods 846 // -- 847 848 @Override 849 public void assignToWikidataWorkspace(Topic topic) { 850 if (topic == null) return; 851 Topic wikidataWorkspace = dms.getTopic("uri", new SimpleValue(WS_WIKIDATA_URI)); 852 if (!associationExists("dm4.core.aggregation", topic, wikidataWorkspace)) { 853 dms.createAssociation(new AssociationModel("dm4.core.aggregation", 854 new TopicRoleModel(topic.getId(), "dm4.core.parent"), 855 new TopicRoleModel(wikidataWorkspace.getId(), "dm4.core.child") 856 )); 857 } 858 } 859 860 private boolean associationExists(String edge_type, Topic item, Topic user) { 861 List<Association> results = dms.getAssociations(item.getId(), user.getId(), edge_type); 862 return (results.size() > 0) ? true : false; 863 } 864 865}