
package org.deepamehta.plugins.wikidata;

import de.deepamehta.core.Association;
import de.deepamehta.core.AssociationType;
import de.deepamehta.core.RelatedAssociation;
import de.deepamehta.core.Topic;
import de.deepamehta.core.model.*;
import de.deepamehta.core.osgi.PluginActivator;
import de.deepamehta.core.service.Inject;
import de.deepamehta.core.service.ResultList;
import de.deepamehta.core.service.Transactional;
import de.deepamehta.core.storage.spi.DeepaMehtaTransaction;
import de.deepamehta.plugins.accesscontrol.service.AccessControlService;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.ws.rs.*;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response.Status;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.deepamehta.plugins.wikidata.service.WikidataSearchService;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;


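// For orientation, the REST endpoints this plugin registers (paths per the @Path annotations below;
// the entity ids and query values are just examples, host and port depend on the installation):
//   GET /wikidata/search/{entity}/{query}/{language_code}   e.g. /wikidata/search/item/Berlin/en
//   GET /wikidata/{entityId}/{language_code}                e.g. /wikidata/Q64/en
//   GET /wikidata/check/claims/{id}/{language_code}         (id = topic id of an imported entity)
//   GET /wikidata/property/turn/{id}
//   GET /wikidata/property/related/claims/{id}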
/**
 * A basic plugin to search and explore Wikidata.
 * Allows turning a "Wikidata Search Result Entity" (of type "property") into a DeepaMehta 4 AssociationType.
 *
 * @author Malte Reißig (<malte@mikromedia.de>)
 * @website https://github.com/mukil/dm4-wikidata
 * @version 0.0.4.1
 */
@Path("/wikidata")
@Consumes("application/json")
@Produces("application/json")
public class WikidataSearchPlugin extends PluginActivator implements WikidataSearchService {

    private Logger log = Logger.getLogger(getClass().getName());

    private final String DEEPAMEHTA_VERSION = "DeepaMehta 4.4";
    private final String WIKIDATA_TYPE_SEARCH_VERSION = "0.0.5-SNAPSHOT";
    private final String CHARSET = "UTF-8";

    // --- DeepaMehta 4 URIs

    private final String DM_WEBBROWSER_URL = "dm4.webbrowser.url";

    // --- Wikidata DeepaMehta URIs

    private final String WS_WIKIDATA_URI = "org.deepamehta.workspaces.wikidata";

    private final String WD_SEARCH_BUCKET_URI = "org.deepamehta.wikidata.search_bucket";
    private final String WD_SEARCH_QUERY_URI = "org.deepamehta.wikidata.search_query";

    private final String WD_LANGUAGE_URI = "org.deepamehta.wikidata.language";
    // private final String WD_LANGUAGE_NAME_URI = "org.deepamehta.wikidata.language_name";
    // private final String WD_LANGUAGE_ISO_CODE_URI = "org.deepamehta.wikidata.language_code_iso";
    private final String WD_LANGUAGE_DATA_URI_PREFIX = "org.deepamehta.wikidata.lang_";

    private final String WD_SEARCH_ENTITY_URI = "org.deepamehta.wikidata.search_entity";
    private final String WD_SEARCH_ENTITY_LABEL_URI = "org.deepamehta.wikidata.search_entity_label";
    private final String WD_SEARCH_ENTITY_TYPE_URI = "org.deepamehta.wikidata.search_entity_type";
    private final String WD_SEARCH_ENTITY_ORDINAL_NR = "org.deepamehta.wikidata.search_ordinal_nr";
    private final String WD_SEARCH_ENTITY_DESCR_URI = "org.deepamehta.wikidata.search_entity_description";
    private final String WD_SEARCH_ENTITY_ALIAS_URI = "org.deepamehta.wikidata.search_entity_alias";
    private final String WD_SEARCH_ENTITIY_DATA_URI_PREFIX = "org.deepamehta.wikidata.entity_";

    private final String WD_TEXT_TYPE_URI = "org.deepamehta.wikidata.text";

    private final String WD_COMMONS_MEDIA_TYPE_URI = "org.deepamehta.wikidata.commons_media";
    private final String WD_COMMONS_MEDIA_NAME_TYPE_URI = "org.deepamehta.wikidata.commons_media_name";
    private final String WD_COMMONS_MEDIA_PATH_TYPE_URI = "org.deepamehta.wikidata.commons_media_path";
    // private final String WD_COMMONS_MEDIA_TYPE_TYPE_URI = "org.deepamehta.wikidata.commons_media_type";
    private final String WD_COMMONS_MEDIA_DESCR_TYPE_URI = "org.deepamehta.wikidata.commons_media_descr";
    private final String WD_COMMONS_AUTHOR_HTML_URI = "org.deepamehta.wikidata.commons_author_html";
    private final String WD_COMMONS_LICENSE_HTML_URI = "org.deepamehta.wikidata.commons_license_html";
    // private final String WD_GLOBE_COORDINATE_TYPE_URI = "org.deepamehta.wikidata.globe_coordinate";

    private final String WD_ENTITY_CLAIM_EDGE = "org.deepamehta.wikidata.claim_edge";

    // --- Wikidata Service URIs

    private final String WD_SEARCH_ENTITIES_ENDPOINT =
            "https://www.wikidata.org/w/api.php?action=wbsearchentities&format=json&limit=50";
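    // For illustration: searchWikidataEntity() below appends "&search=<query>&language=<lang>&type=<type>"
    // to WD_SEARCH_ENTITIES_ENDPOINT, e.g. "...&search=Berlin&language=en&type=item" (values are examples).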
    private final String WD_CHECK_ENTITY_CLAIMS_ENDPOINT =
            "https://www.wikidata.org/w/api.php?action=wbgetclaims&format=json"; // &ungroupedlist=0
    private final String WD_GET_ENTITY_ENDPOINT = "https://www.wikidata.org/w/api.php?action=wbgetentities"
            + "&props=info%7Caliases%7Clabels%7Cdescriptions&format=json"; // sitelinks%2Furls%7C
    private final String WD_SEARCH_ENTITY_TYPE_PROPERTY = "property";
    private final String WD_SEARCH_ENTITY_TYPE_ITEM = "item";
    private final String WD_ENTITY_BASE_URI = "org.wikidata.entity.";

    private final String LANG_EN = "en";

    private final String WIKIDATA_ENTITY_URL_PREFIX = "//www.wikidata.org/wiki/";
    private final String WIKIDATA_PROPERTY_ENTITY_URL_PREFIX = "Property:";
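    // For illustration: buildWikidataEntityModel() combines these prefixes so that an item resolves to
    // "//www.wikidata.org/wiki/Q42" and a property to "//www.wikidata.org/wiki/Property:P18" (ids are examples).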
    // private final String WIKIMEDIA_COMMONS_MEDIA_FILE_URL_PREFIX = "//commons.wikimedia.org/wiki/File:";

    @Inject
    private AccessControlService acService = null;



    // --
    // --- Public REST API Endpoints
    // --

    @GET
    @Path("/search/{entity}/{query}/{language_code}")
    @Produces(MediaType.APPLICATION_JSON)
    @Override
    @Transactional
    public Topic searchWikidataEntity(@PathParam("query") String query, @PathParam("language_code") String lang,
                @PathParam("entity") String type) {
        String json_result = "";
        StringBuffer resultBody = new StringBuffer();
        URL requestUri = null;
        Topic search_bucket = null;
        // sanity check (fall back to "en" if no language was provided by an un-initialized language widget)
        if (lang == null || lang.equals("undefined")) {
            log.warning("Wikidata Language Search Option was not provided, now requesting data in EN");
            lang = LANG_EN;
        }
        // start search operation
        try {
            // 1) fixme: Authorize request
            // the query is URL-encoded so that spaces and special characters survive the request
            requestUri = new URL(WD_SEARCH_ENTITIES_ENDPOINT + "&search=" + URLEncoder.encode(query, CHARSET)
                    + "&language=" + lang + "&type=" + type);
            log.info("Wikidata Search Entities Request: " + requestUri.toString());
            // 2) initiate request
            HttpURLConnection connection = (HttpURLConnection) requestUri.openConnection();
            connection.setRequestMethod("GET");
            connection.setRequestProperty("User-Agent", "DeepaMehta "+DEEPAMEHTA_VERSION+" - "
                    + "Wikidata Search " + WIKIDATA_TYPE_SEARCH_VERSION);
            // 3) check the response
            int httpStatusCode = connection.getResponseCode();
            if (httpStatusCode != HttpURLConnection.HTTP_OK) {
                throw new RuntimeException("Error with HTTPConnection, HTTP Status: " + httpStatusCode);
            }
            // 4) read in the response
            BufferedReader rd = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET));
            for (String input; (input = rd.readLine()) != null;) {
                resultBody.append(input);
            }
            rd.close();
            // 5) process response
            if (resultBody.toString().isEmpty()) {
                throw new RuntimeException("Wikidata was silent, HTTP Response: No content!");
            } else {
                log.fine("Wikidata Search Request Response: " + resultBody.toString());
                // ..) create a Wikidata Search Bucket
                ChildTopicsModel bucket_model = new ChildTopicsModel();
                bucket_model.put(WD_SEARCH_QUERY_URI, query);
                bucket_model.putRef(WD_LANGUAGE_URI, WD_LANGUAGE_DATA_URI_PREFIX + lang);
                json_result = resultBody.toString();
                processWikidataEntitySearch(json_result, bucket_model, type, lang);
                search_bucket = dms.createTopic(new TopicModel(WD_SEARCH_BUCKET_URI, bucket_model));
                // workaround: addRef does not (yet) fetchComposite, so we re-fetch the topic (fetchComposite=true)
                search_bucket = dms.getTopic(search_bucket.getId());
                log.info("Wikidata Search Bucket for "+ query +" in ("+ lang +") was CREATED");
            }
            search_bucket.loadChildTopics(); // load all child topics
        } catch (MalformedURLException e) {
            log.warning("Wikidata Plugin: MalformedURLException ..." + e.getMessage());
            // requestUri may still be null here, so the endpoint constant is logged instead
            throw new RuntimeException("Could not form wikidata endpoint URL - " + WD_SEARCH_ENTITIES_ENDPOINT, e);
        } catch (IOException ioe) {
            log.warning("Wikidata Plugin: IOException ..." + ioe.getMessage());
            throw new WebApplicationException(new Throwable(ioe), Status.BAD_REQUEST);
        }
        return search_bucket;
    }

    @GET
    @Path("/{entityId}/{language_code}")
    @Produces(MediaType.APPLICATION_JSON)
    @Override
    @Transactional
    public Topic getOrCreateWikidataEntity(@PathParam("entityId") String entityId,
        @PathParam("language_code") String language_code) {
        String json_result = "";
        StringBuffer resultBody = new StringBuffer();
        URL requestUri = null;
        Topic entity = null;
        // sanity check (fall back to "en" if no language was provided by an un-initialized language widget)
        if (language_code == null || language_code.equals("undefined")) {
            log.warning("Wikidata Language Search Option was not provided, now requesting data in EN");
            language_code = LANG_EN;
        }
        try {
            // 1) fixme: Authorize request
            // &sites=dewiki&&languages=de
            requestUri = new URL(WD_GET_ENTITY_ENDPOINT + "&ids="+ entityId + "&languages=" + language_code);
            log.fine("Requesting Wikidata Entity Details: " + requestUri.toString());
            // 2) initiate request
            HttpURLConnection connection = (HttpURLConnection) requestUri.openConnection();
            connection.setRequestMethod("GET");
            connection.setRequestProperty("User-Agent", "DeepaMehta "+DEEPAMEHTA_VERSION+" - "
                    + "Wikidata Search " + WIKIDATA_TYPE_SEARCH_VERSION);
            // 3) check the response
            int httpStatusCode = connection.getResponseCode();
            if (httpStatusCode != HttpURLConnection.HTTP_OK) {
                throw new RuntimeException("Error with HTTPConnection, HTTP Status: " + httpStatusCode);
            }
            // 4) read in the response
            BufferedReader rd = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET));
            for (String input; (input = rd.readLine()) != null;) {
                resultBody.append(input);
            }
            rd.close();
            // 5) process response
            if (resultBody.toString().isEmpty()) {
                throw new RuntimeException("Wikidata was silent, HTTP Response: No content!");
            } else {
                // 6) create or update the Wikidata search entity
                json_result = resultBody.toString();
                log.fine("Wikidata Entity Request Response: " + json_result);
                JSONObject response = new JSONObject(json_result);
                JSONObject entities = response.getJSONObject("entities");
                JSONObject response_entity = entities.getJSONObject(entityId);
                // check if we need to CREATE or UPDATE our search result entity item
                Topic existingEntity = dms.getTopic("uri",
                            new SimpleValue(WD_SEARCH_ENTITIY_DATA_URI_PREFIX + entityId));
                if (existingEntity == null) {
                    entity = createWikidataSearchEntity(response_entity, language_code);
                } else {
                    // updates labels, descriptions, aliases, url and (query) language
                    entity = updateWikidataEntity(existingEntity, response_entity, language_code);
                }
                entity.loadChildTopics();
            }
        } catch (MalformedURLException e) {
            log.warning("Wikidata Plugin: MalformedURLException ..." + e.getMessage());
            // requestUri may still be null here, so the endpoint constant is logged instead
            throw new RuntimeException("Could not form wikidata endpoint URL - " + WD_GET_ENTITY_ENDPOINT, e);
        } catch (IOException ioe) {
            throw new WebApplicationException(new Throwable(ioe), Status.BAD_REQUEST);
        } catch (JSONException je) {
            throw new WebApplicationException(new Throwable(je), Status.INTERNAL_SERVER_ERROR);
        }
        return entity;
    }

    /** This method handles the "Import topics" command available on all "Wikidata Search Result" topics. */
    @GET
    @Path("/check/claims/{id}/{language_code}")
    @Produces(MediaType.APPLICATION_JSON)
    @Override
    @Transactional
    public Topic loadClaimsAndRelatedWikidataItems(@PathParam("id") long topicId,
            @PathParam("language_code") String language_option) {

        String json_result = "";
        StringBuffer resultBody = new StringBuffer();
        URL requestUri = null;
        Topic wikidataItem = dms.getTopic(topicId);
        // 0) sanity check (fall back to "en" if no language was provided by an un-initialized language widget)
        if (language_option == null || language_option.equals("undefined")) {
            log.warning("Wikidata Language Search Option was not provided, now requesting data in EN.");
            language_option = LANG_EN;
        }
        String wikidataId = wikidataItem.getUri().replaceAll(WD_SEARCH_ENTITIY_DATA_URI_PREFIX, "");
        try {
            // 1) ### Authorize request
            // ### be explicit and add "&rank=normal" to the wbgetclaims call, ### add "&props=references" at some point
            requestUri = new URL(WD_CHECK_ENTITY_CLAIMS_ENDPOINT + "&entity=" + wikidataId);
            log.fine("Requesting Wikidata Entity Claims: " + requestUri.toString());
            // 2) initiate request
            HttpURLConnection connection = (HttpURLConnection) requestUri.openConnection();
            connection.setRequestMethod("GET");
            connection.setRequestProperty("User-Agent", "DeepaMehta "+DEEPAMEHTA_VERSION+" - "
                    + "Wikidata Search " + WIKIDATA_TYPE_SEARCH_VERSION);
            // 3) check the response
            int httpStatusCode = connection.getResponseCode();
            if (httpStatusCode != HttpURLConnection.HTTP_OK) {
                throw new RuntimeException("Error with HTTPConnection, HTTP Status: " + httpStatusCode);
            }
            // 4) read in the response
            BufferedReader rd = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET));
            for (String input; (input = rd.readLine()) != null;) {
                resultBody.append(input);
            }
            rd.close();
            // 5) process response
            if (resultBody.toString().isEmpty()) {
                throw new RuntimeException("Wikidata was silent, HTTP Response: No content!");
            } else {
                json_result = resultBody.toString();
                log.fine("Wikidata Claim Request Response: " + json_result);
                processWikidataClaims(json_result, wikidataItem, language_option);
                log.info("Wikidata Claim Response is FINE");
            }
            wikidataItem.loadChildTopics(); // load all child topics
        } catch (MalformedURLException e) {
            log.warning("Wikidata Plugin: MalformedURLException ..." + e.getMessage());
            // requestUri may still be null here, so the endpoint constant is logged instead
            throw new RuntimeException("Could not form wikidata endpoint URL - " + WD_CHECK_ENTITY_CLAIMS_ENDPOINT, e);
        } catch (IOException ioe) {
            throw new WebApplicationException(new Throwable(ioe), Status.BAD_REQUEST);
        }
        return wikidataItem;
    }

    @GET
    @Path("/property/turn/{id}")
    @Produces(MediaType.APPLICATION_JSON)
    @Override
    @Transactional
    public Topic createWikidataAssociationType(@PathParam("id") long id) {
        AssociationType association_type = null;
        try {
            Topic property_entity = dms.getTopic(id);
            // 1) create a new Association Type model
            String property_name = property_entity.getSimpleValue().toString();
            AssociationTypeModel assoc_type_model = new AssociationTypeModel("org.deepamehta.wikidata.assoctype_"
                    + property_entity.getUri().replaceAll(WD_SEARCH_ENTITIY_DATA_URI_PREFIX, ""),
                    property_name, "dm4.core.text");
            association_type = dms.createAssociationType(assoc_type_model);
            // 2) assign it to the "Wikidata" workspace
            assignToWikidataWorkspace(association_type);
            // 3) associate the search-result entity with the new assoc type (to keep track)
            dms.createAssociation(new AssociationModel("dm4.core.association",
                    new TopicRoleModel(property_entity.getUri(), "dm4.core.default"),
                    new TopicRoleModel(association_type.getUri(), "dm4.core.default")
                    ));
            log.info("Turned wikidata property \""+ property_entity.getUri() +"\" into DM Association Type!");
        } catch (Exception e) {
            log.warning("OH: The Wikidata Plugin experienced an unforeseen error! "+ e.getMessage());
        }
        return association_type;
    }

    @GET
    @Path("/property/related/claims/{id}")
    @Produces(MediaType.APPLICATION_JSON)
    @Override
    public ResultList<RelatedAssociation> getTopicRelatedAssociations(@PathParam("id") long topicId) {
        Topic topic = dms.getTopic(topicId);
        ResultList<RelatedAssociation> associations = topic.getRelatedAssociations("dm4.core.aggregation",
                "dm4.core.child", "dm4.core.parent", "org.deepamehta.wikidata.claim_edge");
        return associations.loadChildTopics();
    }

    // --
    // ---  Wikidata Search (Application Specific) Private Methods
    // --

    private void processWikidataEntitySearch(String json_result, ChildTopicsModel search_bucket,
            String type, String lang) {
        try {
            JSONObject response = new JSONObject(json_result);
            JSONArray result = response.getJSONArray("search");
            if (result.length() > 0) {
                for (int i = 0; i < result.length(); i++) {
                    JSONObject entity_response = result.getJSONObject(i);
                    // check if the entity already exists
                    String id = entity_response.getString("id");
                    Topic existing_entity = dms.getTopic("uri",
                            new SimpleValue(WD_SEARCH_ENTITIY_DATA_URI_PREFIX + id));
                    if (existing_entity == null) {
                        // create a new search entity composite
                        String name = entity_response.getString("label");
                        String url = entity_response.getString("url");
                        //
                        ChildTopicsModel entity_composite = new ChildTopicsModel();
                        entity_composite.put(WD_SEARCH_ENTITY_LABEL_URI, name);
                        if (entity_response.has("description")) {
                            String description = entity_response.getString("description");
                            entity_composite.put(WD_SEARCH_ENTITY_DESCR_URI, description);
                        }
                        entity_composite.put(DM_WEBBROWSER_URL, url);
                        // ### fix: aliases add up
                        if (entity_response.has("aliases")) {
                            JSONArray aliases = entity_response.getJSONArray("aliases");
                            for (int a=0; a < aliases.length(); a++) {
                                String alias = aliases.getString(a);
                                entity_composite.add(WD_SEARCH_ENTITY_ALIAS_URI,
                                    new TopicModel(WD_SEARCH_ENTITY_ALIAS_URI, new SimpleValue(alias)));
                            }
                        }
                        // set entity place in result set
                        entity_composite.put(WD_SEARCH_ENTITY_ORDINAL_NR, i);
                        // set entity-type
                        entity_composite.put(WD_SEARCH_ENTITY_TYPE_URI, type);
                        // set language-value on entity-result
                        entity_composite.putRef(WD_LANGUAGE_URI, WD_LANGUAGE_DATA_URI_PREFIX + lang);
                        TopicModel entity_model = new TopicModel(WD_SEARCH_ENTITIY_DATA_URI_PREFIX + id,
                                WD_SEARCH_ENTITY_URI, entity_composite);
                        // create and reference the entity in the wikidata search bucket
                        search_bucket.add(WD_SEARCH_ENTITY_URI, entity_model);
                    } else {
                        // reference the existing entity in the wikidata search bucket by URI
                        search_bucket.addRef(WD_SEARCH_ENTITY_URI, WD_SEARCH_ENTITIY_DATA_URI_PREFIX + id);
                    }
                }
            }
        } catch (JSONException ex) {
            throw new RuntimeException(ex);
        }
    }

    private Topic createWikidataSearchEntity(JSONObject entity_response, String lang) {
        Topic entity = null;
        DeepaMehtaTransaction tx = dms.beginTx();
        try {
            String id = entity_response.getString("id");
            // create a new search entity composite
            ChildTopicsModel entity_composite = buildWikidataEntityModel(entity_response, lang);
            TopicModel entity_model = new TopicModel(WD_SEARCH_ENTITIY_DATA_URI_PREFIX + id,
                    WD_SEARCH_ENTITY_URI, entity_composite);
            entity = dms.createTopic(entity_model);
            log.info("Wikidata Search Entity Created (" +
                entity_composite.getString(WD_SEARCH_ENTITY_TYPE_URI)+ "): \"" +
                entity.getSimpleValue() +"\" "+entity.getId()+" - FINE!");
            tx.success();
            return entity;
        } catch (Exception ex) {
            tx.failure();
            throw new RuntimeException(ex);
        } finally {
            tx.finish();
        }
    }

    private Topic updateWikidataEntity(Topic entity, JSONObject entity_response, String lang) {
        DeepaMehtaTransaction tx = dms.beginTx();
        try {
            // update the existing search entity topic
            ChildTopicsModel entity_composite = buildWikidataEntityModel(entity_response, lang);
            TopicModel entity_model = new TopicModel(entity.getId(), entity_composite);
            dms.updateTopic(entity_model);
            log.fine("Wikidata Search Entity Updated (" +
                entity_composite.getString(WD_SEARCH_ENTITY_TYPE_URI)+ "): \"" + entity.getSimpleValue() +"\" - FINE!");
            tx.success();
            return entity;
        } catch (Exception ex) {
            tx.failure();
            throw new RuntimeException(ex);
        } finally {
            tx.finish();
        }
    }

    private ChildTopicsModel buildWikidataEntityModel(JSONObject entity_response, String lang) {
        ChildTopicsModel entity_composite = null;
        try {
            String id = entity_response.getString("id");
            String type = entity_response.getString("type");
            entity_composite = new ChildTopicsModel();
            // main label
            if (entity_response.has("labels")) {
                JSONObject labels = entity_response.getJSONObject("labels");
                JSONObject languaged_label = null;
                if (labels.has(lang)) {
                    languaged_label = labels.getJSONObject(lang);
                    String label = languaged_label.getString("value");
                    entity_composite.put(WD_SEARCH_ENTITY_LABEL_URI, label);
                } else {
                    log.warning("No label found for language \"" + lang + "\" and id " + id);
                }
            }
            // main description
            if (entity_response.has("descriptions")) {
                JSONObject descriptions = entity_response.getJSONObject("descriptions");
                JSONObject languaged_descr = null;
                if (descriptions.has(lang)) {
                    languaged_descr = descriptions.getJSONObject(lang);
                    String description = languaged_descr.getString("value");
                    entity_composite.put(WD_SEARCH_ENTITY_DESCR_URI, description);
                } else {
                    log.warning("No description found for language \"" + lang + "\" and id " + id);
                }
            }
            // aliases
            if (entity_response.has("aliases")) {
                JSONObject aliases = entity_response.getJSONObject("aliases");
                JSONArray languaged_aliases = null;
                if (aliases.has(lang)) {
                    languaged_aliases = aliases.getJSONArray(lang);
                    for (int a=0; a < languaged_aliases.length(); a++) {
                        JSONObject alias_object = languaged_aliases.getJSONObject(a);
                        String alias = alias_object.getString("value");
                        entity_composite.add(WD_SEARCH_ENTITY_ALIAS_URI,
                            new TopicModel(WD_SEARCH_ENTITY_ALIAS_URI, new SimpleValue(alias)));
                    }
                }
            }
            // set wikidata url
            if (type.equals(WD_SEARCH_ENTITY_TYPE_PROPERTY)) {
                entity_composite.put(DM_WEBBROWSER_URL, WIKIDATA_ENTITY_URL_PREFIX
                        + WIKIDATA_PROPERTY_ENTITY_URL_PREFIX + id);
            } else {
                entity_composite.put(DM_WEBBROWSER_URL, WIKIDATA_ENTITY_URL_PREFIX + id);
            }
            // set language-value on entity-result
            entity_composite.putRef(WD_LANGUAGE_URI, WD_LANGUAGE_DATA_URI_PREFIX + lang);
            // ### sitelinks
            /** if (entity_response.has("sitelinks")) {
                JSONObject sitelinks = entity_response.getJSONObject("sitelinks");
                if (sitelinks.has(lang + "wiki")) {
                    JSONObject sitelink = sitelinks.getJSONObject(lang + "wiki");
                    entity_composite.put(DM_WEBBROWSER_URL, sitelink.getString("url"));
                } else {
                    log.warning("There is no sitelink for this item in this language/wiki: " + lang + "wiki");
                }
            } **/
            entity_composite.put(WD_SEARCH_ENTITY_TYPE_URI, type);
            return entity_composite;
        } catch (JSONException jex) {
            throw new RuntimeException(jex);
        }
    }

    /**
     * Fix 1: On each processing run, delete all outgoing claims and re-create them from scratch; this way we
     * support *deletion* of claims at the remote site without re-creating the referenced items.
     * Fix 2: Process qualifier snaks on each claim (extend the migration for that).
     * Fix 3: Process all references for each claim (simply as URLs?).
     */
    private void processWikidataClaims(String json_result, Topic wikidataItem, String language_code) {
        try {
            JSONObject response = new JSONObject(json_result);
            JSONObject result = response.getJSONObject("claims");
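            // For illustration, the "claims" object groups claim arrays by property id; the parsing below
            // relies on this shape (ids and values here are just examples):
            //   { "P31": [ { "id": "Q64$<claim-guid>", "mainsnak": { "datatype": "wikibase-item",
            //                "datavalue": { "value": { "numeric-id": 515 } } } }, ... ], ... }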
            // delete all claims going out from this item (me)
            removeAllClaimsFromThisItem(wikidataItem);
            wikidataItem = dms.getTopic(wikidataItem.getId());
            // then re-create all claims going out from this item (this is our "UPDATE")
            Iterator properties = result.keys();
            log.info("Wikidata Plugin is processing " + result.length() + " claim groups (one per property)");
            Topic propertyEntity = null;
            while (properties.hasNext()) {
                String property_id = properties.next().toString();
                // 1) load the related property-entity
                propertyEntity = getOrCreateWikidataEntity(property_id, language_code);
                // HashMap<String, List<Topic>> all_entities = new HashMap<String, List<Topic>>();
                JSONArray property_listing = result.getJSONArray(property_id);
                // ### process all claims properly (delete and then create)
                for (int i=0; i < property_listing.length(); i++) {
                    // 2) fetch the related wikidata entity
                    Topic referencedItemEntity = null;
                    JSONObject entity_response = property_listing.getJSONObject(i);
                    JSONObject mainsnak = entity_response.getJSONObject("mainsnak");
                    String claim_guid = entity_response.getString("id");
                    // 3) build up the item as part of the claim (if any)
                    String itemId = "";
                    String snakDataType = mainsnak.getString("datatype");
                    // log.info("SNakDataType=" + snakDataType + "MainSnak" + mainsnak.toString());
                    JSONObject snakDataValue = mainsnak.getJSONObject("datavalue");
                    // ..) depending on the various (claimed/related) value-types
                    if (snakDataType.equals("wikibase-item")) {
                        // log.info("Wikibase Item claimed via \"" + propertyEntity.getSimpleValue() + "\"");
                        JSONObject snakDataValueValue = snakDataValue.getJSONObject("value");
                        long numericId = snakDataValueValue.getLong("numeric-id");
                        itemId = "Q" + numericId; // is this always of entity-type "item"? the responses look like it.
                        referencedItemEntity = getOrCreateWikidataEntity(itemId, language_code);
                    } else if (snakDataType.equals("commonsMedia")) {
                        // do relate wikidata.commons_media
                        log.info(" --------- Commons Media Item! ------------");
                        if (snakDataValue.has("value")) {
                            String fileName = snakDataValue.getString("value");
                            referencedItemEntity = getOrCreateWikimediaCommonsMediaTopic(fileName);
                            log.info(" --- FINE! --- Related Wikimedia Commons File to Wikidata Item!");
                        }
                        // ### make use of WIKIMEDIA_COMMONS_MEDIA_FILE_URL_PREFIX and implement a page-renderer
                    } else if (snakDataType.equals("globe-coordinate")) {
                        // do relate wikidata.globe_coordinate
                        // log.fine("Globe Coordinate claimed via \"" + propertyEntity.getSimpleValue()
                               // + "\" ("+language_code+") DEBUG:");
                        // log.fine("  " + snakDataValue.toString());
                    } else if (snakDataType.equals("url")) {
                        if (snakDataValue.has("value")) {
                            // ### getOrCreateWebResource()
                            String value = snakDataValue.getString("value");
                            log.warning("### SKIPPING URL => " + value);
                        }
                    } else if (snakDataType.equals("string")) {
                        if (snakDataValue.has("value")) {
                            String value = snakDataValue.getString("value");
                            referencedItemEntity = getOrCreateWikidataText(value, language_code);
                        } else {
                            log.warning("Could not access wikidata-text value - json-response EMPTY!");
                        }
                    } else if (snakDataType.equals("quantity")) {
                        if (snakDataValue.has("value")) {
                            JSONObject value = snakDataValue.getJSONObject("value");
                            if (value.has("amount")) {
                                String amount = value.getString("amount");
                                referencedItemEntity = getOrCreateWikidataText(amount, language_code);
                            } else {
                               log.warning("Could not access wikidata-text value - AMOUNT EMPTY!");
                            }
                        } else {
                            log.warning("Could not access wikidata-text value - NO VALUE SET!");
                        }
                    } else {
                        log.warning("Value claimed as " + propertyEntity.getSimpleValue() + " is not of any known"
                                + " type but \"" + snakDataType +"\" ("+snakDataValue+")");
                    }
                    // store a topic reference to the (new or already existing) wikidata entity resp. value topic
                    if (referencedItemEntity != null) {
                        createWikidataClaimEdge(claim_guid, wikidataItem, referencedItemEntity,
                            propertyEntity);
                    } else {
                        log.warning("SKIPPED creating claim of type \""+snakDataType+"\" value for "
                                + "\""+propertyEntity.getSimpleValue()+"\" on \"" + wikidataItem.getSimpleValue()+"\"");
                    }
                }
                /** Iterator entity_iterator = all_entities.keySet().iterator();
                StringBuffer requesting_ids = new StringBuffer();
                while (entity_iterator.hasNext()) {
                    String entity_id = entity_iterator.next().toString();
                    requesting_ids.append(entity_id + "|");
                }
                log.info("Requesting ALL ITEMS for " +property_id+ ": " + requesting_ids.toString());
                omitting this solution because: "*": "Too many values supplied for parameter 'ids': the limit is 50" **/
            }
        } catch (JSONException ex) {
            log.warning("JSONException during processing a wikidata claim. " + ex.getMessage());
            throw new RuntimeException(ex);
        }
    }

    private void removeAllClaimsFromThisItem(Topic wikidataItem) {
        List<Association> all_claims = wikidataItem.getAssociations();
        ArrayList<Association> claims_to_be_deleted = new ArrayList<Association>();
        for (Association claim : all_claims) {
            if (claim.getTypeUri().equals(WD_ENTITY_CLAIM_EDGE)) {
                if (claim.getRole1().getModel().getRoleTypeUri().equals("dm4.core.default")
                    && claim.getRole2().getModel().getRoleTypeUri().equals("dm4.core.default")) {
                    // just delete _all_ old, un-directed associations involving me (to re-import them with direction)
                    claims_to_be_deleted.add(claim);
                }
                // every "claim" where i am the "parent" is to be deleted and re-created
                if ((claim.getRole2().getModel().getRoleTypeUri().equals("dm4.core.parent")
                    && claim.getRole2().getPlayerId() == wikidataItem.getId())
                    || (claim.getRole1().getModel().getRoleTypeUri().equals("dm4.core.parent") &&
                        claim.getRole1().getPlayerId() == wikidataItem.getId())) {
                    if (!(claim.getRole2().getPlayerId() == wikidataItem.getId() && // ### cannot remove association to one-self
                          claim.getRole1().getPlayerId() == wikidataItem.getId())) {
                        claims_to_be_deleted.add(claim);
                    } else {
                        // ### log command to investigate a corrupt database (that is, topics with
                        // self-referential associations)
                        log.warning("IDENTIFIED association to one-self, skipping removal because it would throw an Error");
                    }
                }
            }
        }
        log.info("> " + claims_to_be_deleted.size() + " claims to be DELETED");
        for (Association edge : claims_to_be_deleted) {
            DeepaMehtaTransaction dx = dms.beginTx();
            try {
                log.info("> Association \""+edge.getSimpleValue()+"\" is deleted (" + edge.getUri() + ")"
                    + " from 1: \""+edge.getRole1().getPlayer().getSimpleValue()+"\" ==> "
                    + " to 2: \""+edge.getRole2().getPlayer().getSimpleValue() + "\"");
                dms.deleteAssociation(edge.getId());
                dx.success();
            } catch (Exception e) {
                dx.failure();
                throw new RuntimeException(e);
            } finally {
                dx.finish();
            }
        }
    }

    /**
     * The from-topic plays the role of a parent and the to-topic plays the role of a child,
     * just as in the semantics of a wikidata *Claim*.
     */
    private Association createWikidataClaimEdge(String claim_guid, Topic from, Topic to, Topic property) {
        Association claim = null;
        DeepaMehtaTransaction dx = dms.beginTx();
        try {
            if (!associationExists(WD_ENTITY_CLAIM_EDGE, from, to)
                && (to.getId() != from.getId())) { // ### dm4 does not allow self-referential associations
                // 1) create a "Wikidata Claim" edge with the claim GUID as its URI
                claim = dms.createAssociation(new AssociationModel(WD_ENTITY_CLAIM_EDGE,
                    new TopicRoleModel(from.getId(), "dm4.core.parent"),
                    new TopicRoleModel(to.getId(), "dm4.core.child")));
                claim.setUri(claim_guid);
                /** log.info("Created \"Wikidata Claim\" with GUID: " + claim.getUri() +" for \"" + to.getSimpleValue() +
                                " (property: " + property.getSimpleValue() +
                                "\") for \"" + from.getSimpleValue() + "\" - FINE"); **/
                // 2) assign the wikidata property (=Wikidata Search Entity) to this claim-edge
                claim.setChildTopics(new ChildTopicsModel().putRef(WD_SEARCH_ENTITY_URI,
                        property.getUri()));
                // ### problems with missing aggregated childs for composite assocTypes to be investigated ..
                dms.updateAssociation(claim.getModel());
                claim.loadChildTopics();
            }
            dx.success();
            return claim;
        } catch (Exception e) {
            log.severe("FAILED to create a \"Claim\" between \""+from.getSimpleValue()+"\" - \""+to.getSimpleValue()+"\"");
            dx.failure();
            throw new RuntimeException(e);
        } finally {
            dx.finish();
        }
    }

    private Topic getOrCreateWikidataText(String value, String lang) {
        Topic textValue = null;
        // 1) query for the text-value
        try {
            textValue = dms.getTopic(WD_TEXT_TYPE_URI, new SimpleValue(value));
        } catch (Exception ex) {
            // log.info("Could not find a wikidata-text value topic for \"" + value + ex.getMessage() + "\"");
        }
        // 2) re-use or create
        DeepaMehtaTransaction tx = dms.beginTx();
        try {
            if (textValue == null) {
                textValue = dms.createTopic(new TopicModel(WD_TEXT_TYPE_URI, new SimpleValue(value)));
                log.info("CREATED \"Wikidata Text\" - \"" + value +"\" (" + lang + ") - OK!");
            } /** else {
                log.info("FETCHED \"Wikidata Text\" - \"" + textValue.getSimpleValue() +"\" "
                        + "(" + lang + ") - Re-using it!");
            } **/
            tx.success();
            return textValue;
        } catch (Exception ex) {
            tx.failure();
            log.warning("FAILURE during creating a wikidata value topic: " + ex.getLocalizedMessage());
            throw new RuntimeException(ex);
        } finally {
            tx.finish();
        }
    }

    private Topic getOrCreateWikimediaCommonsMediaTopic(String fileName) {
        Topic mediaTopic = dms.getTopic(WD_COMMONS_MEDIA_NAME_TYPE_URI, new SimpleValue(fileName));
        if (mediaTopic == null) { // create a new media topic
            DeepaMehtaTransaction dx = dms.beginTx();
            ChildTopicsModel mediaCompositeModel = new ChildTopicsModel()
                .put(WD_COMMONS_MEDIA_NAME_TYPE_URI, fileName);
            enrichAboutWikimediaCommonsMetaData(mediaCompositeModel, fileName);
            TopicModel mediaTopicModel = new TopicModel(WD_COMMONS_MEDIA_TYPE_URI, mediaCompositeModel);
            try {
                mediaTopic = dms.createTopic(mediaTopicModel).loadChildTopics();
                log.info("Created new Wikimedia Commons Media Topic \"" + mediaTopic.getSimpleValue().toString() + "\"");
                dx.success();
            } catch (RuntimeException re) {
                log.log(Level.SEVERE, "Could not create Wikidata Commons Media Topic", re);
                dx.failure();
            } finally {
                dx.finish();
            }
        } else {
            // fetch the composite parent of the media name topic found by value
            mediaTopic = mediaTopic.getRelatedTopic("dm4.core.composition",
                "dm4.core.child", "dm4.core.parent", WD_COMMONS_MEDIA_TYPE_URI);
        }
        // ### there is no update mechanism for existing media topics yet
        return mediaTopic;
    }

    private void enrichAboutWikimediaCommonsMetaData(ChildTopicsModel model, String fileName) {
        // 1) fetch media metadata by name from http://tools.wmflabs.org/magnus-toolserver/commonsapi.php?image=
        URL requestUri;
        StringBuffer resultBody = new StringBuffer();
        String xml_result = "";
        try {
            requestUri = new URL("http://tools.wmflabs.org/magnus-toolserver/commonsapi.php?image="
                    + URLEncoder.encode(fileName, CHARSET));
            log.fine("Requesting Wikimedia Commons Item Details: " + requestUri.toString());
            // 2) initiate request
            HttpURLConnection connection = (HttpURLConnection) requestUri.openConnection();
            connection.setRequestMethod("GET");
            connection.setRequestProperty("User-Agent", "DeepaMehta "+DEEPAMEHTA_VERSION+" - "
                    + "Wikidata Search " + WIKIDATA_TYPE_SEARCH_VERSION);
            // 3) check the response
            int httpStatusCode = connection.getResponseCode();
            if (httpStatusCode != HttpURLConnection.HTTP_OK) {
                throw new RuntimeException("Error with HTTPConnection, HTTP Status: " + httpStatusCode);
            }
            // 4) read in the response
            BufferedReader rd = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET));
            for (String input; (input = rd.readLine()) != null;) {
                resultBody.append(input);
            }
            rd.close();
            // 5) process response
            if (resultBody.toString().isEmpty()) {
                throw new RuntimeException("Wikimedia Commons API was silent, HTTP Response: No content!");
            } else {
                DocumentBuilder builder;
                Document document;
                xml_result = resultBody.toString();
                builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
                document = builder.parse(new InputSource(new ByteArrayInputStream(xml_result.getBytes(CHARSET))));
                NodeList responses = document.getElementsByTagName("response");
                // Node defaultLanguageDescr = responses.item(1).getFirstChild();
                Node fileElement = responses.item(0).getFirstChild();
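                // Note on the index-based navigation below: it assumes the commonsapi.php response layout
                // observed at the time of writing (resource URLs as the third child of the file element,
                // author HTML at index 10, permission HTML at index 12); if the service changes its XML
                // layout, these offsets have to be revisited.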
                Node resourceUrls = fileElement.getChildNodes().item(2);
                NodeList resourceElements = resourceUrls.getChildNodes(); // file and description as childs
                Node filePath = resourceElements.item(0); // file at 0
                Node authorUrl = fileElement.getChildNodes().item(10); // authorUrl HTML at 10
                Node permission = fileElement.getChildNodes().item(12); // permission HTML at 12
                String authorText = (authorUrl != null) ? authorUrl.getTextContent() : "No author information available.";
                String permissionText = (permission != null) ? permission.getTextContent() : "No license information available.";
                model.put(WD_COMMONS_MEDIA_PATH_TYPE_URI, filePath.getTextContent());
                // model.put(WD_COMMONS_MEDIA_DESCR_TYPE_URI, defaultLanguageDescr.getTextContent());
                model.put(WD_COMMONS_AUTHOR_HTML_URI, authorText);
                model.put(WD_COMMONS_LICENSE_HTML_URI, permissionText);
                log.fine(" --- Wikimedia Commons Response is FINE ---");
            }
        } catch (MalformedURLException e) {
            log.log(Level.SEVERE, "Wikidata Plugin: MalformedURLException ...", e);
        } catch (ParserConfigurationException e) {
            log.log(Level.SEVERE, "Wikidata Plugin: ParserConfigurationException ...", e);
        } catch (IOException ioe) {
            log.log(Level.SEVERE, "Wikidata Plugin: IOException ...", ioe);
        } catch (SAXException ex) {
            log.log(Level.SEVERE, "Wikidata Plugin: SAXException ...", ex);
        } catch (DOMException e) {
            log.log(Level.SEVERE, "Wikidata Plugin: DOMException ...", e);
        }
    }

    // --
    // --- DeepaMehta 4 Plugin Related Private Methods
    // --

    @Override
    public void assignToWikidataWorkspace(Topic topic) {
        if (topic == null) return;
        Topic wikidataWorkspace = dms.getTopic("uri", new SimpleValue(WS_WIKIDATA_URI));
        if (wikidataWorkspace == null) {
            log.warning("Could not assign topic to the \"Wikidata\" workspace - workspace topic not found!");
            return;
        }
        if (!associationExists("dm4.core.aggregation", topic, wikidataWorkspace)) {
            dms.createAssociation(new AssociationModel("dm4.core.aggregation",
                new TopicRoleModel(topic.getId(), "dm4.core.parent"),
                new TopicRoleModel(wikidataWorkspace.getId(), "dm4.core.child")
            ));
        }
    }

    private boolean associationExists(String edge_type, Topic item, Topic user) {
        List<Association> results = dms.getAssociations(item.getId(), user.getId(), edge_type);
        return results.size() > 0;
    }

}