From 3ffdfd5735ab1357f5bae0d1d423259827038d47 Mon Sep 17 00:00:00 2001 From: Istvan Orban Date: Wed, 17 Sep 2014 21:15:47 +0100 Subject: [PATCH] adding inner hash class to calculate hash to be able to identify duplicates inside array elements. Please note: if you have a nested array in an array, the duplication check will only work if the nested array is the same in both structures --- .../plugin/jdbc/PlainIndexableObject.java | 63 +++++++++++++++++++ .../jdbc/support/ValueListenerTests.java | 4 +- 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/xbib/elasticsearch/plugin/jdbc/PlainIndexableObject.java b/src/main/java/org/xbib/elasticsearch/plugin/jdbc/PlainIndexableObject.java index 8a99ff17..54de9f82 100644 --- a/src/main/java/org/xbib/elasticsearch/plugin/jdbc/PlainIndexableObject.java +++ b/src/main/java/org/xbib/elasticsearch/plugin/jdbc/PlainIndexableObject.java @@ -178,7 +178,16 @@ protected boolean checkCollapsedMapLength(Map map) { @SuppressWarnings({"unchecked"}) protected XContentBuilder toXContent(XContentBuilder builder, Params params, List list) throws IOException { builder.startArray(); + HashValueCalculator hashUtil = new HashValueCalculator(); + long prev = 0; + long current = 0; for (Object o : list) { + current = hashUtil.calculate(o); + if ( current == prev ) { + continue; + } else { + prev = current; + } if (o instanceof Values) { Values v = (Values) o; v.toXContent(builder, ToXContent.EMPTY_PARAMS); @@ -233,4 +242,58 @@ public int hashCode() { return hash; } + /** + * Implements a hash calculator for ES graph objects. 
It is used to identify identical graphs in the structure + */ + private class HashValueCalculator { + + public long calculate(Object o) throws IOException { + long hash = 0; + if (o instanceof Values) { + Values v = (Values) o; + hash = calculate(v); + } else if (o instanceof Map) { + hash = calculate((Map) o); + } else if (o instanceof List) { + hash = calculate((List) o); + } else { + try { + hash = hash + o.hashCode(); + } catch (Exception e) { + throw new IOException("unknown object class:" + o.getClass().getName()); + } + } + return hash; + } + + public long calculate(Values values) throws IOException { + long hash = 0; + if ( values != null && !values.isNull() ) { + for ( Object o : values.getValues()) { + hash = hash + o.hashCode(); + } + } + return hash; + } + + public long calculate(List list) throws IOException { + long hash = 0; + for (Object o : list) { + hash = hash + calculate(o); + } + return hash; + } + + public long calculate(Map map) throws IOException { + long hash = 0; + for (Map.Entry k : map.entrySet()) { + Object o = k.getValue(); + if (ignoreNull && (o == null || (o instanceof Values) && ((Values) o).isNull())) { + continue; + } + hash = hash + calculate(o); + } + return hash; + } + } } diff --git a/src/test/java/org/xbib/elasticsearch/river/jdbc/support/ValueListenerTests.java b/src/test/java/org/xbib/elasticsearch/river/jdbc/support/ValueListenerTests.java index 567ec82e..e4088b40 100644 --- a/src/test/java/org/xbib/elasticsearch/river/jdbc/support/ValueListenerTests.java +++ b/src/test/java/org/xbib/elasticsearch/river/jdbc/support/ValueListenerTests.java @@ -231,6 +231,7 @@ public void testArrays() throws Exception { List row1 = Arrays.asList("4679", "Tesla, Abe and Elba", "2014-01-06 00:00:00", "3917", "Idris Elba", "9450", "/web/q/g/h/57436356.jpg"); List row2 = Arrays.asList("4679", "Tesla, Abe and Elba", "2014-01-06 00:00:00", "3917", "Idris Elba", "9965", "/web/i/s/q/GS3193626.jpg"); List row3 = Arrays.asList("4679", "Tesla, Abe and 
Elba", "2014-01-06 00:00:00", "3917", "Idris Elba", "9451", "/web/i/s/q/GS3193626.jpg"); + List row4 = Arrays.asList("4679", "Tesla, Abe and Elba", "2014-01-06 00:00:00", "3918", "Idris Elba", "9451", "/web/i/s/q/GS3193626.jpg"); MockRiverMouth output = new MockRiverMouth(); new StringKeyValueStreamListener() .output(output) @@ -239,9 +240,10 @@ public void testArrays() throws Exception { .values(row1) .values(row2) .values(row3) + .values(row4) .end(); assertEquals(output.data().toString(), - "{[null/null/null/4679]->{blog={name=\"Tesla, Abe and Elba\", published=\"2014-01-06 00:00:00\", association=[{id=\"3917\", name=\"Idris Elba\"}, {id=\"3917\", name=\"Idris Elba\"}, {id=\"3917\", name=\"Idris Elba\"}], attachment=[{id=\"9450\", name=\"/web/q/g/h/57436356.jpg\"}, {id=\"9965\", name=\"/web/i/s/q/GS3193626.jpg\"}, {id=\"9451\", name=\"/web/i/s/q/GS3193626.jpg\"}]}}={\"blog\":{\"name\":\"Tesla, Abe and Elba\",\"published\":\"2014-01-06 00:00:00\",\"association\":[{\"id\":\"3917\",\"name\":\"Idris Elba\"},{\"id\":\"3917\",\"name\":\"Idris Elba\"},{\"id\":\"3917\",\"name\":\"Idris Elba\"}],\"attachment\":[{\"id\":\"9450\",\"name\":\"/web/q/g/h/57436356.jpg\"},{\"id\":\"9965\",\"name\":\"/web/i/s/q/GS3193626.jpg\"},{\"id\":\"9451\",\"name\":\"/web/i/s/q/GS3193626.jpg\"}]}}}" + "{[null/null/null/4679]->{blog={name=\"Tesla, Abe and Elba\", published=\"2014-01-06 00:00:00\", association=[{id=\"3917\", name=\"Idris Elba\"}, {id=\"3917\", name=\"Idris Elba\"}, {id=\"3917\", name=\"Idris Elba\"}, {id=\"3918\", name=\"Idris Elba\"}], attachment=[{id=\"9450\", name=\"/web/q/g/h/57436356.jpg\"}, {id=\"9965\", name=\"/web/i/s/q/GS3193626.jpg\"}, {id=\"9451\", name=\"/web/i/s/q/GS3193626.jpg\"}, {id=\"9451\", name=\"/web/i/s/q/GS3193626.jpg\"}]}}={\"blog\":{\"name\":\"Tesla, Abe and Elba\",\"published\":\"2014-01-06 00:00:00\",\"association\":[{\"id\":\"3917\",\"name\":\"Idris Elba\"},{\"id\":\"3918\",\"name\":\"Idris 
Elba\"}],\"attachment\":[{\"id\":\"9450\",\"name\":\"/web/q/g/h/57436356.jpg\"},{\"id\":\"9965\",\"name\":\"/web/i/s/q/GS3193626.jpg\"},{\"id\":\"9451\",\"name\":\"/web/i/s/q/GS3193626.jpg\"}]}}}" ); }