Context Navigation

Back to Ticket #3733

Ticket #3733: patch.diff

File patch.diff, 6.6 KB (added by mdk, 14 years ago)
changes

josm/src/org/openstreetmap/josm/data/validation/tests/SimilarNamedWays.java

 // License: GPL. See LICENSE file for details.
 package org.openstreetmap.josm.data.validation.tests;
+import static java.util.regex.Pattern.CASE_INSENSITIVE;
+import static java.util.regex.Pattern.UNICODE_CASE;
 import static org.openstreetmap.josm.tools.I18n.tr;
 import java.awt.geom.Point2D;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import org.openstreetmap.josm.data.osm.OsmPrimitive;
 import org.openstreetmap.josm.data.osm.Way;
 …
     /** The already detected errors */
     MultiMap<Way, Way> errorWays;
+    private ArrayList<NormalizeRule> rules_ = new ArrayList<NormalizeRule>();
     /**
      * Constructor
      */
 …
                     continue;
+                }
+                int levenshteinDistance = getLevenshteinDistance(name, name2);
+                if (0 < levenshteinDistance && levenshteinDistance <= 2) {
+                if (similaryName(name, name2)) {
                     List<OsmPrimitive> primitives = new ArrayList<OsmPrimitive>();
                     primitives.add(w);
                     primitives.add(w2);
 …
         // Step 7
         return d[n][m];
+    }
+    /**
+     * Add a regular expression rule.
+     * @param regExpr the regular expression to search for
+     * @param replacement a string to replace with, which should match the expression.
+     */
+    public void addRegExprRule(String regExpr, String replacement) {
+        rules_.add(new RegExprRule(regExpr, replacement));
+    }
+    /**
+     * Add a rule with synonym words.
+     * @param words words which are synonyms
+     */
+    public void addSynonyms(String... words) {
+        for (String word : words) {
+            rules_.add(new SynonymRule(word, words));
+        }
+    }
+    /**
+     * Check if two names are similar, but not identical. First both names will be "normalized".
+     * Afterwards the Levenshtein distance will be calculated.<br>
+     * Examples for normalization rules:<br>
+     * <code>replaceAll("\\d+", "0")</code><br>
+     * would cause similaryName("track 1", "track 2") = false, but similaryName("Track 1", "track 2") = true
+     * @param name first name to compare
+     * @param name2 second name to compare
+     * @return true if the normalized names are different but only a "little bit"
+     */
+    public boolean similaryName(String name, String name2) {
+        // check plain strings
+        int distance = getLevenshteinDistance(name, name2);
+        boolean similar = distance>0 && distance<=2;
+        // try all rules
+        for (NormalizeRule rule : rules_) {
+            int levenshteinDistance = getLevenshteinDistance(rule.normalize(name), rule.normalize(name2));
+            if (levenshteinDistance == 0)
+                // one rule results in identical names: identical
+                return false;
+            else if (levenshteinDistance <= 2) {
+                // 0 < distance <= 2
+                similar = true;
+            }
+        }
+        return similar;
+    }
+    /**
+     * A rule that rewrites a name before two names are compared by
+     * {@link #similaryName(String, String)}.
+     */
+    public interface NormalizeRule {
+        /**
+         * Normalize the string by replacing parts of it.
+         * @param name name to normalize
+         * @return the normalized string
+         */
+        String normalize(String name);
+    }
+    public class RegExprRule implements NormalizeRule {
+        private Pattern regExpr_;
+        private String replacement_;
+        public RegExprRule(String expression, String replacement) {
+            regExpr_ = Pattern.compile(expression);
+            replacement_ = replacement;
+        }
+        @Override
+        public String normalize(String name) {
+            return regExpr_.matcher(name).replaceAll(replacement_);
+        }
+        @Override
+        public String toString() {
+            return "replaceAll(" + regExpr_ + ", " + replacement_ + ")";
+        }
+    }
+    public class SynonymRule implements NormalizeRule {
+        private String[] words_;
+        private Pattern regExpr_;
+        private String replacement_;
+        public SynonymRule(String replacement, String[] words) {
+            replacement_ = replacement.toLowerCase();
+            words_ = words;
+            // build regular expression for other words (for fast match)
+            StringBuilder expression = new StringBuilder();
+            int maxLength = 0;
+            for (int i = 0; i < words.length; i++) {
+                if (words[i].length() > maxLength) {
+                    maxLength = words[i].length();
+                }
+                if (expression.length() > 0) {
+                    expression.append("|");
+                }
+                expression.append(Pattern.quote(words[i]));
+            }
+            regExpr_ = Pattern.compile(expression.toString(), CASE_INSENSITIVE + UNICODE_CASE);
+        }
+        @Override
+        public String normalize(String name) {
+            // find first match
+            Matcher matcher = regExpr_.matcher(name);
+            if (!matcher.find())
+                return name;
+            int start = matcher.start();
+            // which word matches?
+            String part = "";
+            for (int i = 0; i < words_.length; i++) {
+                String word = words_[i];
+                part = name.substring(start, start + word.length());
+                if (word.equalsIgnoreCase(part)) {
+                    break;
+                }
+            }
+            // replace the word
+            char[] newName = matcher.replaceFirst(replacement_).toCharArray();
+            // adjust case (replacement is not shorter than matching word!)
+            int minLength = Math.min(replacement_.length(), part.length());
+            for (int i = 0; i < minLength; i++) {
+                if (Character.isUpperCase(part.charAt(i))) {
+                    newName[start + i] = Character.toUpperCase(newName[start + i]);
+                }
+            }
+            return new String(newName);
+        }
+        @Override
+        public String toString() {
+            return "synonyms(" + replacement_ + ", " + Arrays.toString(words_) + ")";
+        }
+    }
+}

Download in other formats:

Original Format