Ticket #3733: 3733-brianegge.patch

File 3733-brianegge.patch, 13.5 KB (added by brianegge, 12 years ago)

Incorporates previous patches

  • src/org/openstreetmap/josm/data/validation/TestError.java

     
    326326
    327327    @Override
    328328    public String toString() {
    329         return "TestError [tester=" + tester + ", code=" + code + "]";
     329        return "TestError [tester=" + tester + ", code=" + code + ", message=" + message + "]";
    330330    }
    331331}
  • src/org/openstreetmap/josm/data/validation/tests/SimilarNamedWays.java

     
    11// License: GPL. See LICENSE file for details.
    22package org.openstreetmap.josm.data.validation.tests;
    33
     4import static java.util.regex.Pattern.CASE_INSENSITIVE;
     5import static java.util.regex.Pattern.UNICODE_CASE;
    46import static org.openstreetmap.josm.tools.I18n.tr;
    57
    68import java.awt.geom.Point2D;
    79import java.util.ArrayList;
     10import java.util.Arrays;
    811import java.util.HashMap;
    912import java.util.List;
    1013import java.util.Map;
     14import java.util.regex.Matcher;
     15import java.util.regex.Pattern;
    1116
    1217import org.openstreetmap.josm.data.osm.OsmPrimitive;
    1318import org.openstreetmap.josm.data.osm.Way;
     
    3439    /** The already detected errors */
    3540    private MultiMap<Way, Way> errorWays;
    3641
     42    private ArrayList<NormalizeRule> rules_ = new ArrayList<NormalizeRule>();
     43
     44
    3745    /**
    3846     * Constructor
    3947     */
     
    public SimilarNamedWays() {
        super(
                tr("Similarly named ways"),
                tr("This test checks for ways with similar names that may have been misspelled."));

        // TODO: hardcode these rules for now. Replace them with preferences later
        // See https://josm.openstreetmap.de/ticket/3733#comment:19

        // Highway 66
        addRegExprRule("\\d+", "0");
        // 3rd Ave
        addRegExprRule("\\d+(st|nd|rd|th)", "0st");
        // E Street
        addRegExprRule("^[A-Z] ", "X");

        // Each group lists words that name different things but differ by only a few letters.
        addSynonyms("east", "west", "north", "south");
        addSynonyms("first", "second", "third");
    }
    4460
    4561    @Override
     
    7793                    continue;
    7894                }
    7995
    80                 int levenshteinDistance = getLevenshteinDistance(name, name2);
    81                 if (0 < levenshteinDistance && levenshteinDistance <= 2) {
     96                if (similaryName(name, name2)) {
    8297                    List<OsmPrimitive> primitives = new ArrayList<>(2);
    8398                    primitives.add(w);
    8499                    primitives.add(w2);
     
    97112     * @param t Second word
    98113     * @return The distance between words
    99114     */
    100     public int getLevenshteinDistance(String s, String t) {
     115    public static int getLevenshteinDistance(String s, String t) {
    101116        int[][] d; // matrix
    102117        int n; // length of s
    103118        int m; // length of t
     
    149164        // Step 7
    150165        return d[n][m];
    151166    }
     167
     168    /**
     169     * Add a regular expression rule.
     170     * @param regExpr the regular expression to search for
     171     * @param replacement a string to replace with, which should match the expression.
     172     */
     173    public void addRegExprRule(String regExpr, String replacement) {
     174        rules_.add(new RegExprRule(regExpr, replacement));
     175    }
     176
     177    /**
     178     * Add a rule with synonym words.
     179     * @param words words which are synonyms
     180     */
     181    public void addSynonyms(String... words) {
     182        for (String word : words) {
     183            rules_.add(new SynonymRule(word, words));
     184        }
     185    }
     186
     187    /**
     188     * Check if two names are similar, but not identical. First both names will be "normalized".
     189     * Afterwards the Levenshtein distance will be calculated.<br>
     190     * Examples for normalization rules:<br>
     191     * <code>replaceAll("\\d+", "0")</code><br>
     192     * would cause similaryName("track 1", "track 2") = false, but similaryName("Track 1", "track 2") = true
     193     * @param name first name to compare
     194     * @param name2 second name to compare
     195     * @return true if the normalized names are different but only a "little bit"
     196     */
     197    public boolean similaryName(String name, String name2) {
     198        // check plain strings
     199        int distance = getLevenshteinDistance(name, name2);
     200        boolean similar = distance>0 && distance<=2;
     201
     202        // try all rules
     203        for (NormalizeRule rule : rules_) {
     204            int levenshteinDistance = getLevenshteinDistance(rule.normalize(name), rule.normalize(name2));
     205            if (levenshteinDistance == 0)
     206                // one rule results in identical names: identical
     207                return false;
     208            else if (levenshteinDistance <= 2) {
     209                // 0 < distance <= 2
     210                similar = true;
     211            }
     212        }
     213        return similar;
     214    }
     215
    /**
     * A single normalization step that is applied to names before they are compared.
     */
    public interface NormalizeRule {

        /**
         * Produces the normalized form of a name by replacing parts of it.
         * @param name the name to normalize
         * @return the normalized name
         */
        String normalize(String name);
    }
     226
     227    public class RegExprRule implements NormalizeRule {
     228        private Pattern regExpr_;
     229        private String replacement_;
     230
     231        public RegExprRule(String expression, String replacement) {
     232            regExpr_ = Pattern.compile(expression);
     233            replacement_ = replacement;
     234        }
     235
     236        @Override
     237        public String normalize(String name) {
     238            return regExpr_.matcher(name).replaceAll(replacement_);
     239        }
     240
     241        @Override
     242        public String toString() {
     243            return "replaceAll(" + regExpr_ + ", " + replacement_ + ")";
     244        }
     245    }
     246
     247    public class SynonymRule implements NormalizeRule {
     248
     249        private String[] words_;
     250        private Pattern regExpr_;
     251        private String replacement_;
     252
     253        public SynonymRule(String replacement, String[] words) {
     254            replacement_ = replacement.toLowerCase();
     255            words_ = words;
     256
     257            // build regular expression for other words (for fast match)
     258            StringBuilder expression = new StringBuilder();
     259            int maxLength = 0;
     260            for (int i = 0; i < words.length; i++) {
     261                if (words[i].length() > maxLength) {
     262                    maxLength = words[i].length();
     263                }
     264                if (expression.length() > 0) {
     265                    expression.append("|");
     266                }
     267                expression.append(Pattern.quote(words[i]));
     268            }
     269            regExpr_ = Pattern.compile(expression.toString(), CASE_INSENSITIVE + UNICODE_CASE);
     270        }
     271
     272        @Override
     273        public String normalize(String name) {
     274            // find first match
     275            Matcher matcher = regExpr_.matcher(name);
     276            if (!matcher.find())
     277                return name;
     278
     279            int start = matcher.start();
     280
     281            // which word matches?
     282            String part = "";
     283            for (int i = 0; i < words_.length; i++) {
     284                String word = words_[i];
     285                part = name.substring(start, start + word.length());
     286                if (word.equalsIgnoreCase(part)) {
     287                    break;
     288                }
     289            }
     290
     291            // replace the word
     292            char[] newName = matcher.replaceFirst(replacement_).toCharArray();
     293
     294            // adjust case (replacement is not shorter than matching word!)
     295            int minLength = Math.min(replacement_.length(), part.length());
     296            for (int i = 0; i < minLength; i++) {
     297                if (Character.isUpperCase(part.charAt(i))) {
     298                    newName[start + i] = Character.toUpperCase(newName[start + i]);
     299                }
     300            }
     301
     302            return new String(newName);
     303        }
     304
     305        @Override
     306        public String toString() {
     307            return "synonyms(" + replacement_ + ", " + Arrays.toString(words_) + ")";
     308        }
     309
     310    }
     311
    152312}
  • test/unit/org/openstreetmap/josm/data/validation/tests/SimilarNamedWaysTest.groovy

     
     1// License: GPL. See LICENSE file for details.
     2package org.openstreetmap.josm.data.validation.tests
     3
     4import org.openstreetmap.josm.JOSMFixture
     5import org.openstreetmap.josm.data.coor.LatLon
     6import org.openstreetmap.josm.data.osm.DataSet
     7import org.openstreetmap.josm.data.osm.Way
     8import org.openstreetmap.josm.data.validation.TestError
     9
     10class SimilarNamedWaysTest extends GroovyTestCase {
     11
     12    @Override
     13    void setUp() {
     14        JOSMFixture.createUnitTestFixture().init();
     15    }
     16
     17    public static List<TestError> testWays(String namea, String nameb) {
     18        def ds = new DataSet()
     19
     20        def n00 = new org.openstreetmap.josm.data.osm.Node(new LatLon(0, 0))
     21        def n10 = new org.openstreetmap.josm.data.osm.Node(new LatLon(1, 0))
     22        def n20 = new org.openstreetmap.josm.data.osm.Node(new LatLon(2, 0))
     23        def n30 = new org.openstreetmap.josm.data.osm.Node(new LatLon(3, 0))
     24        def n40 = new org.openstreetmap.josm.data.osm.Node(new LatLon(4, 0))
     25
     26        ds.addPrimitive(n00)
     27        ds.addPrimitive(n10)
     28        ds.addPrimitive(n20)
     29        ds.addPrimitive(n30)
     30        ds.addPrimitive(n40)
     31
     32        def waya = new Way()
     33        waya.addNode(n00)
     34        waya.addNode(n10)
     35        waya.addNode(n20)
     36        waya.put("name", namea)
     37        def wayb = new Way()
     38        wayb.addNode(n20)
     39        wayb.addNode(n30)
     40        wayb.addNode(n40)
     41        wayb.put("name", nameb)
     42
     43        ds.addPrimitive(waya)
     44        ds.addPrimitive(wayb)
     45
     46        assert waya.isUsable()
     47        assert wayb.isUsable()
     48
     49        def t = new SimilarNamedWays()
     50        t.startTest(null)
     51        t.visit(waya)
     52        t.visit(wayb)
     53        return t.errors
     54    }
     55
     56
     57    void testCombinations() {
     58        assert testWays("Church Street", "Water Street").isEmpty()
     59        assert !testWays("Main Street", "Maim Street").isEmpty()
     60        assert !testWays("First Street", "Frist Street").isEmpty()
     61
     62        assert testWays("1st Street", "2nd Street").isEmpty()
     63        assert testWays("First Avenue", "Second Avenue").isEmpty()
     64        assert testWays("West Main Street", "East Main Street").isEmpty()
     65        assert testWays("A Street", "B Street").isEmpty()
     66    }
     67}
  • test/unit/org/openstreetmap/josm/data/validation/tests/SimilarNamesTest.java

     
     1// License: GPL. For details, see LICENSE file.
     2package org.openstreetmap.josm.data.validation.tests;
     3
     4import static org.junit.Assert.assertEquals;
     5
     6import org.junit.BeforeClass;
     7import org.junit.Test;
     8
     9public class SimilarNamesTest {
     10
     11    private static SimilarNamedWays similarity_;
     12
     13    /**
     14     * Setup SimilarityRules.
     15     */
     16    @BeforeClass
     17    public static void init() {
     18        similarity_ = new SimilarNamedWays();
     19    }
     20
     21    @Test
     22    public void testSimilarNames() {
     23        checkSimilarity("same string", "Testname", "Testname", false);
     24        checkSimilarity("different case", "Testname", "TestName", true);
     25        checkSimilarity("typo", "Testname", "Testxame", true);
     26        checkSimilarity("missing char", "Testname", "Testame", true);
     27        checkSimilarity("additional char", "Testname", "Testxname", true);
     28        checkSimilarity("2 changes", "Testname", "Tostxname", true);
     29        checkSimilarity("3 changes", "Testname", "Tostxnam", false);
     30
     31        // regular expression rule
     32        checkSimilarity("same number", "track 1", "track 1", false);
     33        checkSimilarity("different number", "track 1", "track 2", false);
     34        checkSimilarity("different number length", "track 9", "track 10", false);
     35        checkSimilarity("multiple numbers", "track 8 - 9", "track 10 - 11", false);
     36
     37        checkSimilarity("1st and 2nd", "1st Street", "2nd Street", false);
     38        checkSimilarity("1st case", "1St Street", "1st Street", true);
     39        checkSimilarity("1st and 2nd case", "1St Street", "2nd Street", true);
     40        checkSimilarity("3rd and 4th", "2rd Street", "4th Street", false);
     41
     42        // synonyms
     43        checkSimilarity("east and west", "East Foothill Drive", "West Foothill Drive", false);
     44        checkSimilarity("east and west case", "east Foothill Drive", "West Foothill Drive", true);
     45        checkSimilarity("first and second", "First Street", "Second Street", false);
     46        checkSimilarity("first and second case", "First Street", "second Street", true);
     47        checkSimilarity("first and second typo", "Forst Street", "Second Street", true);
     48        checkSimilarity("first and second typo2", "First Street", "Socond Street", true);
     49        checkSimilarity("first and second 2 changes", "First Street", "Soconds Street", true);
     50        checkSimilarity("first and second 3 changes", "First Street", "Soconds Stret", false);
     51    }
     52
     53    private void checkSimilarity(String message, String name1, String name2, boolean expected) {
     54        boolean actual = similarity_.similaryName(name1, name2);
     55        assertEquals(message, expected, actual);
     56
     57    }
     58}