Ticket #3733: 3733-brianegge.patch
| File 3733-brianegge.patch, 13.5 KB (added 12 years ago) |
|---|
-
src/org/openstreetmap/josm/data/validation/TestError.java
326 326 327 327 @Override 328 328 public String toString() { 329 return "TestError [tester=" + tester + ", code=" + code + " ]";329 return "TestError [tester=" + tester + ", code=" + code + ", message=" + message + "]"; 330 330 } 331 331 } -
src/org/openstreetmap/josm/data/validation/tests/SimilarNamedWays.java
1 1 // License: GPL. See LICENSE file for details. 2 2 package org.openstreetmap.josm.data.validation.tests; 3 3 4 import static java.util.regex.Pattern.CASE_INSENSITIVE; 5 import static java.util.regex.Pattern.UNICODE_CASE; 4 6 import static org.openstreetmap.josm.tools.I18n.tr; 5 7 6 8 import java.awt.geom.Point2D; 7 9 import java.util.ArrayList; 10 import java.util.Arrays; 8 11 import java.util.HashMap; 9 12 import java.util.List; 10 13 import java.util.Map; 14 import java.util.regex.Matcher; 15 import java.util.regex.Pattern; 11 16 12 17 import org.openstreetmap.josm.data.osm.OsmPrimitive; 13 18 import org.openstreetmap.josm.data.osm.Way; … … 34 39 /** The already detected errors */ 35 40 private MultiMap<Way, Way> errorWays; 36 41 42 private ArrayList<NormalizeRule> rules_ = new ArrayList<NormalizeRule>(); 43 44 37 45 /** 38 46 * Constructor 39 47 */ … … 40 48 public SimilarNamedWays() { 41 49 super(tr("Similarly named ways"), 42 50 tr("This test checks for ways with similar names that may have been misspelled.")); 51 52 // TODO: hardcode these rules for now. 
Replace them with preferences later 53 // See https://josm.openstreetmap.de/ticket/3733#comment:19 54 addRegExprRule("\\d+", "0"); // Highway 66 55 addRegExprRule("\\d+(st|nd|rd|th)", "0st"); // 3rd Ave 56 addRegExprRule("^[A-Z] ", "X"); // E Street 57 addSynonyms("east", "west", "north", "south"); 58 addSynonyms("first", "second", "third"); 43 59 } 44 60 45 61 @Override … … 77 93 continue; 78 94 } 79 95 80 int levenshteinDistance = getLevenshteinDistance(name, name2); 81 if (0 < levenshteinDistance && levenshteinDistance <= 2) { 96 if (similaryName(name, name2)) { 82 97 List<OsmPrimitive> primitives = new ArrayList<>(2); 83 98 primitives.add(w); 84 99 primitives.add(w2); … … 97 112 * @param t Second word 98 113 * @return The distance between words 99 114 */ 100 public int getLevenshteinDistance(String s, String t) {115 public static int getLevenshteinDistance(String s, String t) { 101 116 int[][] d; // matrix 102 117 int n; // length of s 103 118 int m; // length of t … … 149 164 // Step 7 150 165 return d[n][m]; 151 166 } 167 168 /** 169 * Add a regular expression rule. 170 * @param regExpr the regular expression to search for 171 * @param replacement a string to replace with, which should match the expression. 172 */ 173 public void addRegExprRule(String regExpr, String replacement) { 174 rules_.add(new RegExprRule(regExpr, replacement)); 175 } 176 177 /** 178 * Add a rule with synonym words. 179 * @param words words which are synonyms 180 */ 181 public void addSynonyms(String... words) { 182 for (String word : words) { 183 rules_.add(new SynonymRule(word, words)); 184 } 185 } 186 187 /** 188 * Check if two names are similar, but not identical. First both names will be "normalized". 
189 * Afterwards the Levenshtein distance will be calculated.<br> 190 * Examples for normalization rules:<br> 191 * <code>replaceAll("\\d+", "0")</code><br> 192 * would cause similaryName("track 1", "track 2") = false, but similaryName("Track 1", "track 2") = true 193 * @param name first name to compare 194 * @param name2 second name to compare 195 * @return true if the normalized names are different but only a "little bit" 196 */ 197 public boolean similaryName(String name, String name2) { 198 // check plain strings 199 int distance = getLevenshteinDistance(name, name2); 200 boolean similar = distance>0 && distance<=2; 201 202 // try all rules 203 for (NormalizeRule rule : rules_) { 204 int levenshteinDistance = getLevenshteinDistance(rule.normalize(name), rule.normalize(name2)); 205 if (levenshteinDistance == 0) 206 // one rule results in identical names: identical 207 return false; 208 else if (levenshteinDistance <= 2) { 209 // 0 < distance <= 2 210 similar = true; 211 } 212 } 213 return similar; 214 } 215 216 public interface NormalizeRule { 217 218 /** 219 * Normalize the string by replacing parts. 
220 * @param name name to normalize 221 * @return normalized string 222 */ 223 String normalize(String name); 224 225 } 226 227 public class RegExprRule implements NormalizeRule { 228 private Pattern regExpr_; 229 private String replacement_; 230 231 public RegExprRule(String expression, String replacement) { 232 regExpr_ = Pattern.compile(expression); 233 replacement_ = replacement; 234 } 235 236 @Override 237 public String normalize(String name) { 238 return regExpr_.matcher(name).replaceAll(replacement_); 239 } 240 241 @Override 242 public String toString() { 243 return "replaceAll(" + regExpr_ + ", " + replacement_ + ")"; 244 } 245 } 246 247 public class SynonymRule implements NormalizeRule { 248 249 private String[] words_; 250 private Pattern regExpr_; 251 private String replacement_; 252 253 public SynonymRule(String replacement, String[] words) { 254 replacement_ = replacement.toLowerCase(); 255 words_ = words; 256 257 // build regular expression for other words (for fast match) 258 StringBuilder expression = new StringBuilder(); 259 int maxLength = 0; 260 for (int i = 0; i < words.length; i++) { 261 if (words[i].length() > maxLength) { 262 maxLength = words[i].length(); 263 } 264 if (expression.length() > 0) { 265 expression.append("|"); 266 } 267 expression.append(Pattern.quote(words[i])); 268 } 269 regExpr_ = Pattern.compile(expression.toString(), CASE_INSENSITIVE + UNICODE_CASE); 270 } 271 272 @Override 273 public String normalize(String name) { 274 // find first match 275 Matcher matcher = regExpr_.matcher(name); 276 if (!matcher.find()) 277 return name; 278 279 int start = matcher.start(); 280 281 // which word matches? 
282 String part = ""; 283 for (int i = 0; i < words_.length; i++) { 284 String word = words_[i]; 285 part = name.substring(start, start + word.length()); 286 if (word.equalsIgnoreCase(part)) { 287 break; 288 } 289 } 290 291 // replace the word 292 char[] newName = matcher.replaceFirst(replacement_).toCharArray(); 293 294 // adjust case (replacement is not shorter than matching word!) 295 int minLength = Math.min(replacement_.length(), part.length()); 296 for (int i = 0; i < minLength; i++) { 297 if (Character.isUpperCase(part.charAt(i))) { 298 newName[start + i] = Character.toUpperCase(newName[start + i]); 299 } 300 } 301 302 return new String(newName); 303 } 304 305 @Override 306 public String toString() { 307 return "synonyms(" + replacement_ + ", " + Arrays.toString(words_) + ")"; 308 } 309 310 } 311 152 312 } -
test/unit/org/openstreetmap/josm/data/validation/tests/SimilarNamedWaysTest.groovy
1 // License: GPL. See LICENSE file for details. 2 package org.openstreetmap.josm.data.validation.tests 3 4 import org.openstreetmap.josm.JOSMFixture 5 import org.openstreetmap.josm.data.coor.LatLon 6 import org.openstreetmap.josm.data.osm.DataSet 7 import org.openstreetmap.josm.data.osm.Way 8 import org.openstreetmap.josm.data.validation.TestError 9 10 class SimilarNamedWaysTest extends GroovyTestCase { 11 12 @Override 13 void setUp() { 14 JOSMFixture.createUnitTestFixture().init(); 15 } 16 17 public static List<TestError> testWays(String namea, String nameb) { 18 def ds = new DataSet() 19 20 def n00 = new org.openstreetmap.josm.data.osm.Node(new LatLon(0, 0)) 21 def n10 = new org.openstreetmap.josm.data.osm.Node(new LatLon(1, 0)) 22 def n20 = new org.openstreetmap.josm.data.osm.Node(new LatLon(2, 0)) 23 def n30 = new org.openstreetmap.josm.data.osm.Node(new LatLon(3, 0)) 24 def n40 = new org.openstreetmap.josm.data.osm.Node(new LatLon(4, 0)) 25 26 ds.addPrimitive(n00) 27 ds.addPrimitive(n10) 28 ds.addPrimitive(n20) 29 ds.addPrimitive(n30) 30 ds.addPrimitive(n40) 31 32 def waya = new Way() 33 waya.addNode(n00) 34 waya.addNode(n10) 35 waya.addNode(n20) 36 waya.put("name", namea) 37 def wayb = new Way() 38 wayb.addNode(n20) 39 wayb.addNode(n30) 40 wayb.addNode(n40) 41 wayb.put("name", nameb) 42 43 ds.addPrimitive(waya) 44 ds.addPrimitive(wayb) 45 46 assert waya.isUsable() 47 assert wayb.isUsable() 48 49 def t = new SimilarNamedWays() 50 t.startTest(null) 51 t.visit(waya) 52 t.visit(wayb) 53 return t.errors 54 } 55 56 57 void testCombinations() { 58 assert testWays("Church Street", "Water Street").isEmpty() 59 assert !testWays("Main Street", "Maim Street").isEmpty() 60 assert !testWays("First Street", "Frist Street").isEmpty() 61 62 assert testWays("1st Street", "2nd Street").isEmpty() 63 assert testWays("First Avenue", "Second Avenue").isEmpty() 64 assert testWays("West Main Street", "East Main Street").isEmpty() 65 assert testWays("A Street", "B Street").isEmpty() 
66 } 67 } -
test/unit/org/openstreetmap/josm/data/validation/tests/SimilarNamesTest.java
1 // License: GPL. For details, see LICENSE file. 2 package org.openstreetmap.josm.data.validation.tests; 3 4 import static org.junit.Assert.assertEquals; 5 6 import org.junit.BeforeClass; 7 import org.junit.Test; 8 9 public class SimilarNamesTest { 10 11 private static SimilarNamedWays similarity_; 12 13 /** 14 * Setup SimilarityRules. 15 */ 16 @BeforeClass 17 public static void init() { 18 similarity_ = new SimilarNamedWays(); 19 } 20 21 @Test 22 public void testSimilarNames() { 23 checkSimilarity("same string", "Testname", "Testname", false); 24 checkSimilarity("different case", "Testname", "TestName", true); 25 checkSimilarity("typo", "Testname", "Testxame", true); 26 checkSimilarity("missing char", "Testname", "Testame", true); 27 checkSimilarity("additional char", "Testname", "Testxname", true); 28 checkSimilarity("2 changes", "Testname", "Tostxname", true); 29 checkSimilarity("3 changes", "Testname", "Tostxnam", false); 30 31 // regular expression rule 32 checkSimilarity("same number", "track 1", "track 1", false); 33 checkSimilarity("different number", "track 1", "track 2", false); 34 checkSimilarity("different number length", "track 9", "track 10", false); 35 checkSimilarity("multiple numbers", "track 8 - 9", "track 10 - 11", false); 36 37 checkSimilarity("1st and 2nd", "1st Street", "2nd Street", false); 38 checkSimilarity("1st case", "1St Street", "1st Street", true); 39 checkSimilarity("1st and 2nd case", "1St Street", "2nd Street", true); 40 checkSimilarity("3rd and 4th", "2rd Street", "4th Street", false); 41 42 // synonyms 43 checkSimilarity("east and west", "East Foothill Drive", "West Foothill Drive", false); 44 checkSimilarity("east and west case", "east Foothill Drive", "West Foothill Drive", true); 45 checkSimilarity("first and second", "First Street", "Second Street", false); 46 checkSimilarity("first and second case", "First Street", "second Street", true); 47 checkSimilarity("first and second typo", "Forst Street", "Second Street", true); 48 
checkSimilarity("first and second typo2", "First Street", "Socond Street", true); 49 checkSimilarity("first and second 2 changes", "First Street", "Soconds Street", true); 50 checkSimilarity("first and second 3 changes", "First Street", "Soconds Stret", false); 51 } 52 53 private void checkSimilarity(String message, String name1, String name2, boolean expected) { 54 boolean actual = similarity_.similaryName(name1, name2); 55 assertEquals(message, expected, actual); 56 57 } 58 }
