| 534 | | // try to fix common typos and check again if value is still unknown |
| 535 | | final String harmonizedValue = harmonizeValue(prop.getValue()); |
| 536 | | String fixedValue = null; |
| 537 | | Set<String> possibleValues = getPresetValues(key); |
| 538 | | List<String> fixVals = new ArrayList<>(); |
| 539 | | int maxPresetValueLen = 0; |
| 540 | | if (possibleValues.contains(harmonizedValue)) { |
| 541 | | fixedValue = harmonizedValue; |
| 542 | | } else { |
| 543 | | // use Levenshtein distance to find typical typos |
| 544 | | int minDist = MAX_LEVENSHTEIN_DISTANCE + 1; |
| 545 | | String closest = null; |
| 546 | | for (String possibleVal : possibleValues) { |
| 547 | | if (possibleVal.isEmpty()) |
| 548 | | continue; |
| 549 | | maxPresetValueLen = Math.max(maxPresetValueLen, possibleVal.length()); |
| 550 | | if (harmonizedValue.length() < 3 && possibleVal.length() >= harmonizedValue.length() + MAX_LEVENSHTEIN_DISTANCE) { |
| 551 | | // don't suggest fix value when given value is short and lengths are too different |
| 552 | | // for example surface=u would result in surface=mud |
| 553 | | continue; |
| 554 | | } |
| 555 | | int dist = Utils.getLevenshteinDistance(possibleVal, harmonizedValue); |
| 556 | | if (dist >= harmonizedValue.length()) { |
| 557 | | // short value, all characters are different. Don't warn, might say Value '10' for key 'fee' looks like 'no'. |
| 558 | | continue; |
| 559 | | } |
| 560 | | if (dist < minDist) { |
| 561 | | closest = possibleVal; |
| 562 | | minDist = dist; |
| 563 | | fixVals.clear(); |
| 564 | | fixVals.add(possibleVal); |
| 565 | | } else if (dist == minDist) { |
| 566 | | fixVals.add(possibleVal); |
| 567 | | } |
| 568 | | } |
| 569 | | if (minDist <= MAX_LEVENSHTEIN_DISTANCE && maxPresetValueLen > MAX_LEVENSHTEIN_DISTANCE |
| 570 | | && (harmonizedValue.length() > 3 || minDist < MAX_LEVENSHTEIN_DISTANCE)) { |
| 571 | | if (fixVals.size() < 2) { |
| 572 | | fixedValue = closest; |
| 573 | | } else { |
| 574 | | Collections.sort(fixVals); |
| 575 | | // misspelled preset value with multiple good alternatives |
| 576 | | errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE_NO_FIX) |
| 577 | | .message(tr("Misspelled property value"), |
| 578 | | marktr("Value ''{0}'' for key ''{1}'' looks like one of {2}."), |
| 579 | | prop.getValue(), key, fixVals) |
| 580 | | .primitives(p).build()); |
| 581 | | withErrors.put(p, "WPV"); |
| 582 | | continue; |
| 583 | | } |
| 584 | | } |
| | 539 | tryGuess(p, key, value, withErrors); |
| | 540 | } |
| | 541 | } |
| | 542 | if (checkFixmes && key != null && value != null && !value.isEmpty() && isFixme(key, value) && !withErrors.contains(p, "FIXME")) { |
| | 543 | errors.add(TestError.builder(this, Severity.OTHER, FIXME) |
| | 544 | .message(tr("FIXMES")) |
| | 545 | .primitives(p) |
| | 546 | .build()); |
| | 547 | withErrors.put(p, "FIXME"); |
| | 548 | } |
| | 549 | } |
| | 550 | } |
| | 551 | |
| | 552 | private void tryGuess(OsmPrimitive p, String key, String value, MultiMap<OsmPrimitive, String> withErrors) { |
| | 553 | // try to fix common typos and check again if value is still unknown |
| | 554 | final String harmonizedValue = harmonizeValue(value); |
| | 555 | String fixedValue = null; |
| | 556 | List<String> fixVals = new ArrayList<>(); |
| | 557 | int maxPresetValueLen = 0; |
| | 558 | Set<String> presetValues = getPresetValues(key); |
| | 559 | Set<String> oftenUsedValues = oftenUsedValueData.get(key); |
| | 560 | for (Set<String> possibleValues: Arrays.asList(presetValues, oftenUsedValues)) { |
| | 561 | if (possibleValues != null && possibleValues.contains(harmonizedValue)) { |
| | 562 | fixedValue = harmonizedValue; |
| | 563 | break; |
| | 564 | } |
| | 565 | } |
| | 566 | if (fixedValue == null) { |
| | 567 | // use Levenshtein distance to find typical typos |
| | 568 | int minDist = MAX_LEVENSHTEIN_DISTANCE + 1; |
| | 569 | String closest = null; |
| | 570 | for (Set<String> possibleValues: Arrays.asList(presetValues, oftenUsedValues)) { |
| | 571 | if (possibleValues == null) |
| | 572 | continue; |
| | 573 | for (String possibleVal : possibleValues) { |
| | 574 | if (possibleVal.isEmpty()) |
| | 575 | continue; |
| | 576 | maxPresetValueLen = Math.max(maxPresetValueLen, possibleVal.length()); |
| | 577 | if (harmonizedValue.length() < 3 && possibleVal.length() >= harmonizedValue.length() + MAX_LEVENSHTEIN_DISTANCE) { |
| | 578 | // don't suggest fix value when given value is short and lengths are too different |
| | 579 | // for example surface=u would result in surface=mud |
| | 580 | continue; |
| 586 | | if (fixedValue != null && possibleValues.contains(fixedValue)) { |
| 587 | | final String newValue = fixedValue; |
| 588 | | // misspelled preset value |
| 589 | | errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE) |
| 590 | | .message(tr("Misspelled property value"), |
| 591 | | marktr("Value ''{0}'' for key ''{1}'' looks like ''{2}''."), prop.getValue(), key, newValue) |
| 592 | | .primitives(p) |
| 593 | | .build()); |
| 594 | | withErrors.put(p, "WPV"); |
| 595 | | } else { |
| 596 | | // unknown preset value |
| 597 | | errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE) |
| 598 | | .message(tr("Presets do not contain property value"), |
| 599 | | marktr("Value ''{0}'' for key ''{1}'' not in presets."), prop.getValue(), key) |
| 600 | | .primitives(p) |
| 601 | | .build()); |
| 602 | | withErrors.put(p, "UPV"); |
| | 582 | int dist = Utils.getLevenshteinDistance(possibleVal, harmonizedValue); |
| | 583 | if (dist >= harmonizedValue.length()) { |
| | 584 | // short value, all characters are different. Don't warn, might say Value '10' for key 'fee' looks like 'no'. |
| | 585 | continue; |
| 606 | | if (checkFixmes && key != null && value != null && !value.isEmpty() && isFixme(key, value) && !withErrors.contains(p, "FIXME")) { |
| 607 | | errors.add(TestError.builder(this, Severity.OTHER, FIXME) |
| 608 | | .message(tr("FIXMES")) |
| 609 | | .primitives(p) |
| 610 | | .build()); |
| 611 | | withErrors.put(p, "FIXME"); |
| | 597 | |
| | 598 | if (minDist <= MAX_LEVENSHTEIN_DISTANCE && maxPresetValueLen > MAX_LEVENSHTEIN_DISTANCE |
| | 599 | && (harmonizedValue.length() > 3 || minDist < MAX_LEVENSHTEIN_DISTANCE)) { |
| | 600 | if (fixVals.size() < 2) { |
| | 601 | fixedValue = closest; |
| | 602 | } else { |
| | 603 | Collections.sort(fixVals); |
| | 604 | // misspelled preset value with multiple good alternatives |
| | 605 | errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE_NO_FIX) |
| | 606 | .message(tr("Unknown property value"), |
| | 607 | marktr("Value ''{0}'' for key ''{1}'' is not in the presets, maybe one of {2} is meant?"), |
| | 608 | value, key, fixVals) |
| | 609 | .primitives(p).build()); |
| | 610 | withErrors.put(p, "WPV"); |
| | 611 | return; |
| | 612 | } |
| | 615 | if (fixedValue != null) { |
| | 616 | final String newValue = fixedValue; |
| | 617 | // misspelled preset value |
| | 618 | errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE) |
| | 619 | .message(tr("Unknown property value"), |
| | 620 | marktr("Value ''{0}'' for key ''{1}'' is not in the presets, maybe ''{2}'' is meant?"), value, key, newValue) |
| | 621 | .primitives(p) |
| | 622 | .build()); |
| | 623 | withErrors.put(p, "WPV"); |
| | 624 | } else { |
| | 625 | // unknown preset value |
| | 626 | errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE) |
| | 627 | .message(tr("Presets do not contain property value"), |
| | 628 | marktr("Value ''{0}'' for key ''{1}'' not in presets."), value, key) |
| | 629 | .primitives(p) |
| | 630 | .build()); |
| | 631 | withErrors.put(p, "UPV"); |
| | 632 | } |