| | 1 | package org.openstreetmap.josm.tools; |
| | 2 | |
| | 3 | import java.util.HashMap; |
| | 4 | import java.util.Map; |
| | 5 | import java.util.regex.Matcher; |
| | 6 | import java.util.regex.Pattern; |
| | 7 | import javax.swing.JOptionPane; |
| | 8 | import org.openstreetmap.josm.Main; |
| | 9 | import org.openstreetmap.josm.gui.ExtendedDialog; |
| | 10 | import org.openstreetmap.josm.io.XmlWriter; |
| | 11 | |
| | 12 | import static org.openstreetmap.josm.tools.I18n.tr; |
| | 13 | import static org.openstreetmap.josm.tools.I18n.trn; |
| | 14 | |
| | 15 | /** |
| | 16 | * Class that helps to parse tags from arbitrary text |
| | 17 | */ |
| | 18 | public class TextTagParser { |
| | 19 | |
| | 20 | protected static final int MAX_KEY_LENGTH = 50; |
| | 21 | protected static final int MAX_VALUE_LENGTH = 255; |
| | 22 | |
| | 23 | public static class TextAnalyzer { |
| | 24 | int start = 0; |
| | 25 | boolean keyFound = false; |
| | 26 | boolean quotesStarted = false; |
| | 27 | boolean esc = false; |
| | 28 | StringBuilder s = new StringBuilder(200); |
| | 29 | int pos; |
| | 30 | String data; |
| | 31 | int n; |
| | 32 | boolean notFound; |
| | 33 | |
| | 34 | public TextAnalyzer(String text) { |
| | 35 | pos = 0; |
| | 36 | data = text; |
| | 37 | n = data.length(); |
| | 38 | } |
| | 39 | |
| | 40 | /** |
| | 41 | * Read tags from "Free format" |
| | 42 | */ |
| | 43 | Map<String, String> getFreeParsedTags() { |
| | 44 | String k, v; |
| | 45 | Map<String, String> tags = new HashMap<String,String>(); |
| | 46 | |
| | 47 | while (true) { |
| | 48 | skipEmpty(); |
| | 49 | if (pos == n) { break; } |
| | 50 | k = parseString(true); |
| | 51 | if (pos == n) { tags.clear(); break; } |
| | 52 | skipSign(); |
| | 53 | if (pos == n) { tags.clear(); break; } |
| | 54 | v = parseString(false); |
| | 55 | tags.put(k, v); |
| | 56 | } |
| | 57 | return tags; |
| | 58 | } |
| | 59 | |
| | 60 | private String parseString(boolean stopOnEquals) { |
| | 61 | char c; |
| | 62 | while (pos < n) { |
| | 63 | c = data.charAt(pos); |
| | 64 | if (esc) { |
| | 65 | esc = false; |
| | 66 | s.append(c); // \" \\ |
| | 67 | } else if (c == '\\') { |
| | 68 | esc = true; |
| | 69 | } else if (c == '\"' && !quotesStarted) { // opening " |
| | 70 | if (s.toString().trim().length()>0) { // we had ||some text"|| |
| | 71 | s.append(c); // just add ", not open |
| | 72 | } else { |
| | 73 | s.delete(0, s.length()); // forget that empty characthers and start reading ".... |
| | 74 | quotesStarted = true; |
| | 75 | } |
| | 76 | } else if (c == '\"' && quotesStarted) { // closing " |
| | 77 | quotesStarted = false; |
| | 78 | pos++; |
| | 79 | break; |
| | 80 | } else if (!quotesStarted && (c=='\n'|| c=='\t'|| c==' ' || c=='\r' |
| | 81 | || (c=='=' && stopOnEquals))) { // stop-symbols |
| | 82 | pos++; |
| | 83 | break; |
| | 84 | } else { |
| | 85 | // skip non-printable characters |
| | 86 | if(c>=32) s.append(c); |
| | 87 | } |
| | 88 | pos++; |
| | 89 | } |
| | 90 | |
| | 91 | String res = s.toString(); |
| | 92 | s.delete(0, s.length()); |
| | 93 | return res.trim(); |
| | 94 | } |
| | 95 | |
| | 96 | private void skipSign() { |
| | 97 | char c; |
| | 98 | boolean signFound = false;; |
| | 99 | while (pos < n) { |
| | 100 | c = data.charAt(pos); |
| | 101 | if (c == '\t' || c == '\n' || c == ' ') { |
| | 102 | pos++; |
| | 103 | } else if (c== '=') { |
| | 104 | if (signFound) break; // a = =qwerty means "a"="=qwerty" |
| | 105 | signFound = true; |
| | 106 | pos++; |
| | 107 | } else { |
| | 108 | break; |
| | 109 | } |
| | 110 | } |
| | 111 | } |
| | 112 | |
| | 113 | private void skipEmpty() { |
| | 114 | char c; |
| | 115 | while (pos < n) { |
| | 116 | c = data.charAt(pos); |
| | 117 | if (c == '\t' || c == '\n' || c == '\r' || c == ' ' ) { |
| | 118 | pos++; |
| | 119 | } else { |
| | 120 | break; |
| | 121 | } |
| | 122 | } |
| | 123 | } |
| | 124 | } |
| | 125 | |
| | 126 | private static String unescape(String k) { |
| | 127 | if(! (k.startsWith("\"") && k.endsWith("\"")) ) { |
| | 128 | if (k.contains("=")) { |
| | 129 | // '=' not in quotes will be treated as an error! |
| | 130 | return null; |
| | 131 | } else { |
| | 132 | return k; |
| | 133 | } |
| | 134 | } |
| | 135 | String text = k.substring(1,k.length()-1); |
| | 136 | return (new TextAnalyzer(text)).parseString(false); |
| | 137 | } |
| | 138 | |
| | 139 | /** |
| | 140 | * Try to find tag-value pairs in given text |
| | 141 | * @param text - text in which tags are looked for |
| | 142 | * @param splitRegex - text is splitted into parts with this delimiter |
| | 143 | * @param tagRegex - each part is matched against this regex |
| | 144 | * @param unescapeTextInQuotes - if true, matched tag and value will be analyzed more thoroughly |
| | 145 | */ |
| | 146 | public static Map<String, String> readTagsByRegexp(String text, String splitRegex, String tagRegex, boolean unescapeTextInQuotes) { |
| | 147 | String lines[] = text.split(splitRegex); |
| | 148 | Pattern p = Pattern.compile(tagRegex); |
| | 149 | Map<String, String> tags = new HashMap<String,String>(); |
| | 150 | String k=null, v=null; |
| | 151 | for (String line: lines) { |
| | 152 | if (line.trim().isEmpty()) continue; // skip empty lines |
| | 153 | Matcher m = p.matcher(line); |
| | 154 | if (m.matches()) { |
| | 155 | k=m.group(1).trim(); v=m.group(2).trim(); |
| | 156 | if (unescapeTextInQuotes) { |
| | 157 | k = unescape(k); |
| | 158 | v = unescape(v); |
| | 159 | if (k==null || v==null) return null; |
| | 160 | } |
| | 161 | tags.put(k,v); |
| | 162 | } else { |
| | 163 | return null; |
| | 164 | } |
| | 165 | } |
| | 166 | if (!tags.isEmpty()) { |
| | 167 | return tags; |
| | 168 | } else { |
| | 169 | return null; |
| | 170 | } |
| | 171 | } |
| | 172 | |
| | 173 | public static Map<String,String> getValidatedTagsFromText(String buf) { |
| | 174 | Map<String,String> tags = readTagsFromText(buf); |
| | 175 | return validateTags(tags) ? tags : null; |
| | 176 | } |
| | 177 | |
| | 178 | /** |
| | 179 | * Apply different methods to extract tag-value pairs from arbitrary text |
| | 180 | * @param buf |
| | 181 | * @return null if no format is suitable |
| | 182 | */ |
| | 183 | |
| | 184 | public static Map<String,String> readTagsFromText(String buf) { |
| | 185 | Map<String,String> tags; |
| | 186 | |
| | 187 | // Format |
| | 188 | // tag1\tval1\ntag2\tval2\n |
| | 189 | tags = readTagsByRegexp(buf, "[\r\n]+]", "(.*?)\t(.*?)", false); |
| | 190 | // try "tag\tvalue\n" format |
| | 191 | if (tags!=null) return tags; |
| | 192 | |
| | 193 | // Format |
| | 194 | // a=b \n c=d \n "a b"=hello |
| | 195 | // SORRY: "a=b" = c is not supported fror now, only first = will be considered |
| | 196 | // a = "b=c" is OK |
| | 197 | // a = b=c - this method of parsing fails intentionally |
| | 198 | tags = readTagsByRegexp(buf, "[\\n\\t\\r]+", "(.*?)=(.*?)", true); |
| | 199 | // try format t1=v1\n t2=v2\n ... |
| | 200 | if (tags!=null) return tags; |
| | 201 | |
| | 202 | // JSON-format |
| | 203 | String bufJson = buf.trim(); |
| | 204 | // trim { }, if there are any |
| | 205 | if (bufJson.startsWith("{") && bufJson.endsWith("}") ) bufJson = bufJson.substring(1,bufJson.length()-1); |
| | 206 | tags = readTagsByRegexp(bufJson, "[\\s]*,[\\s]*", |
| | 207 | "[\\s]*(\\\".*?[^\\\\]\\\")"+"[\\s]*:[\\s]*"+"(\\\".*?[^\\\\]\\\")[\\s]*", true); |
| | 208 | if (tags!=null) return tags; |
| | 209 | |
| | 210 | // Free format |
| | 211 | // a 1 "b" 2 c=3 d 4 e "5" |
| | 212 | TextAnalyzer parser = new TextAnalyzer(buf); |
| | 213 | tags = parser.getFreeParsedTags(); |
| | 214 | return tags; |
| | 215 | } |
| | 216 | |
| | 217 | /** |
| | 218 | * Check tags for correctness and display warnings if needed |
| | 219 | * @param tags - map key->value to check |
| | 220 | * @return true if user decision was "OK" |
| | 221 | */ |
| | 222 | public static boolean validateTags(Map<String, String> tags) { |
| | 223 | String value; |
| | 224 | int r; |
| | 225 | int s = tags.size(); |
| | 226 | if (s > 30) { |
| | 227 | // Use trn() even if for english it makes no sense, as s > 30 |
| | 228 | r=warning(trn("There was {0} tag found in the buffer, it is suspicious!", |
| | 229 | "There were {0} tags found in the buffer, it is suspicious!", s, |
| | 230 | s), "", "toomanytags"); |
| | 231 | if (r==2) return false; if (r==3) return true; |
| | 232 | } |
| | 233 | for (String key: tags.keySet()) { |
| | 234 | value = tags.get(key); |
| | 235 | if (key.length() > MAX_KEY_LENGTH) { |
| | 236 | r = warning(tr("Key is too long (max {0} characters):", MAX_KEY_LENGTH), key+"="+value, "keytoolong"); |
| | 237 | if (r==2) return false; if (r==3) return true; |
| | 238 | } |
| | 239 | if (!key.matches("[a-zA-Z:_]*")) { |
| | 240 | r = warning(tr("Suspicious characters in key:"), key, "keydoesnotmatch"); |
| | 241 | if (r==2) return false; if (r==3) return true; |
| | 242 | } |
| | 243 | if (value.length() > MAX_VALUE_LENGTH) { |
| | 244 | r = warning(tr("Value is too long (max {0} characters):", MAX_VALUE_LENGTH), value, "valuetoolong"); |
| | 245 | if (r==2) return false; if (r==3) return true; |
| | 246 | } |
| | 247 | } |
| | 248 | return true; |
| | 249 | } |
| | 250 | |
| | 251 | private static int warning(String text, String data, String code) { |
| | 252 | ExtendedDialog ed = new ExtendedDialog( |
| | 253 | Main.parent, |
| | 254 | tr("Do you want to paste these tags?"), |
| | 255 | new String[]{tr("Ok"), tr("Cancel"), tr("Ignore warnings")}); |
| | 256 | ed.setButtonIcons(new String[]{"ok.png", "cancel.png", "pastetags.png"}); |
| | 257 | ed.setContent("<html><b>"+text + "</b><br/><br/><div width=\"300px\">"+XmlWriter.encode(data,true)+"</html>"); |
| | 258 | ed.setDefaultButton(2); |
| | 259 | ed.setCancelButton(2); |
| | 260 | ed.setIcon(JOptionPane.WARNING_MESSAGE); |
| | 261 | ed.toggleEnable(code); |
| | 262 | ed.showDialog(); |
| | 263 | Object o = ed.getValue(); |
| | 264 | if (o instanceof Integer) |
| | 265 | return ((Integer)o).intValue(); |
| | 266 | else |
| | 267 | return 2; |
| | 268 | } |
| | 269 | } |