| | 1 | package org.openstreetmap.josm.tools; |
| | 2 | |
| | 3 | import java.util.HashMap; |
| | 4 | import java.util.Map; |
| | 5 | import java.util.regex.Matcher; |
| | 6 | import java.util.regex.Pattern; |
| | 7 | import javax.swing.JOptionPane; |
| | 8 | import org.openstreetmap.josm.Main; |
| | 9 | import org.openstreetmap.josm.gui.ExtendedDialog; |
| | 10 | import org.openstreetmap.josm.io.XmlWriter; |
| | 11 | |
| | 12 | import static org.openstreetmap.josm.tools.I18n.tr; |
| | 13 | import static org.openstreetmap.josm.tools.I18n.trn; |
| | 14 | |
| | 15 | /** |
| | 16 | * Class that helps to parse tags from arbitrary text |
| | 17 | */ |
| | 18 | public class TextTagParser { |
| | 19 | |
| | 20 | |
| | 21 | public static class TextAnalyzer { |
| | 22 | int start = 0; |
| | 23 | boolean keyFound = false; |
| | 24 | boolean quotesStarted = false; |
| | 25 | boolean esc = false; |
| | 26 | StringBuilder s = new StringBuilder(200); |
| | 27 | int pos; |
| | 28 | String data; |
| | 29 | int n; |
| | 30 | boolean notFound; |
| | 31 | |
| | 32 | public TextAnalyzer(String text) { |
| | 33 | pos = 0; |
| | 34 | data = text; |
| | 35 | n = data.length(); |
| | 36 | } |
| | 37 | |
| | 38 | /** |
| | 39 | * Read tags from "Free format" |
| | 40 | */ |
| | 41 | Map<String, String> getFreeParsedTags() { |
| | 42 | String k, v; |
| | 43 | Map<String, String> tags = new HashMap<String,String>(); |
| | 44 | |
| | 45 | while (true) { |
| | 46 | skipEmpty(); |
| | 47 | if (pos == n) { break; } |
| | 48 | k = parseString(true); |
| | 49 | if (pos == n) { tags.clear(); break; } |
| | 50 | skipSign(); |
| | 51 | if (pos == n) { tags.clear(); break; } |
| | 52 | v = parseString(false); |
| | 53 | tags.put(k, v); |
| | 54 | } |
| | 55 | return tags; |
| | 56 | } |
| | 57 | |
| | 58 | private String parseString(boolean stopOnEquals) { |
| | 59 | char c; |
| | 60 | while (pos < n) { |
| | 61 | c = data.charAt(pos); |
| | 62 | if (esc) { |
| | 63 | esc = false; |
| | 64 | s.append(c); // \" \\ |
| | 65 | } else if (c == '\\') { |
| | 66 | esc = true; |
| | 67 | } else if (c == '\"' && !quotesStarted) { // opening " |
| | 68 | if (s.toString().trim().length()>0) { // we had ||some text"|| |
| | 69 | s.append(c); // just add ", not open |
| | 70 | } else { |
| | 71 | s.delete(0, s.length()); // forget that empty characthers and start reading ".... |
| | 72 | quotesStarted = true; |
| | 73 | } |
| | 74 | } else if (c == '\"' && quotesStarted) { // closing " |
| | 75 | quotesStarted = false; |
| | 76 | pos++; |
| | 77 | break; |
| | 78 | } else if (!quotesStarted && (c=='\n'|| c=='\t'|| c==' ' || c=='\r' |
| | 79 | || (c=='=' && stopOnEquals))) { // stop-symbols |
| | 80 | pos++; |
| | 81 | break; |
| | 82 | } else { |
| | 83 | // skip non-printable characters |
| | 84 | if(c>=32) s.append(c); |
| | 85 | } |
| | 86 | pos++; |
| | 87 | } |
| | 88 | |
| | 89 | String res = s.toString(); |
| | 90 | s.delete(0, s.length()); |
| | 91 | return res.trim(); |
| | 92 | } |
| | 93 | |
| | 94 | private void skipSign() { |
| | 95 | char c; |
| | 96 | boolean signFound = false;; |
| | 97 | while (pos < n) { |
| | 98 | c = data.charAt(pos); |
| | 99 | if (c == '\t' || c == '\n' || c == ' ') { |
| | 100 | pos++; |
| | 101 | } else if (c== '=') { |
| | 102 | if (signFound) break; // a = =qwerty means "a"="=qwerty" |
| | 103 | signFound = true; |
| | 104 | pos++; |
| | 105 | } else { |
| | 106 | break; |
| | 107 | } |
| | 108 | } |
| | 109 | } |
| | 110 | |
| | 111 | private void skipEmpty() { |
| | 112 | char c; |
| | 113 | while (pos < n) { |
| | 114 | c = data.charAt(pos); |
| | 115 | if (c == '\t' || c == '\n' || c == '\r' || c == ' ' ) { |
| | 116 | pos++; |
| | 117 | } else { |
| | 118 | break; |
| | 119 | } |
| | 120 | } |
| | 121 | } |
| | 122 | |
| | 123 | } |
| | 124 | |
| | 125 | private static String unescape(String k) { |
| | 126 | if(! (k.startsWith("\"") && k.endsWith("\"")) ) { |
| | 127 | if (k.contains("=")) { |
| | 128 | // '=' not in quotes will be treated as an error! |
| | 129 | return null; |
| | 130 | } else { |
| | 131 | return k; |
| | 132 | } |
| | 133 | } |
| | 134 | String text = k.substring(1,k.length()-1); |
| | 135 | return (new TextAnalyzer(text)).parseString(false); |
| | 136 | } |
| | 137 | |
| | 138 | /** |
| | 139 | * Try to find tag-value pairs in given @param text |
| | 140 | * @param splitRegex - text is splitted into parts with this delimiter |
| | 141 | * @param tagRegex - each part is matched against this regex |
| | 142 | * @param unescapeTextInQuotes - if true, matched tag and value will be analyzed more thoroughly |
| | 143 | */ |
| | 144 | public static Map<String, String> readTagsByRegexp(String text, String splitRegex, String tagRegex, boolean unescapeTextInQuotes) { |
| | 145 | String lines[] = text.split(splitRegex); |
| | 146 | Pattern p = Pattern.compile(tagRegex); |
| | 147 | Map<String, String> tags = new HashMap<String,String>(); |
| | 148 | String k=null, v=null; |
| | 149 | for (String line: lines) { |
| | 150 | if (line.trim().isEmpty()) continue; // skiip empty lines |
| | 151 | Matcher m = p.matcher(line); |
| | 152 | if (m.matches()) { |
| | 153 | k=m.group(1).trim(); v=m.group(2).trim(); |
| | 154 | if (unescapeTextInQuotes) { |
| | 155 | k = unescape(k); |
| | 156 | v = unescape(v); |
| | 157 | if (k==null || v==null) return null; |
| | 158 | } |
| | 159 | tags.put(k,v); |
| | 160 | } else { |
| | 161 | return null; |
| | 162 | } |
| | 163 | } |
| | 164 | if (!tags.isEmpty()) { |
| | 165 | return tags; |
| | 166 | } else { |
| | 167 | return null; |
| | 168 | } |
| | 169 | } |
| | 170 | |
| | 171 | public static Map<String,String> getValidatedTagsFromText(String buf) { |
| | 172 | Map<String,String> tags = readTagsFromText(buf); |
| | 173 | return validateTags(tags) ? tags : null; |
| | 174 | } |
| | 175 | |
| | 176 | /** |
| | 177 | * Apply different methods to extract tag-value pairs from arbitrary text |
| | 178 | * @param buf |
| | 179 | * @return null if no format is suitable |
| | 180 | */ |
| | 181 | |
| | 182 | public static Map<String,String> readTagsFromText(String buf) { |
| | 183 | Map<String,String> tags; |
| | 184 | |
| | 185 | // Format |
| | 186 | // tag1\tval1\ntag2\tval2\n |
| | 187 | tags = readTagsByRegexp(buf, "[\r\n]+]", "(.*?)\t(.*?)", false); |
| | 188 | // try "tag\tvalue\n" format |
| | 189 | if (tags!=null) return tags; |
| | 190 | |
| | 191 | // Format |
| | 192 | // a=b \n c=d \n "a b"=hello |
| | 193 | // SORRY: "a=b" = c is not supported fror now, only first = will be considered |
| | 194 | // a = "b=c" is OK |
| | 195 | // a = b=c - this method of parsing fails intentionally |
| | 196 | tags = readTagsByRegexp(buf, "[\\n\\t\\r]+", "(.*?)=(.*?)", true); |
| | 197 | // try format t1=v1\n t2=v2\n ... |
| | 198 | if (tags!=null) return tags; |
| | 199 | |
| | 200 | // JSON-format |
| | 201 | String bufJson = buf.trim(); |
| | 202 | // trim { }, if there are any |
| | 203 | if (bufJson.startsWith("{") && bufJson.endsWith("}") ) bufJson = bufJson.substring(1,bufJson.length()-1); |
| | 204 | tags = readTagsByRegexp(bufJson, "[\\s]*,[\\s]*", |
| | 205 | "[\\s]*(\\\".*?[^\\\\]\\\")"+"[\\s]*:[\\s]*"+"(\\\".*?[^\\\\]\\\")[\\s]*", true); |
| | 206 | if (tags!=null) return tags; |
| | 207 | |
| | 208 | // Free format |
| | 209 | // a 1 "b" 2 c=3 d 4 e "5" |
| | 210 | TextAnalyzer parser = new TextAnalyzer(buf); |
| | 211 | tags = parser.getFreeParsedTags(); |
| | 212 | return tags; |
| | 213 | } |
| | 214 | |
| | 215 | /** |
| | 216 | * Check tags for correctness and display warnings if needed |
| | 217 | * @param tags - map key->value to check |
| | 218 | * @return true if user decision was "OK" |
| | 219 | */ |
| | 220 | public static boolean validateTags(Map<String, String> tags) { |
| | 221 | String value; |
| | 222 | int r; |
| | 223 | int s = tags.size(); |
| | 224 | if (s > 30) { |
| | 225 | // Use trn() even if for english it makes no sense, as s > 30 |
| | 226 | r=warning(trn("There was {0} tag found in the buffer, it is suspicious!", |
| | 227 | "There were {0} tags found in the buffer, it is suspicious!", s, |
| | 228 | s), "", "toomanytags"); |
| | 229 | if (r==2) return false; if (r==3) return true; |
| | 230 | } |
| | 231 | for (String key: tags.keySet()) { |
| | 232 | value = tags.get(key); |
| | 233 | if (key.length()>50) { |
| | 234 | r = warning(tr("Key is too long:"), key+"="+value, "keytoolong"); |
| | 235 | if (r==2) return false; if (r==3) return true; |
| | 236 | } |
| | 237 | if (!key.matches("[a-zA-Z:_]*")) { |
| | 238 | r = warning(tr("Suspiciouns characters in tag:"), key, "keydoesnotmatch"); |
| | 239 | if (r==2) return false; if (r==3) return true; |
| | 240 | } |
| | 241 | if (value.length()>255) { |
| | 242 | r= warning(tr("Value too long (max 255 characters):"), value, "valuetoolong"); |
| | 243 | if (r==2) return false; if (r==3) return true; |
| | 244 | } |
| | 245 | } |
| | 246 | return true; |
| | 247 | } |
| | 248 | |
| | 249 | private static int warning(String text, String data, String code) { |
| | 250 | ExtendedDialog ed = new ExtendedDialog( |
| | 251 | Main.parent, |
| | 252 | tr("Do you want to paste these tags?"), |
| | 253 | new String[]{tr("Ok"), tr("Cancel"), tr("Ingore warnings")}); |
| | 254 | ed.setButtonIcons(new String[]{"ok.png", "cancel.png", "pastetags.png"}); |
| | 255 | ed.setContent("<html><b>"+text + "</b><br/><br/> <div width=\"300px\">"+XmlWriter.encode(data,true)+"</html>"); |
| | 256 | ed.setDefaultButton(2); |
| | 257 | ed.setCancelButton(2); |
| | 258 | ed.setIcon(JOptionPane.WARNING_MESSAGE); |
| | 259 | ed.toggleEnable(code); |
| | 260 | ed.showDialog(); |
| | 261 | Object o = ed.getValue(); |
| | 262 | if (o instanceof Integer) |
| | 263 | return ((Integer)o).intValue(); |
| | 264 | else |
| | 265 | return 2; |
| | 266 | } |
| | 267 | } |