/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.openstreetmap.josm.data.validation.routines;

import java.util.Arrays;
import java.util.List;
import java.util.Locale;

/**
 * <p><b>Domain name</b> validation routines.</p>
 *
 * <p>
 * This validator provides methods for validating Internet domain names
 * and top-level domains.
 * </p>
 *
 * <p>Domain names are evaluated according
 * to the standards <a href="http://www.ietf.org/rfc/rfc1034.txt">RFC1034</a>,
 * section 3, and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC1123</a>,
 * section 2.1. No accommodation is provided for the specialized needs of
 * other applications; if the domain name has been URL-encoded, for example,
 * validation will fail even though the equivalent plaintext version of the
 * same name would have passed.
 * </p>
 *
 * <p>
 * Validation is also provided for top-level domains (TLDs) as defined and
 * maintained by the Internet Assigned Numbers Authority (IANA):
 * </p>
 *
 *   <ul>
 *     <li>{@link #isValidInfrastructureTld} - validates infrastructure TLDs
 *         (<code>.arpa</code>, etc.)</li>
 *     <li>{@link #isValidGenericTld} - validates generic TLDs
 *         (<code>.com, .org</code>, etc.)</li>
 *     <li>{@link #isValidCountryCodeTld} - validates country code TLDs
 *         (<code>.us, .uk, .cn</code>, etc.)</li>
 *   </ul>
 *
 * <p>
 * TLD lookups are case-insensitive: the candidate is lower-cased with
 * {@link Locale#ROOT} before it is compared against the built-in lists, so
 * the result does not depend on the default locale of the JVM.
 * </p>
 *
 * <p>
 * (<b>NOTE</b>: This class does not provide IP address lookup for domain names or
 * methods to ensure that a given domain name matches a specific IP; see
 * {@link java.net.InetAddress} for that functionality.)
 * </p>
 *
 * @version $Revision: 1227719 $ $Date: 2012-01-05 18:45:51 +0100 (Thu, 05 Jan 2012) $
 * @since Validator 1.4
 */
public class DomainValidator extends AbstractValidator {

    // Regular expression strings for hostnames (derived from RFC2396 and RFC 1123)
    private static final String DOMAIN_LABEL_REGEX = "\\p{Alnum}(?>[\\p{Alnum}-]*\\p{Alnum})*";
    private static final String TOP_LABEL_REGEX = "\\p{Alpha}{2,}";
    // One or more dot-terminated labels followed by the TLD, which is the only capturing group.
    private static final String DOMAIN_NAME_REGEX =
            "^(?:" + DOMAIN_LABEL_REGEX + "\\.)+" + "(" + TOP_LABEL_REGEX + ")$";

    /** Whether local names (see {@link #isValidLocalTld}) and bare hostnames are accepted. */
    private final boolean allowLocal;

    /**
     * Singleton instance of this validator, which
     *  doesn't consider local addresses as valid.
     */
    private static final DomainValidator DOMAIN_VALIDATOR = new DomainValidator(false);

    /**
     * Singleton instance of this validator, which does
     *  consider local addresses valid.
     */
    private static final DomainValidator DOMAIN_VALIDATOR_WITH_LOCAL = new DomainValidator(true);

    /**
     * RegexValidator for matching domains.
     */
    private final RegexValidator domainRegex =
            new RegexValidator(DOMAIN_NAME_REGEX);

    /**
     * RegexValidator for matching a local hostname (a single label without any dot).
     */
    private final RegexValidator hostnameRegex =
            new RegexValidator(DOMAIN_LABEL_REGEX);

    /**
     * Returns the singleton instance of this validator. It
     *  will not consider local addresses as valid.
     * @return the singleton instance of this validator
     */
    public static DomainValidator getInstance() {
        return DOMAIN_VALIDATOR;
    }

    /**
     * Returns the singleton instance of this validator,
     *  with local validation as required.
     * @param allowLocal Should local addresses be considered valid?
     * @return the singleton instance of this validator
     */
    public static DomainValidator getInstance(boolean allowLocal) {
        return allowLocal ? DOMAIN_VALIDATOR_WITH_LOCAL : DOMAIN_VALIDATOR;
    }

    /**
     * Private constructor; instances are only handed out through {@link #getInstance()}
     * and {@link #getInstance(boolean)}.
     * @param allowLocal whether local addresses should be considered valid
     */
    private DomainValidator(boolean allowLocal) {
        this.allowLocal = allowLocal;
    }

    /**
     * Returns true if the specified <code>String</code> parses
     * as a valid domain name with a recognized top-level domain.
     * Labels may contain letters of either case and the top-level
     * domain is compared case-insensitively.
     * <p>
     * When this validator allows local addresses, a name that is not a
     * dotted domain name is still accepted if it is a single valid
     * hostname label.
     * </p>
     * @param domain the parameter to check for domain name syntax
     * @return true if the parameter is a valid domain name
     */
    public boolean isValid(String domain) {
        String[] groups = domainRegex.match(domain);
        if (groups != null && groups.length > 0) {
            // The only capturing group of DOMAIN_NAME_REGEX is the top-level label.
            return isValidTld(groups[0]);
        }
        // Not a dotted name: only acceptable as a bare local hostname.
        return allowLocal && hostnameRegex.isValid(domain);
    }

    /**
     * Returns true if the specified <code>String</code> matches any
     * IANA-defined top-level domain. Leading dots are ignored if present.
     * The search is case-insensitive. When this validator allows local
     * addresses, the local TLDs are accepted as well.
     * @param tld the parameter to check for TLD status; <code>null</code> is never a TLD
     * @return true if the parameter is a TLD
     */
    public boolean isValidTld(String tld) {
        if (allowLocal && isValidLocalTld(tld)) {
            return true;
        }
        return isValidInfrastructureTld(tld)
                || isValidGenericTld(tld)
                || isValidCountryCodeTld(tld);
    }

    /**
     * Returns true if the specified <code>String</code> matches any
     * IANA-defined infrastructure top-level domain. Leading dots are
     * ignored if present. The search is case-insensitive.
     * @param iTld the parameter to check for infrastructure TLD status;
     *        <code>null</code> is never a TLD
     * @return true if the parameter is an infrastructure TLD
     */
    public boolean isValidInfrastructureTld(String iTld) {
        return containsTld(INFRASTRUCTURE_TLD_LIST, iTld);
    }

    /**
     * Returns true if the specified <code>String</code> matches any
     * IANA-defined generic top-level domain. Leading dots are ignored
     * if present. The search is case-insensitive.
     * @param gTld the parameter to check for generic TLD status;
     *        <code>null</code> is never a TLD
     * @return true if the parameter is a generic TLD
     */
    public boolean isValidGenericTld(String gTld) {
        return containsTld(GENERIC_TLD_LIST, gTld);
    }

    /**
     * Returns true if the specified <code>String</code> matches any
     * IANA-defined country code top-level domain. Leading dots are
     * ignored if present. The search is case-insensitive.
     * @param ccTld the parameter to check for country code TLD status;
     *        <code>null</code> is never a TLD
     * @return true if the parameter is a country code TLD
     */
    public boolean isValidCountryCodeTld(String ccTld) {
        return containsTld(COUNTRY_CODE_TLD_LIST, ccTld);
    }

    /**
     * Returns true if the specified <code>String</code> matches any
     * widely used "local" domains (localhost or localdomain). Leading dots are
     * ignored if present. The search is case-insensitive.
     * @param iTld the parameter to check for local TLD status;
     *        <code>null</code> is never a TLD
     * @return true if the parameter is a local TLD
     */
    public boolean isValidLocalTld(String iTld) {
        return containsTld(LOCAL_TLD_LIST, iTld);
    }

    /**
     * Normalizes a TLD candidate (lower case, no leading dot) and looks it up in a list.
     * @param tlds the list of known lower-case TLDs to search
     * @param candidate the raw TLD candidate, may be <code>null</code>
     * @return true if the normalized candidate is contained in <code>tlds</code>
     */
    private static boolean containsTld(List<String> tlds, String candidate) {
        if (candidate == null) {
            return false;
        }
        // Locale.ROOT keeps the mapping locale-independent; with the default locale
        // a Turkish JVM would turn "INFO" into "\u0131nfo" and miss the entry.
        return tlds.contains(chompLeadingDot(candidate.toLowerCase(Locale.ROOT)));
    }

    /**
     * Strips a single leading dot from the given string, if there is one.
     * @param str the string to process, must not be <code>null</code>
     * @return <code>str</code> without its leading dot
     */
    private static String chompLeadingDot(String str) {
        return str.startsWith(".") ? str.substring(1) : str;
    }

    // ---------------------------------------------
    // ----- TLDs defined by IANA
    // ----- Authoritative and comprehensive list at:
    // ----- http://data.iana.org/TLD/tlds-alpha-by-domain.txt

    private static final String[] INFRASTRUCTURE_TLDS = new String[] {
        "arpa",               // internet infrastructure
        "root"                // diagnostic marker for non-truncated root zone
    };

    private static final String[] GENERIC_TLDS = new String[] {
        "aero",               // air transport industry
        "asia",               // Pan-Asia/Asia Pacific
        "biz",                // businesses
        "cat",                // Catalan linguistic/cultural community
        "com",                // commercial enterprises
        "coop",               // cooperative associations
        "info",               // informational sites
        "jobs",               // Human Resource managers
        "mobi",               // mobile products and services
        "museum",             // museums, surprisingly enough
        "name",               // individuals' sites
        "net",                // internet support infrastructure/business
        "org",                // noncommercial organizations
        "pro",                // credentialed professionals and entities
        "tel",                // contact data for businesses and individuals
        "travel",             // entities in the travel industry
        "gov",                // United States Government
        "edu",                // accredited postsecondary US education entities
        "mil",                // United States Military
        "int"                 // organizations established by international treaty
    };

    private static final String[] COUNTRY_CODE_TLDS = new String[] {
        "ac",                 // Ascension Island
        "ad",                 // Andorra
        "ae",                 // United Arab Emirates
        "af",                 // Afghanistan
        "ag",                 // Antigua and Barbuda
        "ai",                 // Anguilla
        "al",                 // Albania
        "am",                 // Armenia
        "an",                 // Netherlands Antilles
        "ao",                 // Angola
        "aq",                 // Antarctica
        "ar",                 // Argentina
        "as",                 // American Samoa
        "at",                 // Austria
        "au",                 // Australia (includes Ashmore and Cartier Islands and Coral Sea Islands)
        "aw",                 // Aruba
        "ax",                 // Åland
        "az",                 // Azerbaijan
        "ba",                 // Bosnia and Herzegovina
        "bb",                 // Barbados
        "bd",                 // Bangladesh
        "be",                 // Belgium
        "bf",                 // Burkina Faso
        "bg",                 // Bulgaria
        "bh",                 // Bahrain
        "bi",                 // Burundi
        "bj",                 // Benin
        "bm",                 // Bermuda
        "bn",                 // Brunei Darussalam
        "bo",                 // Bolivia
        "br",                 // Brazil
        "bs",                 // Bahamas
        "bt",                 // Bhutan
        "bv",                 // Bouvet Island
        "bw",                 // Botswana
        "by",                 // Belarus
        "bz",                 // Belize
        "ca",                 // Canada
        "cc",                 // Cocos (Keeling) Islands
        "cd",                 // Democratic Republic of the Congo (formerly Zaire)
        "cf",                 // Central African Republic
        "cg",                 // Republic of the Congo
        "ch",                 // Switzerland
        "ci",                 // Côte d'Ivoire
        "ck",                 // Cook Islands
        "cl",                 // Chile
        "cm",                 // Cameroon
        "cn",                 // China, mainland
        "co",                 // Colombia
        "cr",                 // Costa Rica
        "cu",                 // Cuba
        "cv",                 // Cape Verde
        "cx",                 // Christmas Island
        "cy",                 // Cyprus
        "cz",                 // Czech Republic
        "de",                 // Germany
        "dj",                 // Djibouti
        "dk",                 // Denmark
        "dm",                 // Dominica
        "do",                 // Dominican Republic
        "dz",                 // Algeria
        "ec",                 // Ecuador
        "ee",                 // Estonia
        "eg",                 // Egypt
        "er",                 // Eritrea
        "es",                 // Spain
        "et",                 // Ethiopia
        "eu",                 // European Union
        "fi",                 // Finland
        "fj",                 // Fiji
        "fk",                 // Falkland Islands
        "fm",                 // Federated States of Micronesia
        "fo",                 // Faroe Islands
        "fr",                 // France
        "ga",                 // Gabon
        "gb",                 // Great Britain (United Kingdom)
        "gd",                 // Grenada
        "ge",                 // Georgia
        "gf",                 // French Guiana
        "gg",                 // Guernsey
        "gh",                 // Ghana
        "gi",                 // Gibraltar
        "gl",                 // Greenland
        "gm",                 // The Gambia
        "gn",                 // Guinea
        "gp",                 // Guadeloupe
        "gq",                 // Equatorial Guinea
        "gr",                 // Greece
        "gs",                 // South Georgia and the South Sandwich Islands
        "gt",                 // Guatemala
        "gu",                 // Guam
        "gw",                 // Guinea-Bissau
        "gy",                 // Guyana
        "hk",                 // Hong Kong
        "hm",                 // Heard Island and McDonald Islands
        "hn",                 // Honduras
        "hr",                 // Croatia (Hrvatska)
        "ht",                 // Haiti
        "hu",                 // Hungary
        "id",                 // Indonesia
        "ie",                 // Ireland (Éire)
        "il",                 // Israel
        "im",                 // Isle of Man
        "in",                 // India
        "io",                 // British Indian Ocean Territory
        "iq",                 // Iraq
        "ir",                 // Iran
        "is",                 // Iceland
        "it",                 // Italy
        "je",                 // Jersey
        "jm",                 // Jamaica
        "jo",                 // Jordan
        "jp",                 // Japan
        "ke",                 // Kenya
        "kg",                 // Kyrgyzstan
        "kh",                 // Cambodia (Khmer)
        "ki",                 // Kiribati
        "km",                 // Comoros
        "kn",                 // Saint Kitts and Nevis
        "kp",                 // North Korea
        "kr",                 // South Korea
        "kw",                 // Kuwait
        "ky",                 // Cayman Islands
        "kz",                 // Kazakhstan
        "la",                 // Laos (currently being marketed as the official domain for Los Angeles)
        "lb",                 // Lebanon
        "lc",                 // Saint Lucia
        "li",                 // Liechtenstein
        "lk",                 // Sri Lanka
        "lr",                 // Liberia
        "ls",                 // Lesotho
        "lt",                 // Lithuania
        "lu",                 // Luxembourg
        "lv",                 // Latvia
        "ly",                 // Libya
        "ma",                 // Morocco
        "mc",                 // Monaco
        "md",                 // Moldova
        "me",                 // Montenegro
        "mg",                 // Madagascar
        "mh",                 // Marshall Islands
        "mk",                 // Republic of Macedonia
        "ml",                 // Mali
        "mm",                 // Myanmar
        "mn",                 // Mongolia
        "mo",                 // Macau
        "mp",                 // Northern Mariana Islands
        "mq",                 // Martinique
        "mr",                 // Mauritania
        "ms",                 // Montserrat
        "mt",                 // Malta
        "mu",                 // Mauritius
        "mv",                 // Maldives
        "mw",                 // Malawi
        "mx",                 // Mexico
        "my",                 // Malaysia
        "mz",                 // Mozambique
        "na",                 // Namibia
        "nc",                 // New Caledonia
        "ne",                 // Niger
        "nf",                 // Norfolk Island
        "ng",                 // Nigeria
        "ni",                 // Nicaragua
        "nl",                 // Netherlands
        "no",                 // Norway
        "np",                 // Nepal
        "nr",                 // Nauru
        "nu",                 // Niue
        "nz",                 // New Zealand
        "om",                 // Oman
        "pa",                 // Panama
        "pe",                 // Peru
        "pf",                 // French Polynesia With Clipperton Island
        "pg",                 // Papua New Guinea
        "ph",                 // Philippines
        "pk",                 // Pakistan
        "pl",                 // Poland
        "pm",                 // Saint-Pierre and Miquelon
        "pn",                 // Pitcairn Islands
        "pr",                 // Puerto Rico
        "ps",                 // Palestinian territories (PA-controlled West Bank and Gaza Strip)
        "pt",                 // Portugal
        "pw",                 // Palau
        "py",                 // Paraguay
        "qa",                 // Qatar
        "re",                 // Réunion
        "ro",                 // Romania
        "rs",                 // Serbia
        "ru",                 // Russia
        "rw",                 // Rwanda
        "sa",                 // Saudi Arabia
        "sb",                 // Solomon Islands
        "sc",                 // Seychelles
        "sd",                 // Sudan
        "se",                 // Sweden
        "sg",                 // Singapore
        "sh",                 // Saint Helena
        "si",                 // Slovenia
        "sj",                 // Svalbard and Jan Mayen Islands Not in use (Norwegian dependencies; see .no)
        "sk",                 // Slovakia
        "sl",                 // Sierra Leone
        "sm",                 // San Marino
        "sn",                 // Senegal
        "so",                 // Somalia
        "sr",                 // Suriname
        "st",                 // São Tomé and Príncipe
        "su",                 // Soviet Union (deprecated)
        "sv",                 // El Salvador
        "sy",                 // Syria
        "sz",                 // Swaziland
        "tc",                 // Turks and Caicos Islands
        "td",                 // Chad
        "tf",                 // French Southern and Antarctic Lands
        "tg",                 // Togo
        "th",                 // Thailand
        "tj",                 // Tajikistan
        "tk",                 // Tokelau
        "tl",                 // East Timor (Timor-Leste)
        "tm",                 // Turkmenistan
        "tn",                 // Tunisia
        "to",                 // Tonga
        "tp",                 // East Timor (deprecated old code)
        "tr",                 // Turkey
        "tt",                 // Trinidad and Tobago
        "tv",                 // Tuvalu
        "tw",                 // Taiwan, Republic of China
        "tz",                 // Tanzania
        "ua",                 // Ukraine
        "ug",                 // Uganda
        "uk",                 // United Kingdom
        "um",                 // United States Minor Outlying Islands
        "us",                 // United States of America
        "uy",                 // Uruguay
        "uz",                 // Uzbekistan
        "va",                 // Vatican City State
        "vc",                 // Saint Vincent and the Grenadines
        "ve",                 // Venezuela
        "vg",                 // British Virgin Islands
        "vi",                 // U.S. Virgin Islands
        "vn",                 // Vietnam
        "vu",                 // Vanuatu
        "wf",                 // Wallis and Futuna
        "ws",                 // Samoa (formerly Western Samoa)
        "ye",                 // Yemen
        "yt",                 // Mayotte
        "yu",                 // Serbia and Montenegro (originally Yugoslavia)
        "za",                 // South Africa
        "zm",                 // Zambia
        "zw",                 // Zimbabwe
    };

    private static final String[] LOCAL_TLDS = new String[] {
        "localhost",          // RFC2606 defined
        "localdomain"         // Also widely used as localhost.localdomain
    };

    // List views over the arrays above; every entry is lower case, which is what
    // containsTld relies on after normalizing its argument.
    private static final List<String> INFRASTRUCTURE_TLD_LIST = Arrays.asList(INFRASTRUCTURE_TLDS);
    private static final List<String> GENERIC_TLD_LIST = Arrays.asList(GENERIC_TLDS);
    private static final List<String> COUNTRY_CODE_TLD_LIST = Arrays.asList(COUNTRY_CODE_TLDS);
    private static final List<String> LOCAL_TLD_LIST = Arrays.asList(LOCAL_TLDS);
}