/*
 * Hunt - A high-level D Programming Language Web framework that encourages rapid development and clean, pragmatic design.
 *
 * Copyright (C) 2015-2019, HuntLabs
 *
 * Website: https://www.huntlabs.net/
 *
 * Licensed under the Apache-2.0 License.
 *
 */

module hunt.framework.util.Url;
/**
 * A URL handling library.
 *
 * URLs are Uniform Resource Locators. They consist of a scheme and a host, with some optional
 * elements like port, path, username, and password.
 *
 * This module aims to make it simple to muck about with them.
 *
 * Example usage:
 * ---
 * auto url = "ssh://me:password@192.168.0.8/".parseURL;
 * auto files = system("ssh", url.toString, "ls").splitLines;
 * foreach (file; files) {
 *     system("scp", url ~ file, ".");
 * }
 * ---
 *
 * License: The MIT license.
 */

import std.conv;
import std.string;
import std.exception;


@safe:

/// An exception thrown when something bad happens with URLs.
///
/// Raised by parseURL when a string cannot be parsed, and by the percent-decoding
/// helpers when their input is malformed or is not valid UTF-8.
class URLException : Exception
{
    // Provides the standard (message, file, line, next) exception constructors.
    mixin basicExceptionCtors;
}

/**
 * A mapping from schemes to their default ports.
 *
 * This is not exhaustive. Not all schemes use ports. Not all schemes uniquely identify a port to
 * use even if they use ports. Entries here should be treated as best guesses.
 *
 * URL.port falls back to this table when no port was given explicitly, and URL.toString
 * uses it to omit a port that equals the scheme's default.
 */
enum ushort[string] schemeToDefaultPort = [
    "aaa": 3868,
    "aaas": 5658,
    "acap": 674,
    "amqp": 5672,
    "cap": 1026,
    "coap": 5683,
    "coaps": 5684,
    "dav": 443,
    "dict": 2628,
    "ftp": 21,
    "git": 9418,
    "go": 1096,
    "gopher": 70,
    "http": 80,
    "https": 443,
    "ws": 80,
    "wss": 443,
    "iac": 4569,
    "icap": 1344,
    "imap": 143,
    "ipp": 631,
    "ipps": 631,  // yes, they're both mapped to port 631
    "irc": 6667,  // De facto default port, not the IANA reserved port.
    "ircs": 6697,
    "iris": 702,  // defaults to iris.beep
    "iris.beep": 702,
    "iris.lwz": 715,
    "iris.xpc": 713,
    "iris.xpcs": 714,
    "jabber": 5222,  // client-to-server
    "ldap": 389,
    "ldaps": 636,
    "msrp": 2855,
    "msrps": 2855,
    "mtqp": 1038,
    "mupdate": 3905,
    "news": 119,
    "nfs": 2049,
    "pop": 110,
    "redis": 6379,
    "reload": 6084,
    "rsync": 873,
    "rtmfp": 1935,
    "rtsp": 554,
    "shttp": 80,
    "sieve": 4190,
    "sip": 5060,
    "sips": 5061,
    "smb": 445,
    "smtp": 25,
    "snews": 563,
    "snmp": 161,
    "soap.beep": 605,
    "ssh": 22,
    "stun": 3478,
    "stuns": 5349,
    "svn": 3690,
    "teamspeak": 9987,
    "telnet": 23,
    "tftp": 69,
    "tip": 3372,
    "mysql": 3306,
    "postgresql": 5432,
];
/**
 * A Uniform Resource Locator.
 *
 * URLs can be parsed (see parseURL) and implicitly convert to strings.
 */
struct URL
{
    /// Hash built from the same fields that opEquals and opCmp compare (see asTuple),
    /// so equal URLs hash equally. The fragment is not part of it.
    hash_t toHash() const @safe nothrow
    {
        return asTuple().toHash();
    }

    /// The URL scheme. For instance, ssh, ftp, or https.
    string scheme;

    /// The username in this URL. Usually absent. If present, there will also be a password.
    string user;

    /// The password in this URL. Usually absent.
    string pass;

    /// The hostname.
    string host;

    /// The `key=value` pairs of the query string, as filled in by the parser.
    /// Keys and values are stored exactly as they appear in `query` (not percent-decoded).
    string[string] queryArr;

    /**
     * The port.
     *
     * This is inferred from the scheme if it isn't present in the URL itself.
     * If the scheme is not known and the port is not present, the port will be given as 0.
     * For some schemes, port will not be sensible -- for instance, file or chrome-extension.
     *
     * If you explicitly need to detect whether the user provided a port, check the providedPort
     * field.
     */
    @property ushort port() const nothrow
    {
        if (providedPort != 0) {
            return providedPort;
        }
        if (auto p = scheme in schemeToDefaultPort) {
            return *p;
        }
        return 0;
    }

    /**
     * Set the port.
     *
     * This sets the providedPort field and is provided for convenience.
     */
    @property ushort port(ushort value) nothrow
    {
        return providedPort = value;
    }

    /// The port that was explicitly provided in the URL.
    ushort providedPort;

    /**
     * The path.
     *
     * For instance, in the URL https://cnn.com/news/story/17774?visited=false, the path is
     * "/news/story/17774".
     */
    string path;

    /**
     * The query parameters associated with this URL.
     */
    string query;

    /**
     * The fragment. In web documents, this typically refers to an anchor element.
     * For instance, in the URL https://cnn.com/news/story/17774#header2, the fragment is "header2".
     */
    string fragment;

    /**
     * Convert this URL to a string.
     * The string is properly formatted and usable for, eg, a web request.
     */
    string toString() const
    {
        return toString(false);
    }

    /**
     * Convert this URL to a string.
     *
     * The string is intended to be human-readable rather than machine-readable.
     */
    string toHumanReadableString() const
    {
        return toString(true);
    }

    ///
    unittest
    {
        auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL;
        assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye", url.toString);
        assert(url.toHumanReadableString == "https://☂.☃.org/?hi=bye", url.toString);
    }

    unittest
    {
        assert("http://example.org/some_path".parseURL.toHumanReadableString ==
                "http://example.org/some_path");
    }

    unittest
    {
        // An empty (but non-null) user must not produce a stray ":@".
        URL url;
        url.scheme = "https";
        url.host = "example.org";
        url.user = "";
        assert(url.toString == "https://example.org/", url.toString);
    }

    unittest
    {
        // A URL without a host must still be printable.
        URL url;
        url.scheme = "file";
        url.path = "/etc/passwd";
        assert(url.toString == "file:///etc/passwd", url.toString);
    }

    /**
     * Shared implementation of toString and toHumanReadableString.
     *
     * Params:
     *   humanReadable = when true, user, password, host and path are emitted as stored;
     *                   when false, they are percent-encoded (host: punycode-encoded).
     *
     * The query string is always emitted exactly as stored.
     */
    private string toString(bool humanReadable) const
    {
        import std.array : Appender;
        Appender!string s;
        s ~= scheme;
        s ~= "://";
        // Test the length, not the array: `if (user)` only checks for a non-null pointer and is
        // therefore true for an empty string such as the literal "".
        if (user.length) {
            s ~= humanReadable ? user : user.percentEncode;
            s ~= ":";
            s ~= humanReadable ? pass : pass.percentEncode;
            s ~= "@";
        }
        // toPuny inspects the first character of the host, so never hand it an empty one.
        s ~= (humanReadable || host.length == 0) ? host : host.toPuny;
        if (providedPort) {
            // Omit the port when it is the scheme's well-known default.
            auto defaultPort = scheme in schemeToDefaultPort;
            if (defaultPort is null || *defaultPort != providedPort) {
                s ~= ":";
                s ~= providedPort.to!string;
            }
        }
        string p = path;
        if (p.length == 0 || p == "/") {
            s ~= '/';
        } else {
            if (humanReadable) {
                s ~= p;
            } else {
                // Encode each segment separately so the '/' separators survive.
                if (p[0] == '/') {
                    p = p[1..$];
                }
                foreach (part; p.split('/')) {
                    s ~= '/';
                    s ~= part.percentEncode;
                }
            }
        }
        if (query.length) {
            s ~= '?';
            s ~= query;
        }
        if (fragment.length) {
            s ~= '#';
            s ~= fragment.percentEncode;
        }
        return s.data;
    }

    /// Implicitly convert URLs to strings.
    alias toString this;

    /**
        Compare two URLs.

        I tried to make the comparison produce a sort order that seems natural, so it's not identical
        to sorting based on .toString(). For instance, username/password have lower priority than
        host. The scheme has higher priority than port but lower than host.

        While the output of this is guaranteed to provide a total ordering, and I've attempted to make
        it human-friendly, it isn't guaranteed to be consistent between versions. The implementation
        and its results can change without a minor version increase.
    */
    int opCmp(const URL other) const
    {
        return asTuple.opCmp(other.asTuple);
    }

    /// The fields that define identity and ordering, in priority order.
    /// The fragment is deliberately not included.
    private auto asTuple() const nothrow
    {
        import std.typecons : tuple;
        return tuple(host, scheme, port, user, pass, path, query);
    }

    /// Equality checks. A string is parsed first; a string that does not parse is never equal.
    bool opEquals(string other) const
    {
        URL o;
        if (!tryParseURL(other, o))
        {
            return false;
        }
        return asTuple() == o.asTuple();
    }

    /// Ditto
    bool opEquals(ref const URL other) const
    {
        return asTuple() == other.asTuple();
    }

    /// Ditto
    bool opEquals(const URL other) const
    {
        return asTuple() == other.asTuple();
    }

    unittest
    {
        import std.algorithm, std.array, std.format;
        assert("http://example.org/some_path".parseURL > "http://example.org/other_path".parseURL);
        alias sorted = std.algorithm.sort;
        auto parsedURLs =
        [
            "http://example.org/some_path",
            "http://example.org:81/other_path",
            "http://example.org/other_path",
            "https://example.org/first_path",
            "http://example.xyz/other_other_path",
            "http://me:secret@blog.ikeran.org/wp_admin",
        ].map!(x => x.parseURL).array;
        auto urls = sorted(parsedURLs).map!(x => x.toHumanReadableString).array;
        auto expected =
        [
            "http://me:secret@blog.ikeran.org/wp_admin",
            "http://example.org/other_path",
            "http://example.org/some_path",
            "http://example.org:81/other_path",
            "https://example.org/first_path",
            "http://example.xyz/other_other_path",
        ];
        assert(cmp(urls, expected) == 0, "expected:\n%s\ngot:\n%s".format(expected, urls));
    }

    unittest
    {
        auto a = "http://x.org/a?b=c".parseURL;
        auto b = "http://x.org/a?d=e".parseURL;
        auto c = "http://x.org/a?b=a".parseURL;
        assert(a < b);
        assert(c < b);
        assert(c < a);
    }

    /**
     * The append operator (~).
     *
     * The append operator for URLs returns a new URL with the given string appended as a path
     * element to the URL's path. It only adds new path elements (or sequences of path elements).
     *
     * Don't worry about path separators; whether you include them or not, it will just work.
     *
     * Query elements are copied.
     *
     * Examples:
     * ---
     * auto random = "http://testdata.org/random".parseURL;
     * auto randInt = random ~ "int";
     * writeln(randInt);  // prints "http://testdata.org/random/int"
     * ---
     */
    URL opBinary(string op : "~")(string subsequentPath) {
        URL other = this;
        other ~= subsequentPath;
        return other;
    }

    /**
     * The append-in-place operator (~=).
     *
     * The append operator for URLs adds a path element to this URL. It only adds new path elements
     * (or sequences of path elements).
     *
     * Don't worry about path separators; whether you include them or not, it will just work.
     *
     * Examples:
     * ---
     * auto random = "http://testdata.org/random".parseURL;
     * random ~= "int";
     * writeln(random);  // prints "http://testdata.org/random/int"
     * ---
     */
    URL opOpAssign(string op : "~")(string subsequentPath) {
        if (path.endsWith("/")) {
            if (subsequentPath.startsWith("/")) {
                path ~= subsequentPath[1..$];
            } else {
                path ~= subsequentPath;
            }
        } else {
            if (!subsequentPath.startsWith("/")) {
                path ~= '/';
            }
            path ~= subsequentPath;
        }
        return this;
    }

    /**
     * Convert a relative URL to an absolute URL.
     *
     * This is designed so that you can scrape a webpage and quickly convert links within the
     * page to URLs you can actually work with, but you're clever; I'm sure you'll find more uses
     * for it.
     *
     * It's biased toward HTTP family URLs; as one quirk, "//" is interpreted as "same scheme,
     * different everything else", which might not be desirable for all schemes.
     *
     * This only handles URLs, not URIs; if you pass in 'mailto:bob.dobbs@subgenius.org', for
     * instance, this will give you our best attempt to parse it as a URL.
     *
     * The query string and fragment of the result come from `other` only; those of this URL
     * are not carried over, and this URL is never modified.
     *
     * Throws:
     *   URLException if `other` (or the combined path) is not validly percent-encoded or
     *   cannot be parsed.
     *
     * Examples:
     * ---
     * auto base = "https://example.org/passworddb?secure=false".parseURL;
     *
     * // Download https://example.org/passworddb/by-username/dhasenan
     * download(base.resolve("by-username/dhasenan"));
     *
     * // Download https://example.org/static/style.css
     * download(base.resolve("/static/style.css"));
     *
     * // Download https://cdn.example.net/jquery.js
     * download(base.resolve("https://cdn.example.net/jquery.js"));
     * ---
     */
    URL resolve(string other)
    {
        if (other.length == 0) return this;
        if (other[0] == '/')
        {
            if (other.length > 1 && other[1] == '/')
            {
                // Uncommon syntax: a link like "//wikimedia.org" means "same scheme, switch URL"
                return parseURL(this.scheme ~ ':' ~ other);
            }
        }
        else
        {
            auto schemeSep = other.indexOf("://");
            if (schemeSep >= 0 && schemeSep < other.indexOf("/"))
            // separate URL
            {
                return other.parseURL;
            }
        }

        URL ret = this;
        ret.path = "";
        // Start from a clean slate for everything the relative reference may define. Without
        // this the base's query and fragment leak into the result, and because queryArr is an
        // associative array (shared by reference after the copy above), parsing the new query
        // would add entries to the base URL as well.
        ret.query = null;
        ret.queryArr = null;
        ret.fragment = null;
        if (other[0] != '/')
        {
            // relative to something
            if (!this.path.length)
            {
                // nothing to be relative to
                other = "/" ~ other;
            }
            else if (this.path[$-1] == '/')
            {
                // directory-style path for the current thing
                // resolve relative to this directory
                other = this.path ~ other;
            }
            else
            {
                // this is a file-like thing
                // find the 'directory' and relative to that
                other = this.path[0..this.path.lastIndexOf('/') + 1] ~ other;
            }
        }
        // collapse /foo/../ to /
        if (other.indexOf("/../") >= 0)
        {
            import std.array : Appender, array;
            import std.string : split;
            import std.algorithm.iteration : joiner, filter;
            string[] parts = other.split('/');
            for (int i = 0; i < parts.length; i++)
            {
                if (parts[i] == "..")
                {
                    // Cancel the ".." against the nearest preceding segment that is still present.
                    for (int j = i - 1; j >= 0; j--)
                    {
                        if (parts[j] != null)
                        {
                            parts[j] = null;
                            parts[i] = null;
                            break;
                        }
                    }
                }
            }
            other = "/" ~ parts.filter!(x => x != null).joiner("/").to!string;
        }
        if (!parsePathAndQuery(ret, other))
        {
            throw new URLException("failed to resolve relative URL " ~ other);
        }
        return ret;
    }

    unittest
    {
        auto a = "http://alcyius.com/dndtools/index.html".parseURL;
        auto b = a.resolve("contacts/index.html");
        assert(b.toString == "http://alcyius.com/dndtools/contacts/index.html");
    }

    unittest
    {
        auto a = "http://alcyius.com/dndtools/index.html?a=b".parseURL;
        auto b = a.resolve("contacts/index.html?foo=bar");
        assert(b.toString == "http://alcyius.com/dndtools/contacts/index.html?foo=bar");
    }

    unittest
    {
        auto a = "http://alcyius.com/dndtools/index.html".parseURL;
        auto b = a.resolve("../index.html");
        assert(b.toString == "http://alcyius.com/index.html", b.toString);
    }

    unittest
    {
        auto a = "http://alcyius.com/dndtools/foo/bar/index.html".parseURL;
        auto b = a.resolve("../index.html");
        assert(b.toString == "http://alcyius.com/dndtools/foo/index.html", b.toString);
    }

    unittest
    {
        // The base's query and fragment do not leak into the result, and the base is untouched.
        auto base = "https://example.org/this?query=value#frag".parseURL;
        auto r = base.resolve("/that?other=1");
        assert(r.toString == "https://example.org/that?other=1", r.toString);
        assert(base.resolve("/that").toString == "https://example.org/that");
        assert(base.query == "query=value");
        assert(("other" in base.queryArr) is null);
    }
}
/**
 * Parse a URL from a string.
 *
 * This attempts to parse a wide range of URLs as people might actually type them. Some mistakes
 * may be made. However, any URL in a correct format will be parsed correctly.
 *
 * Params:
 *   value = the text to parse
 *   url   = receives the parsed URL; reset to URL.init before parsing starts
 *
 * Returns: true on success, false if the text could not be parsed (unterminated IPv6 literal,
 *          non-numeric or out-of-range port, invalid percent-encoding).
 */
bool tryParseURL(string value, out URL url)
{
    url = URL.init;
    // scheme:[//[user:password@]host[:port]][/]path[?query][#fragment]
    // Scheme is optional in common use. We infer 'http' if it's not given.
    auto i = value.indexOf("//");
    // "//" introduces the authority only at the very start ("//host/path") or directly after
    // the scheme's colon ("scheme://host"). A "//" further along -- say in the path of
    // "example.org/a//b" -- is ordinary text and must not be mistaken for a scheme separator.
    bool hasAuthorityMarker = false;
    if (i == 0) {
        hasAuthorityMarker = true;
    } else if (i > 0 && value[i - 1] == ':') {
        hasAuthorityMarker = value[0 .. i - 1].indexOfAny("/?#") == -1;
    }
    if (hasAuthorityMarker) {
        if (i > 1) {
            url.scheme = value[0..i-1];
        }
        value = value[i+2 .. $];
    } else {
        url.scheme = "http";
    }
    // Check for an ipv6 hostname.
    // [user:password@]host[:port]][/]path[?query][#fragment
    i = value.indexOfAny([':', '/', '[']);
    if (i == -1) {
        // Just a hostname.
        url.host = value.fromPuny;
        return true;
    }

    if (value[i] == ':') {
        // This could be between username and password, or it could be between host and port.
        auto j = value.indexOfAny(['@', '/']);
        if (j > -1 && value[j] == '@') {
            try {
                url.user = value[0..i].percentDecode;
                url.pass = value[i+1 .. j].percentDecode;
            } catch (URLException) {
                return false;
            }
            value = value[j+1 .. $];
        }
    }

    // It's trying to be a host/port, not a user/pass.
    i = value.indexOfAny([':', '/', '[']);
    if (i == -1) {
        url.host = value.fromPuny;
        return true;
    }

    // Find the hostname. It's either an ipv6 address (which has special rules) or not (which doesn't
    // have special rules). -- The main sticking point is that ipv6 addresses have colons, which we
    // handle specially, and are offset with square brackets.
    if (value[i] == '[') {
        auto j = value[i..$].indexOf(']');
        if (j < 0) {
            // unterminated ipv6 addr
            return false;
        }
        // includes square brackets
        url.host = value[i .. i+j+1];
        value = value[i+j+1 .. $];
        if (value.length == 0) {
            // read to end of string; we finished parse
            return true;
        }
        if (value[0] != ':' && value[0] != '?' && value[0] != '/') {
            return false;
        }
    } else {
        // Normal host.
        url.host = value[0..i].fromPuny;
        value = value[i .. $];
    }

    if (value[0] == ':') {
        // The port runs up to the path, or -- when there is no path -- up to the query or fragment.
        auto end = value.indexOfAny("/?#");
        if (end == -1) {
            end = value.length;
        }
        try {
            url.port = value[1 .. end].to!ushort;
        } catch (ConvException) {
            return false;
        }
        value = value[end .. $];
        if (value.length == 0) {
            return true;
        }
    }
    // A try* function reports failure through its return value; make sure a decoding error in
    // the path cannot escape as an exception.
    try {
        return parsePathAndQuery(url, value);
    } catch (URLException) {
        return false;
    }
}
/**
 * Parse the `path[?query][#fragment]` tail of a URL into `url`.
 *
 * The path and fragment are percent-decoded. The query is stored verbatim in `url.query`, and
 * each `key=value` pair of it (split on '&', then on the first '=') is stored, still undecoded,
 * in `url.queryArr`. Pairs without '=' are skipped.
 *
 * Params:
 *   url   = the URL whose path, query, queryArr and fragment are filled in
 *   value = the text starting at the path
 *
 * Returns: true on success, false if the path or fragment is not validly percent-encoded.
 */
private bool parsePathAndQuery(ref URL url, string value)
{
    try
    {
        auto i = value.indexOfAny("?#");
        if (i == -1)
        {
            // Path only. This decode sits inside the try as well, so a malformed path is
            // reported as `false` like every other failure instead of throwing.
            url.path = value.percentDecode;
            return true;
        }

        url.path = value[0..i].percentDecode;

        auto c = value[i];
        value = value[i + 1 .. $];
        if (c == '?')
        {
            i = value.indexOf('#');
            string query;
            if (i < 0)
            {
                query = value;
                value = null;
            }
            else
            {
                query = value[0..i];
                value = value[i + 1 .. $];
            }
            url.query = query;

            // Build a fresh table rather than writing into whatever url.queryArr currently
            // references: associative arrays are shared between copies of a URL, so inserting
            // in place could modify another URL's parameters.
            string[string] params;
            foreach (kv; query.split('&'))
            {
                auto pos = kv.indexOf('=');
                if (pos != -1)
                {
                    params[kv[0 .. pos]] = kv[pos+1 .. $];
                }
            }
            url.queryArr = params;
        }

        // Whatever is left (possibly nothing) is the fragment.
        url.fragment = value.percentDecode;
    }
    catch (URLException)
    {
        return false;
    }

    return true;
}
// Building URLs field by field and printing them.
// NOTE: most assertions below are commented out; they refer to a `queryParams` member that the
// URL struct in this file does not have. Only the "Port, user, pass" case is actually checked.
unittest {
    {
        // Basic.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/foo/bar";
            //queryParams.add("hello", "world");
            //queryParams.add("gibe", "clay");
            fragment = "frag";
        }
        //assert(
            // Not sure what order it'll come out in.
            //url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
            //url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
            //url.toString);
    }
    {
        // Percent encoded.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/f☃o";
            //queryParams.add("❄", "❀");
            //queryParams.add("[", "]");
            fragment = "ş";
        }
        //assert(
            // Not sure what order it'll come out in.
            //url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" ||
            //url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F",
            //url.toString);
    }
    {
        // Port, user, pass.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            user = "dhasenan";
            pass = "itsasecret";
            port = 17;
        }
        assert(
            url.toString == "https://dhasenan:itsasecret@example.org:17/",
            url.toString);
    }
    {
        // Query with no path.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            //queryParams.add("hi", "bye");
        }
        //assert(
            //url.toString == "https://example.org/?hi=bye",
            //url.toString);
    }
}

// A leading "//" means "no scheme given": the host follows immediately.
unittest
{
    auto url = "//foo/bar".parseURL;
    assert(url.host == "foo", "expected host foo, got " ~ url.host);
    assert(url.path == "/bar");
}

unittest
{
    // ipv6 hostnames!
    {
        // full range of data
        auto url = parseURL("https://bob:secret@[::1]:2771/foo/bar");
        assert(url.scheme == "https", url.scheme);
        assert(url.user == "bob", url.user);
        assert(url.pass == "secret", url.pass);
        assert(url.host == "[::1]", url.host);
        assert(url.port == 2771, url.port.to!string);
        assert(url.path == "/foo/bar", url.path);
    }

    // minimal
    {
        auto url = parseURL("[::1]");
        assert(url.host == "[::1]", url.host);
    }

    // some random bits
    {
        auto url = parseURL("http://[::1]/foo");
        assert(url.scheme == "http", url.scheme);
        assert(url.host == "[::1]", url.host);
        assert(url.path == "/foo", url.path);
    }

    {
        auto url = parseURL("https://[2001:0db8:0:0:0:0:1428:57ab]/?login=true#justkidding");
        assert(url.scheme == "https");
        assert(url.host == "[2001:0db8:0:0:0:0:1428:57ab]");
        assert(url.path == "/");
        assert(url.fragment == "justkidding");
    }
}

// host:port without a scheme is parsed as http, and ~ appends path segments.
unittest
{
    auto url = "localhost:5984".parseURL;
    auto url2 = url ~ "db1";
    assert(url2.toString == "http://localhost:5984/db1", url2.toString);
    auto url3 = url2 ~ "_all_docs";
    assert(url3.toString == "http://localhost:5984/db1/_all_docs", url3.toString);
}

/// Documented example of building URLs by hand. It repeats the cases of the first test in
/// this group (plus one with repeated query keys); as there, only the "Port, user, pass"
/// case still has a live assertion.
unittest {
    {
        // Basic.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/foo/bar";
            //queryParams.add("hello", "world");
            //queryParams.add("gibe", "clay");
            fragment = "frag";
        }
        //assert(
            // Not sure what order it'll come out in.
            //url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
            //url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
            //url.toString);
    }
    {
        // Passing an array of query values.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/foo/bar";
            //queryParams.add("hello", "world");
            //queryParams.add("hello", "aether");
            fragment = "frag";
        }
        //assert(
            // Not sure what order it'll come out in.
            //url.toString == "https://example.org/foo/bar?hello=world&hello=aether#frag" ||
            //url.toString == "https://example.org/foo/bar?hello=aether&hello=world#frag",
            //url.toString);
    }
    {
        // Percent encoded.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/f☃o";
            //queryParams.add("❄", "❀");
            //queryParams.add("[", "]");
            fragment = "ş";
        }
        //assert(
            // Not sure what order it'll come out in.
            //url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" ||
            //url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F",
            //url.toString);
    }
    {
        // Port, user, pass.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            user = "dhasenan";
            pass = "itsasecret";
            port = 17;
        }
        assert(
            url.toString == "https://dhasenan:itsasecret@example.org:17/",
            url.toString);
    }
    {
        // Query with no path.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            //queryParams.add("hi", "bye");
        }
        //assert(
            //url.toString == "https://example.org/?hi=bye",
            //url.toString);
    }
}
// Parsing decodes percent-escapes in user, password, path and fragment, and printing the parsed
// URL reproduces the original text.
unittest {
    // Percent decoding.

    // http://#:!:@
    auto urlString = "http://%23:%21%3A@example.org/%7B/%7D?%3B&%26=%3D#%23hash%EF%BF%BD";
    auto url = urlString.parseURL;
    assert(url.user == "#");
    assert(url.pass == "!:");
    assert(url.host == "example.org");
    assert(url.path == "/{/}");
    //assert(url.queryParams[";"].front == "");
    //assert(url.queryParams["&"].front == "=");
    assert(url.fragment == "#hash�");

    // Round trip.
    assert(urlString == urlString.parseURL.toString, urlString.parseURL.toString);
    assert(urlString == urlString.parseURL.toString.parseURL.toString);
}

// A punycode host is stored in its Unicode form.
unittest {
    auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL;
    assert(url.host == "☂.☃.org", url.host);
}

// A Unicode host is printed in its punycode form.
unittest {
    auto url = "https://☂.☃.org/?hi=bye".parseURL;
    assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye");
}

/// The append operators (~ and ~=) never double or drop the separating slash.
unittest {
    // There's an existing path.
    auto url = parseURL("http://example.org/foo");
    URL url2;
    // No slash? Assume it needs a slash.
    assert((url ~ "bar").toString == "http://example.org/foo/bar");
    // With slash? Don't add another.
    url2 = url ~ "/bar";
    assert(url2.toString == "http://example.org/foo/bar", url2.toString);
    url ~= "bar";
    assert(url.toString == "http://example.org/foo/bar");

    // Path already ends with a slash; don't add another.
    url = parseURL("http://example.org/foo/");
    assert((url ~ "bar").toString == "http://example.org/foo/bar");
    // Still don't add one even if you're appending with a slash.
    assert((url ~ "/bar").toString == "http://example.org/foo/bar");
    url ~= "/bar";
    assert(url.toString == "http://example.org/foo/bar");

    // No path.
    url = parseURL("http://example.org");
    assert((url ~ "bar").toString == "http://example.org/bar");
    assert((url ~ "/bar").toString == "http://example.org/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/bar");

    // Path is just a slash.
    url = parseURL("http://example.org/");
    assert((url ~ "bar").toString == "http://example.org/bar");
    assert((url ~ "/bar").toString == "http://example.org/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/bar", url.toString);

    // No path, just fragment.
    url = "ircs://irc.freenode.com/#d".parseURL;
    assert(url.toString == "ircs://irc.freenode.com/#d", url.toString);
}
unittest
{
    // basic resolve()
    {
        auto base = "https://example.org/this/".parseURL;
        assert(base.resolve("that") == "https://example.org/this/that");
        assert(base.resolve("/that") == "https://example.org/that");
        assert(base.resolve("//example.net/that") == "https://example.net/that");
    }

    // ensure we don't preserve query params
    // NOTE(review): every assertion in this case is commented out, so the behaviour named in
    // the line above is not currently verified by this test.
    {
        auto base = "https://example.org/this?query=value&other=value2".parseURL;
        //assert(base.resolve("that") == "https://example.org/that");
        //assert(base.resolve("/that") == "https://example.org/that");
        //assert(base.resolve("tother/that") == "https://example.org/tother/that");
        //assert(base.resolve("//example.net/that") == "https://example.net/that");
    }
}


// Compile-time check only: a URL can be passed where std.net.curl.get expects its argument
// (via the implicit conversion to string). No request is made.
unittest
{
    import std.net.curl;
    auto url = "http://example.org".parseURL;
    assert(is(typeof(std.net.curl.get(url))));
}
/**
 * Parse the input string as a URL.
 *
 * This is the throwing counterpart of tryParseURL.
 *
 * Params:
 *   value = the text to parse
 *
 * Returns: the parsed URL.
 *
 * Throws:
 *   URLException if the string was in an incorrect format.
 */
URL parseURL(string value) {
    URL parsed;
    immutable succeeded = tryParseURL(value, parsed);
    if (!succeeded) {
        throw new URLException("failed to parse URL " ~ value);
    }
    return parsed;
}
///
unittest {
    {
        // Infer scheme
        auto u1 = parseURL("example.org");
        assert(u1.scheme == "http");
        assert(u1.host == "example.org");
        assert(u1.path == "");
        assert(u1.port == 80);
        assert(u1.providedPort == 0);
        assert(u1.fragment == "");
    }
    {
        // Simple host and scheme
        auto u1 = parseURL("https://example.org");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "");
        assert(u1.port == 443);
        assert(u1.providedPort == 0);
    }
    {
        // With path
        auto u1 = parseURL("https://example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path);
        assert(u1.port == 443);
        assert(u1.providedPort == 0);
    }
    {
        // With explicit port
        auto u1 = parseURL("https://example.org:1021/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path);
        assert(u1.port == 1021);
        assert(u1.providedPort == 1021);
    }
    {
        // With user
        auto u1 = parseURL("https://bob:secret@example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 443);
        assert(u1.user == "bob");
        assert(u1.pass == "secret");
    }
    {
        // With user, URL-encoded
        auto u1 = parseURL("https://bob%21:secret%21%3F@example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 443);
        assert(u1.user == "bob!");
        assert(u1.pass == "secret!?");
    }
    {
        // With user and port and path
        auto u1 = parseURL("https://bob:secret@example.org:2210/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 2210);
        assert(u1.user == "bob");
        assert(u1.pass == "secret");
        assert(u1.fragment == "");
    }
    {
        // With query string
        auto u1 = parseURL("https://example.org/?login=true");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/", "expected path: / actual path: " ~ u1.path);
        //assert(u1.queryParams["login"].front == "true");
        assert(u1.fragment == "");
    }
    {
        // With query string and fragment
        auto u1 = parseURL("https://example.org/?login=true#justkidding");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/", "expected path: / actual path: " ~ u1.path);
        //assert(u1.queryParams["login"].front == "true");
        assert(u1.fragment == "justkidding");
    }
    {
        // With URL-encoded values
        auto u1 = parseURL("https://example.org/%E2%98%83?%E2%9D%84=%3D#%5E");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/☃", "expected path: /☃ actual path: " ~ u1.path);
        //assert(u1.queryParams["❄"].front == "=");
        assert(u1.fragment == "^");
    }
}

unittest {
    assert(parseURL("http://example.org").port == 80);
    assert(parseURL("http://example.org:5326").port == 5326);

    auto url = parseURL("redis://admin:password@redisbox.local:2201/path?query=value#fragment");
    assert(url.scheme == "redis");
    assert(url.user == "admin");
    assert(url.pass == "password");

    assert(parseURL("example.org").toString == "http://example.org/");
    assert(parseURL("http://example.org:80").toString == "http://example.org/");

    assert(parseURL("localhost:8070").toString == "http://localhost:8070/");
}

/**
 * Percent-encode a string.
 *
 * URL components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as URL components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * ASCII letters, digits and `-._~` are copied through unchanged. Every other code point is
 * replaced by the `%XX` escapes (upper-case hex) of the octets of its UTF-8 encoding.
 *
 * Params:
 *   raw = the text to encode
 *
 * Returns: the encoded text.
 */
string percentEncode(string raw) {
    // We *must* encode these characters: :/?#[]@!$&'()*+,;="
    // We *can* encode any other characters.
    // We *should not* encode alpha, numeric, or -._~.
    import std.utf : encode;
    import std.array : Appender;
    Appender!string result;
    foreach (dchar codePoint; raw) {
        switch (codePoint) {
            case 'a': .. case 'z':
            case 'A': .. case 'Z':
            case '0': .. case '9':
            case '-', '.', '_', '~':
                // Unreserved: safe to emit as-is.
                result ~= codePoint;
                break;
            default:
                // Anything else -- a space, punctuation, or a non-ASCII code point -- is
                // written as one %XX escape per octet of its UTF-8 form (one octet for
                // 7-bit ASCII, up to four otherwise).
                char[4] utf8;
                immutable octetCount = encode(utf8, codePoint);
                foreach (octet; utf8[0 .. octetCount]) {
                    result ~= format("%%%02X", cast(ubyte) octet);
                }
                break;
        }
    }
    return result.data;
}

///
unittest {
    assert(percentEncode("IDontNeedNoPercentEncoding") == "IDontNeedNoPercentEncoding");
    assert(percentEncode("~~--..__") == "~~--..__");
    assert(percentEncode("0123456789") == "0123456789");

    string e;

    e = percentEncode("☃");
    assert(e == "%E2%98%83", "expected %E2%98%83 but got" ~ e);
}
/**
 * Percent-decode a string.
 *
 * This is the inverse of percent encoding: every `%XX` escape in the input is replaced by the
 * byte it denotes (the byte-level work is delegated to percentDecodeRaw), and the resulting byte
 * sequence is then checked to be valid UTF-8 before it is handed back as a string.
 *
 * Params:
 *   encoded = percent-encoded text
 * Returns: the decoded text, guaranteed to be valid UTF-8.
 * Throws: URLException if the decoded bytes do not form valid UTF-8. Malformed escapes such as
 *         "%es" or "%e" also end in a URLException (see the unittest below).
 */
string percentDecode(string encoded)
{
    import std.utf : validate, UTFException;

    // The decoded bytes are only *claimed* to be UTF-8 at this point; verify before returning.
    auto text = cast(string) percentDecodeRaw(encoded);
    try
    {
        validate(text);
    }
    catch (UTFException)
    {
        throw new URLException(
                "The percent-encoded data `" ~ encoded ~ "` does not represent a valid UTF-8 sequence.");
    }
    return text;
}

///
unittest {
    // True when decoding `s` is refused with a URLException.
    static bool rejected(string s) {
        try {
            percentDecode(s);
            return false;
        } catch (URLException) {
            return true;
        }
    }

    // Nothing to decode: input comes back unchanged.
    foreach (plain; ["IDontNeedNoPercentDecoding", "~~--..__", "0123456789"]) {
        assert(percentDecode(plain) == plain);
    }

    // Hex digits are accepted in either case.
    foreach (snowman; ["%E2%98%83", "%e2%98%83"]) {
        string e = percentDecode(snowman);
        assert(e == "☃", "expected a snowman but got" ~ e);
    }

    // %ES is an invalid percent sequence: 'S' is not a hex digit.
    assert(rejected("%es"), "expected exception not thrown");
    // Only one character follows the percent sign.
    assert(rejected("%e"), "expected exception not thrown");
}
/**
 * Percent-decode a string into a ubyte array.
 *
 * Every `%XX` escape is replaced by the single byte it denotes; all other bytes of the input are
 * copied through unchanged. No validation is performed on the output, so the result may or may
 * not be valid UTF-8.
 *
 * Params:
 *   encoded = percent-encoded text
 * Returns: the decoded bytes.
 * Throws: URLException if a `%` is not followed by two hex digits.
 */
immutable(ubyte)[] percentDecodeRaw(string encoded)
{
    // We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now.
    import std.array : Appender;
    Appender!(immutable(ubyte)[]) app;
    for (size_t i = 0; i < encoded.length; i++) {
        if (encoded[i] != '%') {
            app ~= encoded[i];
            continue;
        }
        // Deliberately `i + 2 >= length` rather than `i >= length - 2`: the subtraction is
        // unsigned and wraps around for inputs shorter than two bytes (e.g. "%"), which would
        // skip this check and read past the end of the string.
        if (i + 2 >= encoded.length) {
            throw new URLException("Invalid percent encoded value: expected two characters after " ~
                    "percent symbol. Error at index " ~ i.to!string);
        }
        if (isHex(encoded[i + 1]) && isHex(encoded[i + 2])) {
            auto b = fromHex(encoded[i + 1]);
            auto c = fromHex(encoded[i + 2]);
            app ~= cast(ubyte)((b << 4) | c);
        } else {
            throw new URLException("Invalid percent encoded value: expected two hex digits after " ~
                    "percent symbol. Error at index " ~ i.to!string);
        }
        // Skip the two hex digits just consumed; the loop increment steps past them.
        i += 2;
    }
    return app.data;
}

/// True if `c` is an ASCII hexadecimal digit (0-9, a-f, A-F).
private bool isHex(char c) {
    return ('0' <= c && '9' >= c) ||
        ('a' <= c && 'f' >= c) ||
        ('A' <= c && 'F' >= c);
}

/**
 * Numeric value (0-15) of the hexadecimal digit `s`, accepting either case.
 *
 * Callers are expected to check isHex first: for any other character indexOf yields -1, which
 * the cast turns into 255.
 */
private ubyte fromHex(char s) {
    enum caseDiff = 'a' - 'A';
    if (s >= 'a' && s <= 'z') {
        // Fold to upper case so a single lookup table suffices.
        s -= caseDiff;
    }
    return cast(ubyte)("0123456789ABCDEF".indexOf(s));
}

/**
 * Convert a hostname to its ASCII (Punycode) form.
 *
 * A name starting with `[` is taken to be an IPv6 literal and returned untouched, as is an empty
 * name. A name that is entirely made of permitted ASCII characters is returned unchanged.
 * Otherwise the name is split on `.` and each label is run through punyEncode.
 *
 * Throws: URLException if an ASCII character outside the permitted set is found before the
 *         first non-ASCII character.
 */
private string toPuny(string unicodeHostname)
{
    if (unicodeHostname.length == 0)
    {
        // Nothing to inspect or encode; indexing [0] below would be out of bounds.
        return unicodeHostname;
    }
    if (unicodeHostname[0] == '[')
    {
        // It's an ipv6 name.
        return unicodeHostname;
    }
    bool mustEncode = false;
    foreach (i, dchar d; unicodeHostname) {
        auto c = cast(uint) d;
        if (c > 0x80) {
            mustEncode = true;
            break;
        }
        // Rejected ASCII ranges: everything below ',', then ':' .. '@' (0x3A-0x40),
        // '[' .. '`' (0x5B-0x60), and '{' upwards.
        if (c < 0x2C || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B)) {
            throw new URLException(
                    format(
                        "domain name '%s' contains illegal character '%s' at position %s",
                        unicodeHostname, d, i));
        }
    }
    if (!mustEncode) {
        return unicodeHostname;
    }
    import std.algorithm.iteration : map;
    return unicodeHostname.split('.').map!punyEncode.join(".");
}

/// Convert a hostname back from Punycode: each `.`-separated label is run through punyDecode.
private string fromPuny(string hostname)
{
    import std.algorithm.iteration : map;
    return hostname.split('.').map!punyDecode.join(".");
}

private {
    // Punycode parameters; the values are those given in RFC 3492 section 5.
    enum delimiter = '-';           // separates the literal ASCII part from the encoded part
    enum marker = "xn--";           // prefix identifying an encoded label
    enum ulong damp = 700;
    enum ulong tmin = 1;
    enum ulong tmax = 26;
    enum ulong skew = 38;
    enum ulong base = 36;
    enum ulong initialBias = 72;
    enum dchar initialN = cast(dchar)128;

    /**
     * Bias adaptation step shared by punyEncode and punyDecode.
     *
     * Params:
     *   delta = the delta that was just encoded or decoded
     *   numPoints = number of code points handled so far, including the current one
     *   firstTime = true for the first delta of a label, which is damped more strongly
     * Returns: the bias to use for the next delta.
     */
    ulong adapt(ulong delta, ulong numPoints, bool firstTime) {
        if (firstTime) {
            delta /= damp;
        } else {
            delta /= 2;
        }
        delta += delta / numPoints;
        ulong k = 0;
        while (delta > ((base - tmin) * tmax) / 2) {
            delta /= (base - tmin);
            k += base;
        }
        return k + (((base - tmin + 1) * delta) / (delta + skew));
    }
}
/**
 * Encode the input string using the Punycode algorithm.
 *
 * Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
 * with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com"
 * in Punycode, you will get "xn--m3h.xn--n3h.com".
 *
 * In order to puny-encode a domain name, you must split it into its components. The following will
 * typically suffice:
 * ---
 * auto domain = "☂.☃.com";
 * auto encodedDomain = domain.splitter(".").map!(punyEncode).join(".");
 * ---
 *
 * Params:
 *   input = a single domain name segment
 * Returns: `input` itself when it contains only code points below U+0080; otherwise the
 *          "xn--"-prefixed encoding.
 * Throws: URLException if the running delta overflows 32 bits or no code point can be selected.
 */
string punyEncode(string input)
{
    import std.array : Appender;
    ulong delta = 0;
    dchar n = initialN;
    auto bias = initialBias;
    Appender!string output;
    output ~= marker;
    auto pushed = 0;
    auto codePoints = 0;
    // Pass 1: copy the basic (ASCII) code points verbatim. Basic means strictly below
    // initialN (0x80); U+0080 itself has to go through the encoder.
    foreach (dchar c; input) {
        codePoints++;
        if (c < initialN) {
            output ~= c;
            pushed++;
        }
    }
    if (pushed < codePoints) {
        if (pushed > 0) {
            output ~= delimiter;
        }
    } else {
        // No encoding to do.
        return input;
    }
    bool first = true;
    // Pass 2: handle the remaining code points in increasing order of value.
    while (pushed < codePoints) {
        // Smallest code point not handled yet (i.e. >= n).
        auto best = dchar.max;
        foreach (dchar c; input) {
            if (n <= c && c < best) {
                best = c;
            }
        }
        if (best == dchar.max) {
            throw new URLException("failed to find a new codepoint to process during punyencode");
        }
        // Widen before multiplying so that the overflow check below sees the true value.
        delta += cast(ulong)(best - n) * (pushed + 1);
        if (delta > uint.max) {
            // TODO better error message
            throw new URLException("overflow during punyencode");
        }
        n = best;
        foreach (dchar c; input) {
            if (c < n) {
                delta++;
            }
            if (c == n) {
                // Emit delta as a variable-length integer in base 36.
                ulong q = delta;
                auto k = base;
                while (true) {
                    ulong t;
                    if (k <= bias) {
                        t = tmin;
                    } else if (k >= bias + tmax) {
                        t = tmax;
                    } else {
                        t = k - bias;
                    }
                    if (q < t) {
                        break;
                    }
                    output ~= digitToBasic(t + ((q - t) % (base - t)));
                    q = (q - t) / (base - t);
                    k += base;
                }
                output ~= digitToBasic(q);
                pushed++;
                bias = adapt(delta, pushed, first);
                first = false;
                delta = 0;
            }
        }
        delta++;
        n++;
    }
    return output.data;
}
/**
 * Decode the input string using the Punycode algorithm.
 *
 * Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
 * with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com"
 * in Punycode, you will get "xn--m3h.xn--n3h.com".
 *
 * In order to puny-decode a domain name, you must split it into its components. The following will
 * typically suffice:
 * ---
 * auto domain = "xn--m3h.xn--n3h.com";
 * auto decodedDomain = domain.splitter(".").map!(punyDecode).join(".");
 * ---
 *
 * Params:
 *   input = a single domain name segment
 * Returns: `input` itself when it does not start with "xn--"; otherwise the decoded segment.
 * Throws: URLException if the encoded part is truncated, contains a character that is not a
 *         Punycode digit, or decodes to something that is not a valid code point.
 */
string punyDecode(string input) {
    import std.array : insertInPlace;
    import std.utf : isValidDchar;

    if (!input.startsWith(marker)) {
        return input;
    }
    input = input[marker.length..$];

    // let n = initial_n
    // let i = 0
    // let bias = initial_bias
    // let output = an empty string indexed from 0
    dchar n = initialN;
    size_t i = 0;
    auto bias = initialBias;
    dchar[] output;
    // This reserves a bit more than necessary, but it should be more efficient overall than just
    // appending and inserting willy-nilly.
    output.reserve(input.length);

    // consume all code points before the last delimiter (if there is one)
    // and copy them to output
    // if more than zero code points were consumed then consume one more
    // (which will be the last delimiter)
    auto end = input.lastIndexOf(delimiter);
    if (end > -1) {
        foreach (dchar c; input[0..end]) {
            output ~= c;
        }
        input = input[end+1 .. $];
    }

    // while the input is not exhausted do begin
    size_t pos = 0;
    while (pos < input.length) {
        // let oldi = i
        // let w = 1
        auto oldi = i;
        ulong w = 1;
        // for k = base to infinity in steps of base do begin
        for (ulong k = base; k < uint.max; k += base) {
            // consume a code point, or fail if there was none to consume
            if (pos >= input.length) {
                throw new URLException(
                        "invalid punycode: input ends in the middle of an encoded code point");
            }
            // Note that the input is all ASCII, so we can simply index the input string bytewise.
            auto c = input[pos];
            pos++;
            // let digit = the code point's digit-value, fail if it has none
            // (basicToDigit signals "no digit value" by returning base)
            auto digit = basicToDigit(c);
            if (digit >= base) {
                throw new URLException(
                        "invalid punycode: character at index " ~ (pos - 1).to!string ~
                        " of the encoded part is not a valid digit");
            }
            // let i = i + digit * w
            i += digit * w;
            // let t = tmin if k <= bias {+ tmin}, or
            //     tmax if k >= bias + tmax, or k - bias otherwise
            ulong t;
            if (k <= bias) {
                t = tmin;
            } else if (k >= bias + tmax) {
                t = tmax;
            } else {
                t = k - bias;
            }
            // if digit < t then break
            if (digit < t) {
                break;
            }
            // let w = w * (base - t)
            w *= (base - t);
            // end
        }
        // let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?)
        bias = adapt(i - oldi, output.length + 1, oldi == 0);
        // let n = n + i div (length(output) + 1), fail on overflow
        ulong next = n + i / (output.length + 1);
        if (next > dchar.max || !isValidDchar(cast(dchar) next)) {
            throw new URLException("invalid punycode: decoded value is not a valid code point");
        }
        n = cast(dchar) next;
        // let i = i mod (length(output) + 1)
        i %= (output.length + 1);
        // {if n is a basic code point then fail}
        // (We aren't actually going to fail here; it's clear what this means.)
        // insert n into output at position i
        (() @trusted { output.insertInPlace(i, n); })();  // should be @safe but isn't marked
        // increment i
        i++;
        // end
    }
    return output.to!string;
}

// Lifted from punycode.js.
/// Map a digit value 0..35 to its basic code point: 0..25 become 'a'..'z', 26..35 become '0'..'9'.
private dchar digitToBasic(ulong digit) {
    return cast(dchar)(digit + 22 + 75 * (digit < 26));
}

// Lifted from punycode.js.
/// Map a basic code point to its digit value: '0'..'9' give 26..35, letters of either case give
/// 0..25. Any other character yields `base`, which is not a valid digit value.
private uint basicToDigit(char c) {
    if (c >= '0' && c <= '9') {
        return cast(uint)(c - '0') + 26;
    }
    if (c >= 'A' && c <= 'Z') {
        return cast(uint)(c - 'A');
    }
    if (c >= 'a' && c <= 'z') {
        return cast(uint)(c - 'a');
    }
    return base;
}

unittest {
    import std.stdio;

    // One non-ASCII code point, then the same one repeated.
    assert(punyEncode("b\u00FCcher") == "xn--bcher-kva");
    assert(punyEncode("b\u00FCc\u00FCher") == "xn--bcher-kvab");

    // Two distinct non-ASCII code points.
    auto twoKinds = punyEncode("ýbücher");
    assert(twoKinds == "xn--bcher-kvaf", twoKinds);

    assert(punyEncode("mañana") == "xn--maana-pta");

    // A segment with no ASCII characters at all.
    auto arabic = punyEncode(
            "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
            ~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F");
    assert(arabic == "xn--egbpdaj6bu4bxfgehfvwxn", arabic);
}

unittest {
    auto arabic = punyDecode("xn--egbpdaj6bu4bxfgehfvwxn");
    assert(arabic == "ليهمابتكلموشعربي؟", arabic);

    assert(punyDecode("xn--maana-pta") == "mañana");
}

unittest {
    import std.string, std.algorithm, std.array, std.range;

    // Whole domain names are converted segment by segment.
    auto decodedDomain = "xn--m3h.xn--n3h.com".splitter(".").map!(punyDecode).join(".");
    assert(decodedDomain == "☂.☃.com", decodedDomain);

    auto encodedDomain = "☂.☃.com".splitter(".").map!(punyEncode).join(".");
    assert(encodedDomain == "xn--m3h.xn--n3h.com", encodedDomain);
}