/**
    URL parsing routines.

    Copyright: © 2012-2017 Sönke Ludwig
    License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
    Authors: Sönke Ludwig
*/
module dub.internal.vibecompat.inet.url;

import dub.internal.vibecompat.inet.path2;
import dub.internal.vibecompat.inet.urlencode;

import std.array;
import std.algorithm;
import std.conv;
import std.exception;
import std.string;
import std.traits : isInstanceOf;
import std.ascii : isAlpha, isASCII, toLower;
import std.uri: decode, encode;

import core.checkedint : addu;


/** Parses a user-provided URL with relaxed rules.

    Unlike `URL.parse`, this allows the URL to use special characters as part of
    the host name and path, automatically employing puny code or percent-encoding
    to convert this to a valid URL.

    Params:
        url = String representation of the URL
        default_schema = If `url` does not contain a schema name, the URL parser
            may choose to use this schema instead. A browser might use "http" or
            "https", for example.

    Returns:
        The parsed URL in normalized form (see `URL.normalize`).
*/
URL parseUserURL(string url, string default_schema)
{
    if (default_schema.length && !url.startsWith("/") && !url.canFind("://"))
        url = default_schema ~ "://" ~ url;

    return URL(url, false).normalized;
}

unittest {
    // special characters in path
    auto url = parseUserURL("http://example.com/hello-🌍", "foo");
    assert(url.pathString == "/hello-%F0%9F%8C%8D");
    url = parseUserURL("http://example.com/안녕하세요-세계", "foo");
    assert(url.pathString == "/%EC%95%88%EB%85%95%ED%95%98%EC%84%B8%EC%9A%94-%EC%84%B8%EA%B3%84");
    // special characters in host name
    url = parseUserURL("http://hello-🌍.com/", "foo");
    assert(url.host == "xn--hello--8k34e.com");
    url = parseUserURL("http://hello-🌍.com:8080/", "foo");
    assert(url.host == "xn--hello--8k34e.com");
    url = parseUserURL("http://i-❤-이모티콘.io", "foo");
    assert(url.host == "xn--i---5r6aq903fubqabumj4g.io");
    url = parseUserURL("https://hello🌍.i-❤-이모티콘.com", "foo");
    assert(url.host == "xn--hello-oe93d.xn--i---5r6aq903fubqabumj4g.com");
    // default schema addition
    assert(parseUserURL("example.com/foo/bar", "sftp") == URL("sftp://example.com/foo/bar"));
    assert(parseUserURL("example.com:1234", "https") == URL("https://example.com:1234/"));
}


/**
    Represents a URL decomposed into its components.
*/
struct URL {
@safe:
    private {
        string m_schema;
        InetPath m_path;
        string m_host;
        ushort m_port;       // 0 means "no explicit port"
        string m_username;
        string m_password;
        string m_queryString;
        string m_anchor;
    }

    /// Constructs a new URL object from its components.
    this(string schema, string host, ushort port, InetPath path) pure nothrow
    in {
        assert(isValidSchema(schema), "Invalid URL schema name: " ~ schema);
        assert(host.length == 0 || isValidHostName(host), "Invalid URL host name: " ~ host);
    }
    do {
        m_schema = schema;
        m_host = host;
        m_port = port;
        m_path = path;
    }
    /// ditto
    this(string schema, InetPath path) pure nothrow
    in { assert(isValidSchema(schema)); }
    do {
        this(schema, null, 0, path);
    }
    /// ditto
    this(string schema, string host, ushort port, PosixPath path) pure nothrow
    in {
        assert(isValidSchema(schema));
        assert(host.length == 0 || isValidHostName(host));
    }
    do {
        InetPath ip;
        try ip = cast(InetPath)path;
        catch (Exception e) assert(false, e.msg); // InetPath should be able to capture all paths
        this(schema, host, port, ip);
    }
    /// ditto
    this(string schema, PosixPath path) pure nothrow
    in { assert(isValidSchema(schema)); }
    do {
        this(schema, null, 0, path);
    }
    /// ditto
    this(string schema, string host, ushort port, WindowsPath path) pure nothrow
    in {
        assert(isValidSchema(schema));
        assert(host.length == 0 || isValidHostName(host));
    }
    do {
        InetPath ip;
        try ip = cast(InetPath)path;
        catch (Exception e) assert(false, e.msg); // InetPath should be able to capture all paths
        this(schema, host, port, ip);
    }
    /// ditto
    this(string schema, WindowsPath path) pure nothrow
    in { assert(isValidSchema(schema)); }
    do {
        this(schema, null, 0, path);
    }

    /** Constructs a "file:" URL from a native file system path.

        Note that the path must be absolute. On Windows, both, paths starting
        with a drive letter and UNC paths are supported.

        Throws:
            `Exception` if `path` is not absolute.
    */
    this(WindowsPath path) pure
    {
        import std.algorithm.iteration : map;
        import std.range : chain, only, repeat;

        enforce(path.absolute, "Only absolute paths can be converted to a URL.");

        // treat UNC paths properly
        if (path.startsWith(WindowsPath(`\\`))) {
            auto segs = path.bySegment;
            // skip the two leading segments; the third one is taken as the host
            segs.popFront();
            segs.popFront();
            auto host = segs.front.name;
            segs.popFront();

            InetPath ip;
            ip = InetPath(only(InetPath.Segment("", '/'))
                .chain(segs.map!(s => cast(InetPath.Segment)s)));

            this("file", host, 0, ip);
        } else {
            // Non-UNC paths have no host. This used to pass `host`, which
            // resolved to the (still empty) `this.host` property.
            this("file", null, 0, cast(InetPath)path);
        }
    }
    /// ditto
    this(PosixPath path) pure
    {
        enforce(path.absolute, "Only absolute paths can be converted to a URL.");

        this("file", null, 0, cast(InetPath)path);
    }

    /** Constructs a URL from its string representation.

        TODO: additional validation required (e.g. valid host and user names and port)
    */
    this(string url_string)
    {
        this(url_string, true);
    }

    /*  Shared parsing routine.

        Params:
            url_string = The URL text to parse
            encoded = `true` if `url_string` is already a valid, encoded URL;
                `false` to puny-encode the host labels and percent-encode the
                local part (used by `parseUserURL`)
    */
    private this(string url_string, bool encoded)
    {
        auto str = url_string;
        enforce(str.length > 0, "Empty URL.");
        if( str[0] != '/' ){
            auto idx = str.indexOf(':');
            enforce(idx > 0, "No schema in URL:"~str);
            m_schema = str[0 .. idx];
            enforce(m_schema[0].isAlpha,
                "Schema must start with an alphabetical char, found: " ~
                m_schema[0]);
            str = str[idx+1 .. $];
            bool requires_host = false;

            if (str.startsWith("//")) {
                // proto://server/path style
                requires_host = true;
                str = str[2 .. $];
            }

            // everything up to the first '/' is the authority part
            auto si = str.indexOf('/');
            if( si < 0 ) si = str.length;
            auto ai = str[0 .. si].indexOf('@');
            sizediff_t hs = 0;
            if( ai >= 0 ){
                hs = ai+1;
                auto ci = str[0 .. ai].indexOf(':');
                if( ci >= 0 ){
                    m_username = str[0 .. ci];
                    m_password = str[ci+1 .. ai];
                } else m_username = str[0 .. ai];
                enforce(m_username.length > 0, "Empty user name in URL.");
            }

            m_host = str[hs .. si];

            // Stores the port found after the first ':' in `src` (if any) and
            // returns the index of that ':'.
            auto findPort ( string src )
            {
                auto pi = src.indexOf(':');
                if(pi > 0) {
                    enforce(pi < src.length-1, "Empty port in URL.");
                    m_port = to!ushort(src[pi+1..$]);
                }
                return pi;
            }


            auto ip6 = m_host.indexOf('[');
            if (ip6 == 0) { // [ must be first char
                auto pe = m_host.indexOf(']');
                if (pe > 0) {
                    findPort(m_host[pe..$]);
                    m_host = m_host[1 .. pe];
                }
            }
            else {
                auto pi = findPort(m_host);
                if(pi > 0) {
                    m_host = m_host[0 .. pi];
                }
                if (!encoded)
                    m_host = m_host.splitter('.').map!(punyEncode).join('.');
            }

            enforce(!requires_host || m_schema == "file" || m_host.length > 0,
                "Empty server name in URL.");
            str = str[si .. $];
        }

        this.localURI = (encoded) ? str : str.encode;
    }
    /// ditto
    static URL parse(string url_string)
    {
        return URL(url_string);
    }
    /// ditto
    static URL fromString(string url_string)
    {
        return URL(url_string);
    }

    /// The schema/protocol part of the URL
    @property string schema() const nothrow { return m_schema; }
    /// ditto
    @property void schema(string v) { m_schema = v; }

    /// The url encoded path part of the URL
    @property string pathString() const nothrow { return m_path.toString; }

    /// Set the path part of the URL. It should be properly encoded.
    @property void pathString(string s)
    {
        m_path = InetPath(s);
    }

    /// The path part of the URL
    @property InetPath path() const nothrow { return m_path; }
    /// ditto
    @property void path(InetPath p)
    nothrow {
        m_path = p;
    }
    /// ditto
    @property void path(Path)(Path p)
        if (isInstanceOf!(GenericPath, Path) && !is(Path == InetPath))
    {
        m_path = cast(InetPath)p;
    }

    /// The host part of the URL (depends on the schema)
    @property string host() const pure nothrow { return m_host; }
    /// ditto
    @property void host(string v) { m_host = v; }

    /** The port part of the URL (optional)

        The getter falls back to the default port of the schema if no explicit
        port is set.
    */
    @property ushort port() const nothrow { return m_port ? m_port : defaultPort(m_schema); }
    /// ditto
    @property void port(ushort v) nothrow { m_port = v; }

    /// Get the default port for the given schema or 0
    static ushort defaultPort(string schema)
    nothrow {
        import core.atomic : atomicLoad;
        import std.uni : toLower;

        string lowerschema;

        try
            lowerschema = schema.toLower();
        catch (Exception e)
            assert(false, e.msg);

        if (auto set = atomicLoad(map_commonInternetSchemas))
            if (set.contains(lowerschema))
                return set.get(lowerschema);

        return 0;
    }
    /// ditto
    ushort defaultPort()
    const nothrow {
        return defaultPort(m_schema);
    }

    /// The user name part of the URL (optional)
    @property string username() const nothrow { return m_username; }
    /// ditto
    @property void username(string v) { m_username = v; }

    /// The password part of the URL (optional)
    @property string password() const nothrow { return m_password; }
    /// ditto
    @property void password(string v) { m_password = v; }

    /// The query string part of the URL (optional)
    @property string queryString() const nothrow { return m_queryString; }
    /// ditto
    @property void queryString(string v) { m_queryString = v; }

    /// The anchor part of the URL (optional)
    @property string anchor() const nothrow { return m_anchor; }
    /// ditto
    @property void anchor(string v) nothrow { m_anchor = v; }

    /// The path part plus query string and anchor
    @property string localURI()
    const nothrow {
        auto str = appender!string();
        str.put(m_path.toString);
        if( queryString.length ) {
            str.put("?");
            str.put(queryString);
        }
        if( anchor.length ) {
            str.put("#");
            str.put(anchor);
        }
        return str.data;
    }
    /// ditto
    @property void localURI(string str)
    {
        // split off the anchor first, so that a '?' inside it is not
        // mistaken for the start of the query string
        auto ai = str.indexOf('#');
        if( ai >= 0 ){
            m_anchor = str[ai+1 .. $];
            str = str[0 .. ai];
        } else m_anchor = null;

        auto qi = str.indexOf('?');
        if( qi >= 0 ){
            m_queryString = str[qi+1 .. $];
            str = str[0 .. qi];
        } else m_queryString = null;

        this.pathString = str;
    }

    /// The URL to the parent path with query string and anchor stripped.
    @property URL parentURL()
    const {
        URL ret;
        ret.schema = schema;
        ret.host = host;
        // Copy the stored port, not the effective one: `port` would turn an
        // implicit default port into an explicit one, which changes both the
        // string representation and the result of `opEquals`.
        ret.port = m_port;
        ret.username = username;
        ret.password = password;
        ret.path = path.parentPath;
        return ret;
    }

    /// Converts this URL object to its string representation.
    string toString()
    const nothrow {
        auto dst = appender!string();
        try this.toString(dst);
        catch (Exception e) assert(false, e.msg);
        return dst.data;
    }

    /// ditto
    void toString(OutputRange) (ref OutputRange dst) const {
        import std.format;
        dst.put(schema);
        dst.put(":");
        if (isCommonInternetSchema(schema))
            dst.put("//");
        if (m_username.length || m_password.length) {
            dst.put(username);
            if (m_password.length)
            {
                dst.put(':');
                dst.put(password);
            }
            dst.put('@');
        }

        import std.algorithm : canFind;
        // a ':' inside the host means it is an IPv6 literal and needs brackets
        auto ipv6 = host.canFind(":");

        if ( ipv6 ) dst.put('[');
        dst.put(host);
        if ( ipv6 ) dst.put(']');

        if (m_port > 0)
            formattedWrite(dst, ":%d", m_port);

        dst.put(localURI);
    }

    /** Converts a "file" URL back to a native file system path.

        Throws:
            `Exception` if the schema is not "file".
    */
    NativePath toNativePath()
    const {
        import std.algorithm.iteration : map;
        import std.range : dropOne;

        enforce(this.schema == "file", "Only file:// URLs can be converted to a native path.");

        version (Windows) {
            if (this.host.length) {
                auto p = NativePath(this.path
                        .bySegment
                        .dropOne
                        .map!(s => cast(WindowsPath.Segment)s)
                    );
                return NativePath.fromTrustedString(`\\`~this.host) ~ p;
            }
        }

        return cast(NativePath)this.path;
    }

    /** Decode percent encoded triplets for unreserved or convert to uppercase

        Throws:
            `Exception` if a '%' is not followed by two more characters,
            `ConvException` if those characters are not hexadecimal digits.
    */
    private string normalize_percent_encoding(scope const(char)[] input)
    {
        auto normalized = appender!string;
        normalized.reserve(input.length);

        for (size_t i = 0; i < input.length; i++)
        {
            const char c = input[i];
            if (c == '%')
            {
                // The input may come straight from an unvalidated query string
                // or anchor, so report a truncated triplet as a regular
                // exception instead of aborting with `assert(false)`.
                enforce(i + 3 <= input.length, "Invalid percent encoding");

                char conv = cast(char) input[i + 1 .. i + 3].to!ubyte(16);
                switch (conv)
                {
                    case 'A': .. case 'Z':
                    case 'a': .. case 'z':
                    case '0': .. case '9':
                    case '-': case '.': case '_': case '~':
                        normalized ~= conv; // Decode unreserved
                        break;
                    default:
                        normalized ~= input[i .. i + 3].toUpper(); // Uppercase HEX
                        break;
                }

                i += 2; // skip the two hex digits
            }
            else
                normalized ~= c;
        }

        return normalized.data;
    }

    /**
        Normalize the content of this `URL` in place

        Normalization can be used to create a more consistent and human-friendly
        string representation of the `URL`.
        The list of transformations applied in the process of normalization is as follows:
            - Converting schema and host to lowercase
            - Removing port if it is the default port for schema
            - Removing dot segments in path
            - Converting percent-encoded triplets to uppercase
            - Adding slash when path is empty
            - Adding slash to path when path represents a directory
            - Decoding percent encoded triplets for unreserved characters
                A-Z a-z 0-9 - . _ ~

        Params:
            isDirectory = Path of the URL represents a directory, if one is
                not already present, a trailing slash will be appended when
                `true`
    */
    void normalize(bool isDirectory = false)
    {
        import std.uni : toLower;

        // Lowercase host and schema
        this.m_schema = this.m_schema.toLower();
        this.m_host = this.m_host.toLower();

        // Remove default port
        if (this.m_port == URL.defaultPort(this.m_schema))
            this.m_port = 0;

        // Normalize percent encoding, decode unreserved or uppercase hex
        this.m_queryString = normalize_percent_encoding(this.m_queryString);
        this.m_anchor = normalize_percent_encoding(this.m_anchor);

        // Normalize path (first remove dot segments then normalize path segments)
        this.m_path = InetPath(this.m_path.normalized.bySegment.map!(
                n => InetPath.Segment.fromTrustedEncodedString(normalize_percent_encoding(n.encodedName))
            ).array);

        // Add trailing slash to empty path
        if (this.m_path.empty || isDirectory)
            this.m_path.endsWithSlash = true;
    }

    /** Returns the normalized form of the URL.

        See `normalize` for a full description.
    */
    URL normalized()
    const {
        URL ret = this;
        ret.normalize();
        return ret;
    }

    /// Tests whether schema and host match `rhs` and the path starts with the path of `rhs`.
    bool startsWith(const URL rhs)
    const nothrow {
        if( m_schema != rhs.m_schema ) return false;
        if( m_host != rhs.m_host ) return false;
        // FIXME: also consider user, port, querystring, anchor etc
        return this.path.bySegment.startsWith(rhs.path.bySegment);
    }

    /// Appends a path or path segment; an empty URL path is treated as "/".
    URL opBinary(string OP, Path)(Path rhs) const if (OP == "~" && isAnyPath!Path) {
        return URL(m_schema, m_host, m_port, (!this.path.empty ? this.path : InetPath(`/`)) ~ rhs);
    }
    /// ditto
    URL opBinary(string OP, Path)(Path.Segment rhs) const if (OP == "~" && isAnyPath!Path) {
        return URL(m_schema, m_host, m_port, (!this.path.empty ? this.path : InetPath(`/`)) ~ rhs);
    }
    /// ditto
    void opOpAssign(string OP, Path)(Path rhs) if (OP == "~" && isAnyPath!Path) {
        this.path = (!this.path.empty ? this.path : InetPath(`/`)) ~ rhs;
    }
    /// ditto
    void opOpAssign(string OP, Path)(Path.Segment rhs) if (OP == "~" && isAnyPath!Path) {
        this.path = (!this.path.empty ? this.path : InetPath(`/`)) ~ rhs;
    }

    /// Tests two URLs for equality using '=='.
    bool opEquals(ref const URL rhs)
    const nothrow {
        if (m_schema != rhs.m_schema) return false;
        if (m_host != rhs.m_host) return false;
        if (m_path != rhs.m_path) return false;
        if (m_port != rhs.m_port) return false;
        return true;
    }
    /// ditto
    bool opEquals(const URL other) const nothrow { return opEquals(other); }

    /** Orders URLs by schema, host, path and port.

        Considers the same fields as `opEquals`, so that `a == b` implies
        `a.opCmp(b) == 0`.

        Returns:
            A negative value, zero or a positive value if this URL is ordered
            before, equal to or after `rhs`.
    */
    int opCmp(ref const URL rhs) const nothrow {
        if (m_schema != rhs.m_schema) return m_schema.cmp(rhs.m_schema);
        if (m_host != rhs.m_host) return m_host.cmp(rhs.m_host);
        if (m_path != rhs.m_path) return cmp(m_path.toString, rhs.m_path.toString);
        if (m_port != rhs.m_port) return m_port < rhs.m_port ? -1 : 1;
        // was `return true`, which made equal URLs compare as "greater"
        return 0;
    }
}

/// Checks that `schema` is non-empty and consists only of ASCII letters, digits, '+', '.' and '-'.
bool isValidSchema(string schema)
@safe pure nothrow {
    if (schema.length < 1) return false;

    foreach (char ch; schema) {
        switch (ch) {
            default: return false;
            case 'a': .. case 'z': break;
            case 'A': .. case 'Z': break;
            case '0': .. case '9': break;
            case '+', '.', '-': break;
        }
    }

    return true;
}

unittest {
    assert(isValidSchema("http+ssh"));
    assert(isValidSchema("http"));
    assert(!isValidSchema("http/ssh"));
    assert(isValidSchema("HTtp"));
}


/** Checks whether `name` is a valid host name.

    The name must be 1 to 255 characters long and consist of dot separated
    labels of 1 to 63 ASCII letters, digits or '-', none starting with '-'.
*/
bool isValidHostName(string name)
@safe pure nothrow {
    import std.algorithm.iteration : splitter;
    import std.string : representation;

    // According to RFC 1034
    if (name.length < 1) return false;
    if (name.length > 255) return false;
    foreach (seg; name.representation.splitter('.')) {
        if (seg.length < 1) return false;
        if (seg.length > 63) return false;
        if (seg[0] == '-') return false;

        foreach (char ch; seg) {
            switch (ch) {
                default: return false;
                case 'a': .. case 'z': break;
                case 'A': .. case 'Z': break;
                case '0': .. case '9': break;
                case '-': break;
            }
        }
    }
    return true;
}

unittest {
    assert(isValidHostName("foo"));
    assert(isValidHostName("foo-"));
    assert(isValidHostName("foo.bar"));
    assert(isValidHostName("foo.bar-baz"));
    assert(isValidHostName("foo1"));
    assert(!isValidHostName("-foo"));
}


private enum isAnyPath(P) = is(P == InetPath) || is(P == PosixPath) || is(P == WindowsPath);

// Registry of double-slash schemas and their default ports. The map is never
// modified in place: `registerCommonInternetSchema` swaps in an extended copy.
private shared immutable(SchemaDefaultPortMap)* map_commonInternetSchemas;

shared static this() {
    auto initial_schemas = new SchemaDefaultPortMap;
    initial_schemas.add("file", 0);
    initial_schemas.add("tcp", 0);
    initial_schemas.add("ftp", 21);
    initial_schemas.add("sftp", 22);
    initial_schemas.add("http", 80);
    initial_schemas.add("https", 443);
    initial_schemas.add("http+unix", 80);
    initial_schemas.add("https+unix", 443);
    initial_schemas.add("spdy", 443);
    initial_schemas.add("ws", 80);
    initial_schemas.add("wss", 443);
    initial_schemas.add("redis", 6379);
    initial_schemas.add("rtsp", 554);
    initial_schemas.add("rtsps", 322);

    map_commonInternetSchemas = cast(immutable)initial_schemas;
}

deprecated("Use the overload that accepts a `ushort port` as second argument")
void registerCommonInternetSchema(string schema)
{
    registerCommonInternetSchema(schema, 0);
}

/** Adds the name of a schema to be treated as double-slash style.

    Params:
        schema = Name of the schema
        port = Default port for the schema

    See_also: `isCommonInternetSchema`, RFC 1738 Section 3.1
*/
void registerCommonInternetSchema(string schema, ushort port)
@trusted nothrow {
    import core.atomic : atomicLoad, cas;
    import std.uni : toLower;

    string lowerschema;
    try {
        lowerschema = schema.toLower();
    } catch (Exception e) {
        assert(false, e.msg);
    }

    assert(lowerschema.length < 128, "Only schemas with less than 128 characters are supported");

    // copy the current map, add the entry and retry until the swap succeeds
    while (true) {
        auto olds = atomicLoad(map_commonInternetSchemas);
        auto news = olds ? olds.dup : new SchemaDefaultPortMap;
        news.add(lowerschema, port);
        static if (__VERSION__ < 2094) {
            // work around bogus shared violation error on earlier versions of Druntime
            if (cas(cast(shared(SchemaDefaultPortMap*)*)&map_commonInternetSchemas, cast(shared(SchemaDefaultPortMap)*)olds, cast(shared(SchemaDefaultPortMap)*)news))
                break;
        } else {
            if (cas(&map_commonInternetSchemas, olds, cast(immutable)news))
                break;
        }
    }
}


/** Determines whether an URL schema is double-slash based.

    Double slash based schemas are of the form `schema://[host]/<path>`
    and are parsed differently compared to generic schemas, which are simply
    parsed as `schema:<path>`.

    Built-in recognized double-slash schemas: ftp, http, https,
    http+unix, https+unix, spdy, sftp, ws, wss, file, redis, tcp,
    rtsp, rtsps

    See_also: `registerCommonInternetSchema`, RFC 1738 Section 3.1
*/
bool isCommonInternetSchema(string schema)
@safe nothrow @nogc {
    import core.atomic : atomicLoad;
    char[128] buffer;

    if (schema.length >= 128) return false;

    // lowercase into a stack buffer to stay @nogc
    foreach (ix, char c; schema)
    {
        if (!isASCII(c)) return false;
        buffer[ix] = toLower(c);
    }

    scope lowerschema = buffer[0 .. schema.length];

    return () @trusted {
        auto set = atomicLoad(map_commonInternetSchemas);
        return set ? set.contains(cast(string) lowerschema) : false;
    } ();
}

unittest {
    assert(isCommonInternetSchema("http"));
    assert(isCommonInternetSchema("HTtP"));
    assert(URL.defaultPort("http") == 80);
    assert(!isCommonInternetSchema("foobar"));
    registerCommonInternetSchema("fooBar", 2522);
    assert(isCommonInternetSchema("foobar"));
    assert(isCommonInternetSchema("fOObAR"));
    assert(URL.defaultPort("foobar") == 2522);
    assert(URL.defaultPort("fOObar") == 2522);

    assert(URL.defaultPort("unregistered") == 0);
}


// Maps lowercase schema names to their default port.
private struct SchemaDefaultPortMap {
    ushort[string] m_data;

    void add(string str, ushort port) @safe nothrow { m_data[str] = port; }
    bool contains(string str) const @safe nothrow @nogc { return !!(str in m_data); }
    ushort get(string str) const @safe nothrow { return m_data[str]; }
    SchemaDefaultPortMap* dup() const @safe nothrow {
        auto ret = new SchemaDefaultPortMap;
        foreach (s; m_data.byKeyValue) ret.add(s.key, s.value);
        return ret;
    }
}

// Puny encoding
private {
    /** Bootstring parameters for Punycode
        These parameters are designed for Unicode

        See also: RFC 3492 Section 5
    */
    enum uint base = 36;
    enum uint tmin = 1;
    enum uint tmax = 26;
    enum uint skew = 38;
    enum uint damp = 700;
    enum uint initial_bias = 72;
    enum uint initial_n = 128;

    /*  Bias adaptation

        See also: RFC 3492 Section 6.1
    */
    uint punyAdapt (uint pdelta, int numpoints, bool firsttime)
    @safe @nogc nothrow pure {
        uint delta = firsttime ? pdelta / damp : pdelta / 2;
        delta += delta / numpoints;
        uint k = 0;

        while (delta > ((base - tmin) * tmax) / 2)
        {
            delta /= (base - tmin);
            k += base;
        }

        return k + (((base - tmin + 1) * delta) / (delta + skew));
    }

    /*  Converts puny digit-codes to code point

        Digits 0 .. 25 map to 'a' .. 'z', digits 26 .. 35 map to '0' .. '9'.

        See also: RFC 3492 Section 5
    */
    dchar punyDigitToCP (uint digit)
    @safe @nogc nothrow pure {
        return cast(dchar) (digit + 22 + 75 * (digit < 26));
    }

    /*  Encodes `input` with puny encoding

        If input is all characters below `initial_n`
        input is returned as is.

        See also: RFC 3492 Section 6.3
    */
    string punyEncode (in string input)
    @safe {
        uint n = initial_n;
        uint delta = 0;
        uint bias = initial_bias;
        uint h;
        uint b;
        dchar m = dchar.max; // minchar
        bool delta_overflow;

        uint input_len = 0;
        auto output = appender!string();

        output.put("xn--");

        foreach (dchar cp; input)
        {
            // Basic code points are strictly below `initial_n` (0x80). U+0080
            // itself must be encoded, otherwise it would be emitted as a lone
            // 0x80 byte, which is not valid UTF-8.
            if (cp < initial_n)
            {
                output.put(cast(char) cp);
                h += 1;
            }
            // Count length of input as code points, `input.length` counts bytes
            input_len += 1;
        }

        b = h;
        if (b == input_len)
            return input; // No need to puny encode

        if (b > 0)
            output.put('-');

        while (h < input_len)
        {
            // find the smallest code point that has not been handled yet
            m = dchar.max;
            foreach (dchar cp; input)
            {
                if (n <= cp && cp < m)
                    m = cp;
            }

            assert(m != dchar.max, "Punyencoding failed, cannot find code point");

            delta = addu(delta, ((m - n) * (h + 1)), delta_overflow);
            assert(!delta_overflow, "Punyencoding failed, delta overflow");

            n = m;

            foreach (dchar cp; input)
            {
                if (cp < n)
                    delta += 1;

                if (cp == n)
                {
                    // emit delta as a generalized variable-length integer
                    uint q = delta;
                    uint k = base;

                    while (true)
                    {
                        uint t;
                        if (k <= bias /* + tmin */)
                            t = tmin;
                        else if (k >= bias + tmax)
                            t = tmax;
                        else
                            t = k - bias;

                        if (q < t) break;

                        output.put(punyDigitToCP(t + ((q - t) % (base - t))));
                        q = (q - t) / (base - t);
                        k += base;
                    }
                    output.put(punyDigitToCP(q));
                    bias = punyAdapt(delta, h + 1, h == b);
                    delta = 0;
                    h += 1;
                }
            }
            delta += 1;
            n += 1;
        }

        return output.data;
    }
}

unittest { // IPv6
    auto urlstr = "http://[2003:46:1a7b:6c01:64b:80ff:fe80:8003]:8091/abc";
    auto url = URL.parse(urlstr);
    assert(url.schema == "http", url.schema);
    assert(url.host == "2003:46:1a7b:6c01:64b:80ff:fe80:8003", url.host);
    assert(url.port == 8091);
    assert(url.path == InetPath("/abc"), url.path.toString());
    assert(url.toString == urlstr);

    url.host = "abcd:46:1a7b:6c01:64b:80ff:fe80:8abc";
    urlstr = "http://[abcd:46:1a7b:6c01:64b:80ff:fe80:8abc]:8091/abc";
    assert(url.toString == urlstr);
}


unittest {
    auto urlstr = "https://www.example.net/index.html";
    auto url = URL.parse(urlstr);
    assert(url.schema == "https", url.schema);
    assert(url.host == "www.example.net", url.host);
    assert(url.path == InetPath("/index.html"), url.path.toString());
    assert(url.port == 443);
    assert(url.toString == urlstr);

    urlstr = "http://jo.doe:password@sub.www.example.net:4711/sub2/index.html?query#anchor";
    url = URL.parse(urlstr);
    assert(url.schema == "http", url.schema);
    assert(url.username == "jo.doe", url.username);
    assert(url.password == "password", url.password);
    assert(url.port == 4711, to!string(url.port));
    assert(url.host == "sub.www.example.net", url.host);
    assert(url.path.toString() == "/sub2/index.html", url.path.toString());
    assert(url.queryString == "query", url.queryString);
    assert(url.anchor == "anchor", url.anchor);
    assert(url.toString == urlstr);
}

unittest { // issue #1044
    URL url = URL.parse("http://example.com/p?query#anchor");
    assert(url.schema == "http");
    assert(url.host == "example.com");
    assert(url.port == 80);
    assert(url.queryString == "query");
    assert(url.anchor == "anchor");
    assert(url.pathString == "/p");
    url.localURI = "/q";
    assert(url.schema == "http");
    assert(url.host == "example.com");
    assert(url.queryString == "");
    assert(url.anchor == "");
    assert(url.pathString == "/q");
    url.localURI = "/q?query";
    assert(url.schema == "http");
    assert(url.host == "example.com");
    assert(url.queryString == "query");
    assert(url.anchor == "");
    assert(url.pathString == "/q");
    url.localURI = "/q#anchor";
    assert(url.schema == "http");
    assert(url.host == "example.com");
    assert(url.queryString == "");
    assert(url.anchor == "anchor");
    assert(url.pathString == "/q");
}

//websocket unittest
unittest {
    URL url = URL("ws://127.0.0.1:8080/echo");
    assert(url.host == "127.0.0.1");
    assert(url.port == 8080);
    assert(url.localURI == "/echo");
}

//rtsp unittest
unittest {
    URL url = URL("rtsp://127.0.0.1:554/echo");
    assert(url.host == "127.0.0.1");
    assert(url.port == 554);
    assert(url.localURI == "/echo");
}

unittest {
    auto p = PosixPath("/foo bar/boo oom/");
    URL url = URL("http", "example.com", 0, p); // constructor test
    assert(url.path == cast(InetPath)p);
    url.path = p;
    assert(url.path == cast(InetPath)p); // path assignment test
    assert(url.pathString == "/foo%20bar/boo%20oom/");
    assert(url.toString() == "http://example.com/foo%20bar/boo%20oom/");
    url.pathString = "/foo%20bar/boo%2foom/";
    assert(url.pathString == "/foo%20bar/boo%2foom/");
    assert(url.toString() == "http://example.com/foo%20bar/boo%2foom/");
}

unittest {
    URL url = URL("http://user:password@example.com");
    assert(url.toString() == "http://user:password@example.com");

    url = URL("http://user@example.com");
    assert(url.toString() == "http://user@example.com");
}
unittest { // percent-encoded characters are decoded when converting to a native style path
    const parsed = URL("http://example.com/some%2bpath");
    const posix = cast(PosixPath)parsed.path;
    assert(posix.toString() == "/some+path", parsed.path.toString());
}

unittest { // host and path parts of file:// URLs
    const noHost = URL("file:///test");
    assert(noHost.pathString == "/test");
    assert(noHost.port == 0);
    assert(noHost.path.toString() == "/test");

    const hostOnly = URL("file://test");
    assert(hostOnly.host == "test");
    assert(hostOnly.pathString() == "");

    const dotHost = URL("file://./test");
    assert(dotHost.host == ".");
    assert(dotHost.pathString == "/test");
    assert(dotHost.path.toString() == "/test");
}

unittest { // issue #1318
    bool rejected = false;
    try URL("http://something/inval%id");
    catch (Exception e) rejected = true;
    assert(rejected, "Expected to throw an exception.");
}

unittest { // percent-encoded unix socket paths used as host
    enum socketHost = "%2Fvar%2Frun%2Fdocker.sock";

    assert(URL("http+unix://" ~ socketHost).schema == "http+unix");
    assert(URL("https+unix://" ~ socketHost).schema == "https+unix");
    assert(URL("http+unix://" ~ socketHost).host == socketHost);
    assert(URL("http+unix://" ~ socketHost).pathString == "");
    assert(URL("http+unix://" ~ socketHost ~ "/container/json").pathString == "/container/json");

    // round trip through the string representation
    auto original = URL("http+unix://" ~ socketHost ~ "/container/json");
    assert(URL(original.toString()) == original);
}

unittest {
    import dub.internal.vibecompat.data.serialization;
    static assert(isStringSerializable!URL);
}

unittest { // issue #1732
    auto endpoint = URL("tcp://0.0.0.0:1234");
    endpoint.port = 4321;
    const text = endpoint.toString;
    assert(text == "tcp://0.0.0.0:4321", text);
}

unittest { // host name role in file:// URLs
    auto local = URL.parse("file:///foo/bar");
    assert(local.host == "");
    assert(local.path == InetPath("/foo/bar"));
    assert(local.toString() == "file:///foo/bar");

    auto remote = URL.parse("file://foo/bar/baz");
    assert(remote.host == "foo");
    assert(remote.path == InetPath("/bar/baz"));
    assert(remote.toString() == "file://foo/bar/baz");
}

unittest { // native path <-> URL conversion
    import std.exception : assertThrown;

    const native = NativePath("/foo/bar");
    auto fileUrl = URL(native);
    assert(fileUrl.schema == "file");
    assert(fileUrl.host == "");
    assert(fileUrl.path == InetPath("/foo/bar"));
    assert(fileUrl.toNativePath == NativePath("/foo/bar"));

    // only file:// URLs and absolute paths can be converted
    assertThrown(URL("http://example.org/").toNativePath);
    assertThrown(URL(NativePath("foo/bar")));
}

unittest { // URL Normalization
    // pairs of [input, expected normalized form]
    static immutable string[2][] cases = [
        ["http://example.com/foo%2a", "http://example.com/foo%2A"],
        ["HTTP://User@Example.COM/Foo", "http://User@example.com/Foo"],
        ["http://example.com/%7Efoo", "http://example.com/~foo"],
        ["http://example.com/foo/./bar/baz/../qux", "http://example.com/foo/bar/qux"],
        ["http://example.com", "http://example.com/"],
        ["http://example.com:80/", "http://example.com/"],
        ["hTTPs://examPLe.COM:443/my/path", "https://example.com/my/path"],
    ];

    foreach (pair; cases) {
        auto parsed = URL.parse(pair[0]);
        assert(parsed.normalized.toString() == pair[1]);
    }

    // in-place normalization of a directory URL appends a trailing slash
    auto dir = URL.parse("http://example.com/foo");
    dir.normalize(true);
    assert(dir.toString() == "http://example.com/foo/");
}

version (Windows) unittest { // Windows drive letter paths
    auto driveUrl = URL(WindowsPath(`C:\foo`));
    assert(driveUrl.schema == "file");
    assert(driveUrl.host == "");
    assert(driveUrl.path == InetPath("/C:/foo"));

    auto back = driveUrl.toNativePath;
    back.normalize();
    assert(back == WindowsPath(`C:\foo`));
}

version (Windows) unittest { // UNC paths
    auto uncUrl = URL(WindowsPath(`\\server\share\path`));
    assert(uncUrl.schema == "file");
    assert(uncUrl.host == "server");
    assert(uncUrl.path == InetPath("/share/path"));

    auto back = uncUrl.toNativePath;
    back.normalize(); // convert slash to backslash if necessary
    assert(back == WindowsPath(`\\server\share\path`));
}

unittest { // appending paths and segments with '~' and '~='
    const withPath = URL.parse("http://example.com/foo");
    assert((withPath ~ InetPath("bar")).toString()
        == "http://example.com/foo/bar");
    assert((withPath ~ InetPath.Segment("bar")).toString()
        == "http://example.com/foo/bar");

    const bare = URL.parse("http://example.com");
    assert((bare ~ InetPath("foo")).toString()
        == "http://example.com/foo");
    assert((bare ~ InetPath.Segment("foo")).toString()
        == "http://example.com/foo");

    URL growing = URL.parse("http://example.com/");
    growing ~= InetPath("foo");
    growing ~= InetPath.Segment("bar");
    assert(growing.toString() == "http://example.com/foo/bar");
}

unittest { // generic (non double-slash) schemas
    const rooted = URL.parse("foo:/foo/bar");
    assert(rooted.toString() == "foo:/foo/bar");
    assert(rooted.path.toString() == "/foo/bar");

    const relative = URL.parse("foo:foo/bar");
    assert(relative.toString() == "foo:foo/bar");
}