1 /**
2 	URL parsing routines.
3 
4 	Copyright: © 2012-2017 Sönke Ludwig
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Sönke Ludwig
7 */
8 module dub.internal.vibecompat.inet.url;
9 
10 import dub.internal.vibecompat.inet.path2;
11 import dub.internal.vibecompat.inet.urlencode;
12 
13 import std.array;
14 import std.algorithm;
15 import std.conv;
16 import std.exception;
17 import std.string;
18 import std.traits : isInstanceOf;
19 import std.ascii : isAlpha, isASCII, toLower;
20 import std.uri: decode, encode;
21 
22 import core.checkedint : addu;
23 
24 
/** Leniently parses a URL as entered by a user.

	In contrast to `URL.parse`, the host name and the path may contain special
	characters. The host name is converted using puny code, the local part is
	percent-encoded, and the result is returned in normalized form.

	Params:
		url = String representation of the URL
		default_schema = Schema that is prepended (followed by "://") if it is
			non-empty and `url` neither starts with a slash nor contains
			"://". A browser might use "http" or "https", for example.
*/
URL parseUserURL(string url, string default_schema)
{
	const needs_schema = default_schema.length > 0
		&& !url.startsWith("/")
		&& !url.canFind("://");

	auto full_url = needs_schema ? default_schema ~ "://" ~ url : url;
	auto parsed = URL(full_url, false);
	return parsed.normalized;
}
44 
unittest {
	// special characters in the path get percent-encoded
	string[2][] path_cases = [
		["http://example.com/hello-🌍", "/hello-%F0%9F%8C%8D"],
		["http://example.com/안녕하세요-세계", "/%EC%95%88%EB%85%95%ED%95%98%EC%84%B8%EC%9A%94-%EC%84%B8%EA%B3%84"],
	];
	foreach (c; path_cases)
		assert(parseUserURL(c[0], "foo").pathString == c[1]);

	// special characters in the host name get puny-encoded label by label
	string[2][] host_cases = [
		["http://hello-🌍.com/", "xn--hello--8k34e.com"],
		["http://hello-🌍.com:8080/", "xn--hello--8k34e.com"],
		["http://i-❤-이모티콘.io", "xn--i---5r6aq903fubqabumj4g.io"],
		["https://hello🌍.i-❤-이모티콘.com", "xn--hello-oe93d.xn--i---5r6aq903fubqabumj4g.com"],
	];
	foreach (c; host_cases)
		assert(parseUserURL(c[0], "foo").host == c[1]);

	// the default schema is added when the input has none
	auto with_path = parseUserURL("example.com/foo/bar", "sftp");
	assert(with_path == URL("sftp://example.com/foo/bar"));
	auto with_port = parseUserURL("example.com:1234", "https");
	assert(with_port == URL("https://example.com:1234/"));
}
64 
65 
66 /**
67 	Represents a URL decomposed into its components.
68 */
69 struct URL {
70 @safe:
71 	private {
72 		string m_schema;
73 		InetPath m_path;
74 		string m_host;
75 		ushort m_port;
76 		string m_username;
77 		string m_password;
78 		string m_queryString;
79 		string m_anchor;
80 	}
81 
82 	/// Constructs a new URL object from its components.
83 	this(string schema, string host, ushort port, InetPath path) pure nothrow
84 	in {
85 		assert(isValidSchema(schema), "Invalid URL schema name: " ~ schema);
86 		assert(host.length == 0 || isValidHostName(host), "Invalid URL host name: " ~ host);
87 	}
88 	do {
89 		m_schema = schema;
90 		m_host = host;
91 		m_port = port;
92 		m_path = path;
93 	}
94 	/// ditto
95 	this(string schema, InetPath path) pure nothrow
96 	in { assert(isValidSchema(schema)); }
97 	do {
98 		this(schema, null, 0, path);
99 	}
100 	/// ditto
101 	this(string schema, string host, ushort port, PosixPath path) pure nothrow
102 	in {
103 		assert(isValidSchema(schema));
104 		assert(host.length == 0 || isValidHostName(host));
105 	}
106 	do {
107 		InetPath ip;
108 		try ip = cast(InetPath)path;
109 		catch (Exception e) assert(false, e.msg); // InetPath should be able to capture all paths
110 		this(schema, host, port, ip);
111 	}
112 	/// ditto
113 	this(string schema, PosixPath path) pure nothrow
114 	in { assert(isValidSchema(schema)); }
115 	do {
116 		this(schema, null, 0, path);
117 	}
118 	/// ditto
119 	this(string schema, string host, ushort port, WindowsPath path) pure nothrow
120 	in {
121 		assert(isValidSchema(schema));
122 		assert(host.length == 0 || isValidHostName(host));
123 	}
124 	do {
125 		InetPath ip;
126 		try ip = cast(InetPath)path;
127 		catch (Exception e) assert(false, e.msg); // InetPath should be able to capture all paths
128 		this(schema, host, port, ip);
129 	}
130 	/// ditto
131 	this(string schema, WindowsPath path) pure nothrow
132 	in { assert(isValidSchema(schema)); }
133 	do {
134 		this(schema, null, 0, path);
135 	}
136 
137 	/** Constructs a "file:" URL from a native file system path.
138 
139 		Note that the path must be absolute. On Windows, both, paths starting
140 		with a drive letter and UNC paths are supported.
141 	*/
142 	this(WindowsPath path) pure
143 	{
144 		import std.algorithm.iteration : map;
145 		import std.range : chain, only, repeat;
146 
147 		enforce(path.absolute, "Only absolute paths can be converted to a URL.");
148 
149 		// treat UNC paths properly
150 		if (path.startsWith(WindowsPath(`\\`))) {
151 			auto segs = path.bySegment;
152 			segs.popFront();
153 			segs.popFront();
154 			auto host = segs.front.name;
155 			segs.popFront();
156 
157 			InetPath ip;
158 			ip = InetPath(only(InetPath.Segment("", '/'))
159 				.chain(segs.map!(s => cast(InetPath.Segment)s)));
160 
161 			this("file", host, 0, ip);
162 		} else this("file", host, 0, cast(InetPath)path);
163 	}
164 	/// ditto
165 	this(PosixPath path) pure
166 	{
167 		enforce(path.absolute, "Only absolute paths can be converted to a URL.");
168 
169 		this("file", null, 0, cast(InetPath)path);
170 	}
171 
172 	/** Constructs a URL from its string representation.
173 
174 		TODO: additional validation required (e.g. valid host and user names and port)
175 	*/
176 	this(string url_string)
177 	{
178 		this(url_string, true);
179 	}
180 
181 	private this(string url_string, bool encoded)
182 	{
183 		auto str = url_string;
184 		enforce(str.length > 0, "Empty URL.");
185 		if( str[0] != '/' ){
186 			auto idx = str.indexOf(':');
187 			enforce(idx > 0, "No schema in URL:"~str);
188 			m_schema = str[0 .. idx];
189 			enforce(m_schema[0].isAlpha,
190 					"Schema must start with an alphabetical char, found: " ~
191 					m_schema[0]);
192 			str = str[idx+1 .. $];
193 			bool requires_host = false;
194 
195 			if (str.startsWith("//")) {
196 				// proto://server/path style
197 				requires_host = true;
198 				str = str[2 .. $];
199 			}
200 
201 			auto si = str.indexOf('/');
202 			if( si < 0 ) si = str.length;
203 			auto ai = str[0 .. si].indexOf('@');
204 			sizediff_t hs = 0;
205 			if( ai >= 0 ){
206 				hs = ai+1;
207 				auto ci = str[0 .. ai].indexOf(':');
208 				if( ci >= 0 ){
209 					m_username = str[0 .. ci];
210 					m_password = str[ci+1 .. ai];
211 				} else m_username = str[0 .. ai];
212 				enforce(m_username.length > 0, "Empty user name in URL.");
213 			}
214 
215 			m_host = str[hs .. si];
216 
217 			auto findPort ( string src )
218 			{
219 				auto pi = src.indexOf(':');
220 				if(pi > 0) {
221 					enforce(pi < src.length-1, "Empty port in URL.");
222 					m_port = to!ushort(src[pi+1..$]);
223 				}
224 				return pi;
225 			}
226 
227 
228 			auto ip6 = m_host.indexOf('[');
229 			if (ip6 == 0) { // [ must be first char
230 				auto pe = m_host.indexOf(']');
231 				if (pe > 0) {
232 					findPort(m_host[pe..$]);
233 					m_host = m_host[1 .. pe];
234 				}
235 			}
236 			else {
237 				auto pi = findPort(m_host);
238 				if(pi > 0) {
239 					m_host = m_host[0 .. pi];
240 				}
241 				if (!encoded)
242 					m_host = m_host.splitter('.').map!(punyEncode).join('.');
243 			}
244 
245 			enforce(!requires_host || m_schema == "file" || m_host.length > 0,
246 					"Empty server name in URL.");
247 			str = str[si .. $];
248 		}
249 
250 		this.localURI = (encoded) ? str : str.encode;
251 	}
252 	/// ditto
253 	static URL parse(string url_string)
254 	{
255 		return URL(url_string);
256 	}
257 	/// ditto
258 	static URL fromString(string url_string)
259 	{
260 		return URL(url_string);
261 	}
262 
263 	/// The schema/protocol part of the URL
264 	@property string schema() const nothrow { return m_schema; }
265 	/// ditto
266 	@property void schema(string v) { m_schema = v; }
267 
268 	/// The url encoded path part of the URL
269 	@property string pathString() const nothrow { return m_path.toString; }
270 
271 	/// Set the path part of the URL. It should be properly encoded.
272 	@property void pathString(string s)
273 	{
274 		m_path = InetPath(s);
275 	}
276 
277 	/// The path part of the URL
278 	@property InetPath path() const nothrow { return m_path; }
279 	/// ditto
280 	@property void path(InetPath p)
281 	nothrow {
282 		m_path = p;
283 	}
284 	/// ditto
285 	@property void path(Path)(Path p)
286 		if (isInstanceOf!(GenericPath, Path) && !is(Path == InetPath))
287 	{
288 		m_path = cast(InetPath)p;
289 	}
290 
291 	/// The host part of the URL (depends on the schema)
292 	@property string host() const pure nothrow { return m_host; }
293 	/// ditto
294 	@property void host(string v) { m_host = v; }
295 
296 	/// The port part of the URL (optional)
297 	@property ushort port() const nothrow { return m_port ? m_port : defaultPort(m_schema); }
298 	/// ditto
299 	@property port(ushort v) nothrow { m_port = v; }
300 
301 	/// Get the default port for the given schema or 0
302 	static ushort defaultPort(string schema)
303 	nothrow {
304 		import core.atomic : atomicLoad;
305 		import std.uni : toLower;
306 
307 		string lowerschema;
308 
309 		try
310 			lowerschema = schema.toLower();
311 		catch (Exception e)
312 			assert(false, e.msg);
313 
314 		if (auto set = atomicLoad(map_commonInternetSchemas))
315 			if (set.contains(lowerschema))
316 				return set.get(lowerschema);
317 
318 		return 0;
319 	}
320 	/// ditto
321 	ushort defaultPort()
322 	const nothrow {
323 		return defaultPort(m_schema);
324 	}
325 
326 	/// The user name part of the URL (optional)
327 	@property string username() const nothrow { return m_username; }
328 	/// ditto
329 	@property void username(string v) { m_username = v; }
330 
331 	/// The password part of the URL (optional)
332 	@property string password() const nothrow { return m_password; }
333 	/// ditto
334 	@property void password(string v) { m_password = v; }
335 
336 	/// The query string part of the URL (optional)
337 	@property string queryString() const nothrow { return m_queryString; }
338 	/// ditto
339 	@property void queryString(string v) { m_queryString = v; }
340 
341 	/// The anchor part of the URL (optional)
342 	@property string anchor() const nothrow { return m_anchor; }
343 	/// ditto
344 	@property void anchor(string v) nothrow { m_anchor = v; }
345 
346 	/// The path part plus query string and anchor
347 	@property string localURI()
348 	const nothrow {
349 		auto str = appender!string();
350 		str.put(m_path.toString);
351 		if( queryString.length ) {
352 			str.put("?");
353 			str.put(queryString);
354 		}
355 		if( anchor.length ) {
356 			str.put("#");
357 			str.put(anchor);
358 		}
359 		return str.data;
360 	}
361 	/// ditto
362 	@property void localURI(string str)
363 	{
364 		auto ai = str.indexOf('#');
365 		if( ai >= 0 ){
366 			m_anchor = str[ai+1 .. $];
367 			str = str[0 .. ai];
368 		} else m_anchor = null;
369 
370 		auto qi = str.indexOf('?');
371 		if( qi >= 0 ){
372 			m_queryString = str[qi+1 .. $];
373 			str = str[0 .. qi];
374 		} else m_queryString = null;
375 
376 		this.pathString = str;
377 	}
378 
379 	/// The URL to the parent path with query string and anchor stripped.
380 	@property URL parentURL()
381 	const {
382 		URL ret;
383 		ret.schema = schema;
384 		ret.host = host;
385 		ret.port = port;
386 		ret.username = username;
387 		ret.password = password;
388 		ret.path = path.parentPath;
389 		return ret;
390 	}
391 
392 	/// Converts this URL object to its string representation.
393 	string toString()
394 	const nothrow {
395 		auto dst = appender!string();
396 		try this.toString(dst);
397 		catch (Exception e) assert(false, e.msg);
398 		return dst.data;
399 	}
400 
401 	/// Ditto
402 	void toString(OutputRange) (ref OutputRange dst) const {
403 		import std.format;
404 		dst.put(schema);
405 		dst.put(":");
406 		if (isCommonInternetSchema(schema))
407 			dst.put("//");
408 		if (m_username.length || m_password.length) {
409 			dst.put(username);
410 			if (m_password.length)
411 			{
412 				dst.put(':');
413 				dst.put(password);
414 			}
415 			dst.put('@');
416 		}
417 
418 		import std.algorithm : canFind;
419 		auto ipv6 = host.canFind(":");
420 
421 		if ( ipv6 ) dst.put('[');
422 		dst.put(host);
423 		if ( ipv6 ) dst.put(']');
424 
425 		if (m_port > 0)
426 			formattedWrite(dst, ":%d", m_port);
427 
428 		dst.put(localURI);
429 	}
430 
431 	/** Converts a "file" URL back to a native file system path.
432 	*/
433 	NativePath toNativePath()
434 	const {
435 		import std.algorithm.iteration : map;
436 		import std.range : dropOne;
437 
438 		enforce(this.schema == "file", "Only file:// URLs can be converted to a native path.");
439 
440 		version (Windows) {
441 			if (this.host.length) {
442 				auto p = NativePath(this.path
443 						.bySegment
444 						.dropOne
445 						.map!(s => cast(WindowsPath.Segment)s)
446 					);
447 				return NativePath.fromTrustedString(`\\`~this.host) ~ p;
448 			}
449 		}
450 
451 		return cast(NativePath)this.path;
452 	}
453 
454 	/// Decode percent encoded triplets for unreserved or convert to uppercase
455 	private string normalize_percent_encoding(scope const(char)[] input)
456 	{
457 		auto normalized = appender!string;
458 		normalized.reserve(input.length);
459 
460 		for (size_t i = 0; i < input.length; i++)
461 		{
462 			const char c = input[i];
463 			if (c == '%')
464 			{
465 				if (input.length < i + 3)
466 					assert(false, "Invalid percent encoding");
467 
468 				char conv = cast(char) input[i + 1 .. i + 3].to!ubyte(16);
469 				switch (conv)
470 				{
471 					case 'A': .. case 'Z':
472 					case 'a': .. case 'z':
473 					case '0': .. case '9':
474 					case '-': case '.': case '_': case '~':
475 						normalized ~= conv; // Decode unreserved
476 						break;
477 					default:
478 						normalized ~= input[i .. i + 3].toUpper(); // Uppercase HEX
479 						break;
480 				}
481 
482 				i += 2;
483 			}
484 			else
485 				normalized ~= c;
486 		}
487 
488 		return normalized.data;
489 	}
490 
491 	/**
492 	  * Normalize the content of this `URL` in place
493 	  *
494 	  * Normalization can be used to create a more consistent and human-friendly
495 	  * string representation of the `URL`.
496 	  * The list of transformations applied in the process of normalization is as follows:
497 			- Converting schema and host to lowercase
498 			- Removing port if it is the default port for schema
499 			- Removing dot segments in path
500 			- Converting percent-encoded triplets to uppercase
501 			- Adding slash when path is empty
502 			- Adding slash to path when path represents a directory
503 			- Decoding percent encoded triplets for unreserved characters
504 				A-Z a-z 0-9 - . _ ~
505 
506 		Params:
507 			isDirectory = Path of the URL represents a directory, if one is
508 				not already present, a trailing slash will be appended when
509 				`true`
510 	*/
511 	void normalize(bool isDirectory = false)
512 	{
513 		import std.uni : toLower;
514 
515 		// Lowercase host and schema
516 		this.m_schema = this.m_schema.toLower();
517 		this.m_host = this.m_host.toLower();
518 
519 		// Remove default port
520 		if (this.m_port == URL.defaultPort(this.m_schema))
521 			this.m_port = 0;
522 
523 		// Normalize percent encoding, decode unreserved or uppercase hex
524 		this.m_queryString = normalize_percent_encoding(this.m_queryString);
525 		this.m_anchor = normalize_percent_encoding(this.m_anchor);
526 
527 		// Normalize path (first remove dot segments then normalize path segments)
528 		this.m_path = InetPath(this.m_path.normalized.bySegment.map!(
529 				n => InetPath.Segment.fromTrustedEncodedString(normalize_percent_encoding(n.encodedName))
530 			).array);
531 
532 		// Add trailing slash to empty path
533 		if (this.m_path.empty || isDirectory)
534 			this.m_path.endsWithSlash = true;
535 	}
536 
537 	/** Returns the normalized form of the URL.
538 
539 		See `normalize` for a full description.
540 	*/
541 	URL normalized()
542 	const {
543 		URL ret = this;
544 		ret.normalize();
545 		return ret;
546 	}
547 
548 	bool startsWith(const URL rhs)
549 	const nothrow {
550 		if( m_schema != rhs.m_schema ) return false;
551 		if( m_host != rhs.m_host ) return false;
552 		// FIXME: also consider user, port, querystring, anchor etc
553 		return this.path.bySegment.startsWith(rhs.path.bySegment);
554 	}
555 
556 	URL opBinary(string OP, Path)(Path rhs) const if (OP == "~" && isAnyPath!Path) {
557 		return URL(m_schema, m_host, m_port, (!this.path.empty ? this.path : InetPath(`/`)) ~ rhs);
558 	}
559 	URL opBinary(string OP, Path)(Path.Segment rhs) const if (OP == "~" && isAnyPath!Path) {
560 		return URL(m_schema, m_host, m_port, (!this.path.empty ? this.path : InetPath(`/`)) ~ rhs);
561 	}
562 	void opOpAssign(string OP, Path)(Path rhs) if (OP == "~" && isAnyPath!Path) {
563 		this.path = (!this.path.empty ? this.path : InetPath(`/`)) ~ rhs;
564 	}
565 	void opOpAssign(string OP, Path)(Path.Segment rhs) if (OP == "~" && isAnyPath!Path) {
566 		this.path = (!this.path.empty ? this.path : InetPath(`/`)) ~ rhs;
567 	}
568 
569 	/// Tests two URLs for equality using '=='.
570 	bool opEquals(ref const URL rhs)
571 	const nothrow {
572 		if (m_schema != rhs.m_schema) return false;
573 		if (m_host != rhs.m_host) return false;
574 		if (m_path != rhs.m_path) return false;
575 		if (m_port != rhs.m_port) return false;
576 		return true;
577 	}
578 	/// ditto
579 	bool opEquals(const URL other) const nothrow { return opEquals(other); }
580 
581 	int opCmp(ref const URL rhs) const nothrow {
582 		if (m_schema != rhs.m_schema) return m_schema.cmp(rhs.m_schema);
583 		if (m_host != rhs.m_host) return m_host.cmp(rhs.m_host);
584 		if (m_path != rhs.m_path) return cmp(m_path.toString, rhs.m_path.toString);
585 		return true;
586 	}
587 }
588 
/** Checks whether `schema` is a syntactically valid schema name.

	A valid name is non-empty and consists only of ASCII letters, digits and
	the characters `+`, `.` and `-`.
*/
bool isValidSchema(string schema)
@safe pure nothrow {
	if (schema.length == 0) return false;

	foreach (char ch; schema) {
		const is_lower = ch >= 'a' && ch <= 'z';
		const is_upper = ch >= 'A' && ch <= 'Z';
		const is_digit = ch >= '0' && ch <= '9';
		const is_special = ch == '+' || ch == '.' || ch == '-';
		if (!(is_lower || is_upper || is_digit || is_special))
			return false;
	}

	return true;
}

unittest {
	foreach (valid; ["http+ssh", "http", "HTtp"])
		assert(isValidSchema(valid));
	assert(!isValidSchema("http/ssh"));
}
612 
613 
/** Checks whether `name` is a syntactically valid host name.

	The name has to be 1 to 255 bytes long. Each of its dot separated labels
	has to be 1 to 63 bytes long, must not start with a `-` and may only
	consist of ASCII letters, digits and `-`.
*/
bool isValidHostName(string name)
@safe pure nothrow {
	import std.algorithm.iteration : splitter;
	import std.string : representation;

	// According to RFC 1034
	if (name.length < 1 || name.length > 255) return false;

	foreach (label; name.representation.splitter('.')) {
		if (label.length < 1 || label.length > 63) return false;
		if (label[0] == '-') return false;

		foreach (char ch; label) {
			const is_lower = ch >= 'a' && ch <= 'z';
			const is_upper = ch >= 'A' && ch <= 'Z';
			const is_digit = ch >= '0' && ch <= '9';
			if (!(is_lower || is_upper || is_digit || ch == '-'))
				return false;
		}
	}
	return true;
}

unittest {
	foreach (valid; ["foo", "foo-", "foo.bar", "foo.bar-baz", "foo1"])
		assert(isValidHostName(valid));
	assert(!isValidHostName("-foo"));
}
648 
649 
/// Evaluates to `true` if `P` is `InetPath`, `PosixPath` or `WindowsPath`.
private template isAnyPath(P)
{
	enum isAnyPath = is(P == InetPath) || is(P == PosixPath) || is(P == WindowsPath);
}
651 
/// Table of the registered double-slash style schemas and their default ports.
private shared immutable(SchemaDefaultPortMap)* map_commonInternetSchemas;

/// Fills the schema table with the built-in schemas.
shared static this() {
	static struct Builtin { string name; ushort port; }

	static immutable Builtin[] builtins = [
		Builtin("file", 0),
		Builtin("tcp", 0),
		Builtin("ftp", 21),
		Builtin("sftp", 22),
		Builtin("http", 80),
		Builtin("https", 443),
		Builtin("http+unix", 80),
		Builtin("https+unix", 443),
		Builtin("spdy", 443),
		Builtin("ws", 80),
		Builtin("wss", 443),
		Builtin("redis", 6379),
		Builtin("rtsp", 554),
		Builtin("rtsps", 322),
	];

	auto initial_schemas = new SchemaDefaultPortMap;
	foreach (entry; builtins)
		initial_schemas.add(entry.name, entry.port);

	map_commonInternetSchemas = cast(immutable)initial_schemas;
}
673 
/// Registers `schema` as double-slash style schema with a default port of `0`.
deprecated("Use the overload that accepts a `ushort port` as second argument")
void registerCommonInternetSchema(string schema)
{
	enum ushort unspecified_port = 0;
	registerCommonInternetSchema(schema, unspecified_port);
}
679 
/** Adds the name of a schema to be treated as double-slash style.

	The schema name is stored in lower case. The schema table is replaced by
	an extended copy using a compare-and-swap operation that is retried until
	it succeeds.

	Params:
		schema = Name of the schema
		port = Default port for the schema

	See_also: `isCommonInternetSchema`, RFC 1738 Section 3.1
*/
void registerCommonInternetSchema(string schema, ushort port)
@trusted nothrow {
	import core.atomic : atomicLoad, cas;
	import std.uni : toLower;

	string lowerschema;
	try lowerschema = schema.toLower();
	catch (Exception e) assert(false, e.msg);

	assert(lowerschema.length < 128, "Only schemas with less than 128 characters are supported");

	bool installed = false;
	do {
		// build an extended copy of the current table ...
		auto current = atomicLoad(map_commonInternetSchemas);
		auto updated = current ? current.dup : new SchemaDefaultPortMap;
		updated.add(lowerschema, port);

		// ... and try to publish it in place of the table it was copied from
		static if (__VERSION__ < 2094) {
			// work around bogus shared violation error on earlier versions of Druntime
			installed = cas(cast(shared(SchemaDefaultPortMap*)*)&map_commonInternetSchemas, cast(shared(SchemaDefaultPortMap)*)current, cast(shared(SchemaDefaultPortMap)*)updated);
		} else {
			installed = cas(&map_commonInternetSchemas, current, cast(immutable)updated);
		}
	} while (!installed);
}
716 
717 
/** Determines whether an URL schema is double-slash based.

	Double slash based schemas are of the form `schema://[host]/<path>`
	and are parsed differently compared to generic schemas, which are simply
	parsed as `schema:<path>`.

	The lookup is case-insensitive. Schema names of 128 or more characters
	and names containing non-ASCII characters are never recognized.

	Built-in recognized double-slash schemas: ftp, http, https,
	http+unix, https+unix, spdy, sftp, ws, wss, file, redis, tcp,
	rtsp, rtsps

	See_also: `registerCommonInternetSchema`, RFC 1738 Section 3.1
*/
bool isCommonInternetSchema(string schema)
@safe nothrow @nogc {
	import core.atomic : atomicLoad;
	char[128] buffer;

	if (schema.length >= 128) return false;

	// lower-case the name into the stack buffer to avoid an allocation
	for (size_t i = 0; i < schema.length; i++)
	{
		const char c = schema[i];
		if (!isASCII(c)) return false;
		buffer[i] = toLower(c);
	}

	scope lowerschema = buffer[0 .. schema.length];

	return () @trusted {
		auto registry = atomicLoad(map_commonInternetSchemas);
		if (!registry) return false;
		return registry.contains(cast(string) lowerschema);
	} ();
}
750 
unittest {
	// built-in schemas are matched case-insensitively
	foreach (known; ["http", "HTtP"])
		assert(isCommonInternetSchema(known));
	assert(URL.defaultPort("http") == 80);

	// custom schemas become known after registration
	assert(!isCommonInternetSchema("foobar"));
	registerCommonInternetSchema("fooBar", 2522);
	foreach (registered; ["foobar", "fOObAR"])
		assert(isCommonInternetSchema(registered));
	foreach (registered; ["foobar", "fOObar"])
		assert(URL.defaultPort(registered) == 2522);

	assert(URL.defaultPort("unregistered") == 0);
}
764 
765 
/// Maps schema names to their default port.
private struct SchemaDefaultPortMap {
	ushort[string] m_data;

	/// Adds `str` with the given default port, replacing an existing entry.
	void add(string str, ushort port)
	@safe nothrow {
		m_data[str] = port;
	}

	/// Tests whether `str` has an entry.
	bool contains(string str)
	const @safe nothrow @nogc {
		return (str in m_data) !is null;
	}

	/// Returns the default port stored for `str`, which must have an entry.
	ushort get(string str)
	const @safe nothrow {
		return m_data[str];
	}

	/// Creates a newly allocated map holding the same entries.
	SchemaDefaultPortMap* dup()
	const @safe nothrow {
		auto copy = new SchemaDefaultPortMap;
		foreach (entry; m_data.byKeyValue)
			copy.m_data[entry.key] = entry.value;
		return copy;
	}
}
778 
// Puny encoding
private {
	/** Bootstring parameters for Punycode
		These parameters are designed for Unicode

		See also: RFC 3492 Section 5
	*/
	enum uint base = 36;
	enum uint tmin = 1;
	enum uint tmax = 26;
	enum uint skew = 38;
	enum uint damp = 700;
	enum uint initial_bias = 72;
	enum uint initial_n = 128;

	/*	Bias adaptation

		Computes the bias to use for the next delta, based on the delta that
		has just been encoded and the number of code points handled so far.

		See also: RFC 3492 Section 6.1
	*/
	uint punyAdapt (uint pdelta, int numpoints, bool firsttime)
	@safe @nogc nothrow pure {
		uint delta = firsttime ? pdelta / damp : pdelta / 2;
		delta += delta / numpoints;
		uint k = 0;

		while (delta > ((base - tmin) * tmax) / 2)
		{
			delta /= (base - tmin);
			k += base;
		}

		return k + (((base - tmin + 1) * delta) / (delta + skew));
	}

	/*	Converts puny digit-codes to code point

		Digits 0 to 25 map to 'a' .. 'z', digits 26 to 35 map to '0' .. '9'.

		See also: RFC 3492 Section 5
	*/
	dchar punyDigitToCP (uint digit)
	@safe @nogc nothrow pure {
		return cast(dchar) (digit + 22 + 75 * (digit < 26));
	}

	/*	Encodes `input` with puny encoding

		If input is all characters below `initial_n`
		input is returned as is. Otherwise the result consists of the prefix
		"xn--", the basic code points of the input, a '-' delimiter (only if
		there are basic code points) and the encoded non-basic code points.

		See also: RFC 3492 Section 6.3
	*/
	string punyEncode (in string input)
	@safe {
		uint n = initial_n;
		uint delta = 0;
		uint bias = initial_bias;
		uint h;
		uint b;
		dchar m = dchar.max; // minchar
		bool delta_overflow;

		uint input_len = 0;
		auto output = appender!string();

		output.put("xn--");

		foreach (dchar cp; input)
		{
			// Basic code points are those strictly below `initial_n` (i.e.
			// ASCII). The original used `<=`, which let U+0080 pass through
			// as a single raw byte instead of being encoded.
			if (cp < initial_n)
			{
				output.put(cast(char) cp);
				h += 1;
			}
			// Count length of input as code points, `input.length` counts bytes
			input_len += 1;
		}

		b = h;
		if (b == input_len)
			return input; // No need to puny encode

		if (b > 0)
			output.put('-');

		while (h < input_len)
		{
			// find the smallest code point that has not been handled yet
			m = dchar.max;
			foreach (dchar cp; input)
			{
				if (n <= cp && cp < m)
					m = cp;
			}

			assert(m != dchar.max, "Punyencoding failed, cannot find code point");

			delta = addu(delta, ((m - n) * (h + 1)), delta_overflow);
			assert(!delta_overflow, "Punyencoding failed, delta overflow");

			n = m;

			foreach (dchar cp; input)
			{
				if (cp < n)
					delta += 1;

				if (cp == n)
				{
					// emit delta as a generalized variable-length integer
					uint q = delta;
					uint k = base;

					while (true)
					{
						uint t;
						if (k <= bias /* + tmin */)
							t = tmin;
						else if (k >=  bias + tmax)
							t = tmax;
						else
							t = k - bias;

						if (q < t) break;

						output.put(punyDigitToCP(t + ((q - t) % (base - t))));
						q = (q - t) / (base - t);
						k += base;
					}
					output.put(punyDigitToCP(q));
					bias = punyAdapt(delta, h + 1, h == b);
					delta = 0;
					h += 1;
				}
			}
			delta += 1;
			n += 1;
		}

		return output.data;
	}
}
917 
unittest { // IPv6
	auto text = "http://[2003:46:1a7b:6c01:64b:80ff:fe80:8003]:8091/abc";
	auto parsed = URL.parse(text);
	assert(parsed.schema == "http", parsed.schema);
	assert(parsed.host == "2003:46:1a7b:6c01:64b:80ff:fe80:8003", parsed.host);
	assert(parsed.port == 8091);
	assert(parsed.path == InetPath("/abc"), parsed.path.toString());
	assert(parsed.toString == text);

	// a changed IPv6 host is enclosed in brackets again on output
	parsed.host = "abcd:46:1a7b:6c01:64b:80ff:fe80:8abc";
	text = "http://[abcd:46:1a7b:6c01:64b:80ff:fe80:8abc]:8091/abc";
	assert(parsed.toString == text);
}


unittest {
	// minimal URL, the port falls back to the schema default
	auto text = "https://www.example.net/index.html";
	auto parsed = URL.parse(text);
	assert(parsed.schema == "https", parsed.schema);
	assert(parsed.host == "www.example.net", parsed.host);
	assert(parsed.path == InetPath("/index.html"), parsed.path.toString());
	assert(parsed.port == 443);
	assert(parsed.toString == text);

	// URL that uses all components
	text = "http://jo.doe:password@sub.www.example.net:4711/sub2/index.html?query#anchor";
	parsed = URL.parse(text);
	assert(parsed.schema == "http", parsed.schema);
	assert(parsed.username == "jo.doe", parsed.username);
	assert(parsed.password == "password", parsed.password);
	assert(parsed.port == 4711, to!string(parsed.port));
	assert(parsed.host == "sub.www.example.net", parsed.host);
	assert(parsed.path.toString() == "/sub2/index.html", parsed.path.toString());
	assert(parsed.queryString == "query", parsed.queryString);
	assert(parsed.anchor == "anchor", parsed.anchor);
	assert(parsed.toString == text);
}
954 
unittest { // issue #1044
	URL u = URL.parse("http://example.com/p?query#anchor");
	assert(u.schema == "http");
	assert(u.host == "example.com");
	assert(u.port == 80);
	assert(u.queryString == "query");
	assert(u.anchor == "anchor");
	assert(u.pathString == "/p");

	// assigning a local URI without query/anchor resets both
	u.localURI = "/q";
	assert(u.schema == "http");
	assert(u.host == "example.com");
	assert(u.queryString == "");
	assert(u.anchor == "");
	assert(u.pathString == "/q");

	// query only
	u.localURI = "/q?query";
	assert(u.schema == "http");
	assert(u.host == "example.com");
	assert(u.queryString == "query");
	assert(u.anchor == "");
	assert(u.pathString == "/q");

	// anchor only
	u.localURI = "/q#anchor";
	assert(u.schema == "http");
	assert(u.host == "example.com");
	assert(u.queryString == "");
	assert(u.anchor == "anchor");
	assert(u.pathString == "/q");
}

//websocket unittest
unittest {
	URL ws_url = URL("ws://127.0.0.1:8080/echo");
	assert(ws_url.host == "127.0.0.1");
	assert(ws_url.port == 8080);
	assert(ws_url.localURI == "/echo");
}

//rtsp unittest
unittest {
	URL rtsp_url = URL("rtsp://127.0.0.1:554/echo");
	assert(rtsp_url.host == "127.0.0.1");
	assert(rtsp_url.port == 554);
	assert(rtsp_url.localURI == "/echo");
}
998 
unittest {
	auto posix = PosixPath("/foo bar/boo oom/");
	URL u = URL("http", "example.com", 0, posix); // constructor test
	assert(u.path == cast(InetPath)posix);
	u.path = posix;
	assert(u.path == cast(InetPath)posix); // path assignment test
	assert(u.pathString == "/foo%20bar/boo%20oom/");
	assert(u.toString() == "http://example.com/foo%20bar/boo%20oom/");
	// an encoded path string is stored as given
	u.pathString = "/foo%20bar/boo%2foom/";
	assert(u.pathString == "/foo%20bar/boo%2foom/");
	assert(u.toString() == "http://example.com/foo%20bar/boo%2foom/");
}

unittest {
	// user info survives a parse/format round trip
	foreach (text; ["http://user:password@example.com", "http://user@example.com"]) {
		URL u = URL(text);
		assert(u.toString() == text);
	}
}

unittest {
	auto u = URL("http://example.com/some%2bpath");
	assert((cast(PosixPath)u.path).toString() == "/some+path", u.path.toString());
}

unittest {
	// empty host
	auto no_host = URL("file:///test");
	assert(no_host.pathString == "/test");
	assert(no_host.port == 0);
	assert(no_host.path.toString() == "/test");

	// host without path
	auto host_only = URL("file://test");
	assert(host_only.host == "test");
	assert(host_only.pathString() == "");

	// "." as host
	auto dot_host = URL("file://./test");
	assert(dot_host.host == ".");
	assert(dot_host.pathString == "/test");
	assert(dot_host.path.toString() == "/test");
}
1035 
1036 unittest { // issue #1318
1037 	try {
1038 		URL("http://something/inval%id");
1039 		assert(false, "Expected to throw an exception.");
1040 	} catch (Exception e) {}
1041 }
1042 
unittest { // "+unix" schemas with a percent-encoded socket path in host position
	auto plain = URL("http+unix://%2Fvar%2Frun%2Fdocker.sock");
	assert(plain.schema == "http+unix");
	// the host is kept in its percent-encoded form
	assert(plain.host == "%2Fvar%2Frun%2Fdocker.sock");
	assert(plain.pathString == "");

	auto secure = URL("https+unix://%2Fvar%2Frun%2Fdocker.sock");
	assert(secure.schema == "https+unix");

	auto withPath = URL("http+unix://%2Fvar%2Frun%2Fdocker.sock/container/json");
	assert(withPath.pathString == "/container/json");
	// formatting and parsing again yields an equal URL
	assert(URL(withPath.toString()) == withPath);
}
1052 
unittest { // compile-time check: URL satisfies the isStringSerializable trait
	import dub.internal.vibecompat.data.serialization : isStringSerializable;
	static assert(isStringSerializable!(URL));
}
1057 
unittest { // issue #1732
	// Assigning a new port must be reflected in the string form.
	auto tcp = URL("tcp://0.0.0.0:1234");
	tcp.port = 4321;
	auto rendered = tcp.toString();
	assert(rendered == "tcp://0.0.0.0:4321", rendered);
}
1063 
unittest { // host name role in file:// URLs
	// three slashes: empty host, the rest is the path
	auto hostless = URL.parse("file:///foo/bar");
	assert(hostless.host == "");
	assert(hostless.path == InetPath("/foo/bar"));
	assert(hostless.toString() == "file:///foo/bar");

	// two slashes: the first component is the host, the remainder is the path
	auto hosted = URL.parse("file://foo/bar/baz");
	assert(hosted.host == "foo");
	assert(hosted.path == InetPath("/bar/baz"));
	assert(hosted.toString() == "file://foo/bar/baz");
}
1075 
unittest { // native path <-> URL conversion
	import std.exception : assertThrown;

	auto nativePath = NativePath("/foo/bar");
	auto fileUrl = URL(nativePath);
	assert(fileUrl.schema == "file");
	assert(fileUrl.host == "");
	assert(fileUrl.path == InetPath("/foo/bar"));
	// converting back yields the path the URL was built from
	assert(fileUrl.toNativePath == nativePath);

	// an "http" URL has no native path representation
	assertThrown(URL("http://example.org/").toNativePath);
	// a native path without a leading slash is rejected by the constructor
	assertThrown(URL(NativePath("foo/bar")));
}
1088 
unittest { // URL Normalization
	// Parses `text` and returns the string form of its normalized copy.
	static string norm(string text)
	{
		return URL.parse(text).normalized.toString();
	}

	// hex digits of percent escapes are upper-cased
	assert(norm("http://example.com/foo%2a") == "http://example.com/foo%2A");

	// schema and host are lower-cased; user name and path keep their case
	assert(norm("HTTP://User@Example.COM/Foo") == "http://User@example.com/Foo");

	// "%7E" is decoded to a literal '~'
	assert(norm("http://example.com/%7Efoo") == "http://example.com/~foo");

	// "." and ".." segments are resolved
	assert(norm("http://example.com/foo/./bar/baz/../qux") == "http://example.com/foo/bar/qux");

	// an empty path becomes "/"
	assert(norm("http://example.com") == "http://example.com/");

	// port 80 is dropped for "http", port 443 for "https"
	assert(norm("http://example.com:80/") == "http://example.com/");
	assert(norm("hTTPs://examPLe.COM:443/my/path") == "https://example.com/my/path");

	// in-place normalization with `true` passed appends a trailing slash
	auto inPlace = URL.parse("http://example.com/foo");
	inPlace.normalize(true);
	assert(inPlace.toString() == "http://example.com/foo/");
}
1115 
version (Windows) unittest { // Windows drive letter paths
	auto drivePath = WindowsPath(`C:\foo`);
	auto fileUrl = URL(drivePath);
	assert(fileUrl.schema == "file");
	assert(fileUrl.host == "");
	// the drive letter becomes the first path segment
	assert(fileUrl.path == InetPath("/C:/foo"));

	// normalize before comparing with the backslash form
	auto roundTrip = fileUrl.toNativePath;
	roundTrip.normalize();
	assert(roundTrip == drivePath);
}
1125 
version (Windows) unittest { // UNC paths
	auto uncPath = WindowsPath(`\\server\share\path`);
	auto fileUrl = URL(uncPath);
	assert(fileUrl.schema == "file");
	// the server name ends up as host, share and path as the URL path
	assert(fileUrl.host == "server");
	assert(fileUrl.path == InetPath("/share/path"));

	auto roundTrip = fileUrl.toNativePath;
	roundTrip.normalize(); // convert slash to backslash if necessary
	assert(roundTrip == uncPath);
}
1136 
unittest { // appending paths and single path segments with `~` and `~=`
	// base URL that already has a path
	auto fooPlusPath = URL.parse("http://example.com/foo") ~ InetPath("bar");
	assert(fooPlusPath.toString() == "http://example.com/foo/bar");
	auto fooPlusSegment = URL.parse("http://example.com/foo") ~ InetPath.Segment("bar");
	assert(fooPlusSegment.toString() == "http://example.com/foo/bar");

	// base URL without any path
	auto barePlusPath = URL.parse("http://example.com") ~ InetPath("foo");
	assert(barePlusPath.toString() == "http://example.com/foo");
	auto barePlusSegment = URL.parse("http://example.com") ~ InetPath.Segment("foo");
	assert(barePlusSegment.toString() == "http://example.com/foo");

	// in-place appending
	URL grown = URL.parse("http://example.com/");
	grown ~= InetPath("foo");
	grown ~= InetPath.Segment("bar");
	assert(grown.toString() == "http://example.com/foo/bar");
}
1153 
unittest { // URLs made of a schema and a path only (no "//" part)
	auto rooted = URL.parse("foo:/foo/bar");
	assert(rooted.toString() == "foo:/foo/bar");
	assert(rooted.path.toString() == "/foo/bar");

	// path without a leading slash
	auto rootless = URL.parse("foo:foo/bar");
	assert(rootless.toString() == "foo:foo/bar");
}