1 /**
2 	URL parsing routines.
3 
4 	Copyright: © 2012 rejectedsoftware e.K.
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Sönke Ludwig
7 */
8 module dub.internal.vibecompat.inet.url;
9 
10 public import dub.internal.vibecompat.inet.path;
11 
12 version (Have_vibe_d_inet) public import vibe.inet.url;
13 else:
14 
import std.algorithm;
import std.array;
import std.conv;
import std.exception;
import std.meta : AliasSeq;
import std.string;
import std.uri;
22 
23 
/**
	Represents a URL decomposed into its components.

	A URL is split into schema, optional user name/password, host, optional
	port, path, query string and anchor. The path is kept both as a
	`NativePath` and in string form.
*/
struct URL {
	private {
		string m_schema;
		string m_pathString;
		NativePath m_path;
		string m_host;
		ushort m_port;
		string m_username;
		string m_password;
		string m_queryString;
		string m_anchor;
		// Schemas that are written in the "proto://server/path" style.
		alias m_schemes = AliasSeq!("http", "https", "ftp", "spdy", "file", "sftp");
	}

	/// Constructs a new URL object from its components.
	this(string schema, string host, ushort port, NativePath path)
	{
		m_schema = schema;
		m_host = host;
		m_port = port;
		m_path = path;
		m_pathString = path.toString();
	}
	/// ditto
	this(string schema, NativePath path)
	{
		this(schema, null, 0, path);
	}

	/** Constructs a URL from its string representation.

		A string starting with '/' is treated as a local URI without schema
		and host. Otherwise the part before the first ':' is the schema; for
		schemas matched by `m_schemes` the string must continue with "//".

		Throws: An exception (via `enforce`) if the string is empty, has no
			schema, lacks the "//" required by its schema, has an empty user
			name, an empty port or an empty server name. `to!ushort` throws
			if the port is not a valid `ushort`.

		TODO: additional validation required (e.g. valid host and user names and port)
	*/
	this(string url_string)
	{
		// indexOf yields code unit offsets, which is what slicing requires;
		// countUntil would count code points on a narrow string.
		import std.string : indexOf;

		auto str = url_string;
		enforce(str.length > 0, "Empty URL.");
		if( str[0] != '/' ){
			auto idx = str.indexOf(':');
			enforce(idx > 0, "No schema in URL:"~str);
			m_schema = str[0 .. idx];
			str = str[idx+1 .. $];
			bool requires_host = false;

			if (usesDoubleSlash(m_schema))
			{
				// proto://server/path style
				enforce(str.startsWith("//"), "URL must start with proto://...");
				requires_host = true;
				str = str[2 .. $];
			}

			// The authority section ends at the first '/' (or at the end of the string).
			auto si = str.indexOf('/');
			if( si < 0 ) si = str.length;

			// Optional "user[:password]@" prefix.
			auto ai = str[0 .. si].indexOf('@');
			ptrdiff_t hs = 0;
			if( ai >= 0 ){
				hs = ai+1;
				auto ci = str[0 .. ai].indexOf(':');
				if( ci >= 0 ){
					m_username = str[0 .. ci];
					m_password = str[ci+1 .. ai];
				} else m_username = str[0 .. ai];
				enforce(m_username.length > 0, "Empty user name in URL.");
			}

			// Host with optional ":port" suffix.
			m_host = str[hs .. si];
			auto pi = m_host.indexOf(':');
			if(pi > 0) {
				enforce(pi < m_host.length-1, "Empty port in URL.");
				m_port = to!ushort(m_host[pi+1..$]);
				m_host = m_host[0 .. pi];
			}

			// "file" URLs are allowed to have an empty host.
			enforce(!requires_host || m_schema == "file" || m_host.length > 0,
				"Empty server name in URL.");
			str = str[si .. $];
		}

		// Whatever remains is path + query + anchor; an empty rest means the root path.
		this.localURI = (str == "") ? "/" : str;
	}
	/// ditto
	static URL parse(string url_string)
	{
		return URL(url_string);
	}

	// Tests whether the last '+'-separated part of `schema` matches one of
	// `m_schemes`, i.e. whether the URL is written as "proto://...".
	// NOTE(review): canFind appears to do a substring search here, so a part
	// merely containing e.g. "http" would match too - confirm this is intended.
	private static bool usesDoubleSlash(string schema)
	{
		auto schema_parts = schema.split("+");
		return !schema_parts.empty && schema_parts.back.canFind(m_schemes);
	}

	/// The schema/protocol part of the URL
	@property string schema() const { return m_schema; }
	/// ditto
	@property void schema(string v) { m_schema = v; }

	/// The path part of the URL in the original string form
	@property string pathString() const { return m_pathString; }

	/// The path part of the URL
	@property NativePath path() const { return m_path; }
	/// ditto
	@property void path(NativePath p)
	{
		m_path = p;
		auto pstr = p.toString();
		m_pathString = pstr;
	}

	/// The host part of the URL (depends on the schema)
	@property string host() const { return m_host; }
	/// ditto
	@property void host(string v) { m_host = v; }

	/// The port part of the URL (optional)
	@property ushort port() const { return m_port; }
	/// ditto
	@property void port(ushort v) { m_port = v; }

	/// The user name part of the URL (optional)
	@property string username() const { return m_username; }
	/// ditto
	@property void username(string v) { m_username = v; }

	/// The password part of the URL (optional)
	@property string password() const { return m_password; }
	/// ditto
	@property void password(string v) { m_password = v; }

	/// The query string part of the URL (optional)
	@property string queryString() const { return m_queryString; }
	/// ditto
	@property void queryString(string v) { m_queryString = v; }

	/// The anchor part of the URL (optional)
	@property string anchor() const { return m_anchor; }

	/// The path part plus query string and anchor
	@property string localURI()
	const {
		auto str = appender!string();
		// +2 leaves room for the '?' and '#' separators.
		str.reserve(m_pathString.length + 2 + queryString.length + anchor.length);
		str.put(encode(path.toString()));
		if( queryString.length ) {
			str.put("?");
			str.put(queryString);
		}
		if( anchor.length ) {
			str.put("#");
			str.put(anchor);
		}
		return str.data;
	}
	/// ditto
	@property void localURI(string str)
	{
		import std.string : indexOf;

		// Split off the anchor first so that a '?' inside it is not mistaken
		// for the query separator. Components that are absent from the new
		// value are cleared instead of keeping their previous contents.
		auto ai = str.indexOf('#');
		if( ai >= 0 ){
			m_anchor = str[ai+1 .. $];
			str = str[0 .. ai];
		} else m_anchor = null;

		auto qi = str.indexOf('?');
		if( qi >= 0 ){
			m_queryString = str[qi+1 .. $];
			str = str[0 .. qi];
		} else m_queryString = null;

		m_pathString = str;
		m_path = NativePath(decode(str));
	}

	/// The URL to the parent path with query string and anchor stripped.
	@property URL parentURL() const {
		URL ret;
		ret.schema = schema;
		ret.host = host;
		ret.port = port;
		ret.username = username;
		ret.password = password;
		ret.path = path.parentPath;
		return ret;
	}

	/** Converts this URL object to its string representation.

		The result consists of schema, host, port (if non-zero) and the local
		URI. User name and password are not written.
	*/
	string toString()
	const {
		import std.format;
		auto dst = appender!string();
		dst.put(schema);
		dst.put(":");
		if (usesDoubleSlash(schema))
		{
			dst.put("//");
		}
		dst.put(host);
		if( m_port > 0 ) formattedWrite(dst, ":%d", m_port);
		dst.put(localURI);
		return dst.data;
	}

	/// Tests whether schema and host match `rhs` and the path starts with the path of `rhs`.
	bool startsWith(const URL rhs) const {
		if( m_schema != rhs.m_schema ) return false;
		if( m_host != rhs.m_host ) return false;
		// FIXME: also consider user, port, querystring, anchor etc
		return path.startsWith(rhs.m_path);
	}

	/// Returns a new URL with `rhs` appended to the path (schema, host and port are kept).
	URL opBinary(string OP)(NativePath rhs) const if( OP == "~" ) { return URL(m_schema, m_host, m_port, m_path ~ rhs); }
	/// ditto
	URL opBinary(string OP)(PathEntry rhs) const if( OP == "~" ) { return URL(m_schema, m_host, m_port, m_path ~ rhs); }

	/// Appends `rhs` to the path of this URL in place.
	void opOpAssign(string OP)(NativePath rhs) if( OP == "~" )
	{
		m_path ~= rhs;
		// Keep the string form in sync, like the `path` setter does.
		m_pathString = m_path.toString();
	}
	/// ditto
	void opOpAssign(string OP)(PathEntry rhs) if( OP == "~" )
	{
		m_path ~= rhs;
		m_pathString = m_path.toString();
	}

	/// Tests two URLs for equality using '=='. Only schema, host and path are compared.
	bool opEquals(ref const URL rhs) const {
		if( m_schema != rhs.m_schema ) return false;
		if( m_host != rhs.m_host ) return false;
		if( m_path != rhs.m_path ) return false;
		return true;
	}
	/// ditto
	bool opEquals(const URL other) const { return opEquals(other); }

	/** Orders two URLs by schema, then host, then path.

		Returns: A negative or positive value taken from the first differing
			component, or 0 if schema, host and path are all equal.
	*/
	int opCmp(ref const URL rhs) const {
		if( m_schema != rhs.m_schema ) return m_schema.cmp(rhs.m_schema);
		if( m_host != rhs.m_host ) return m_host.cmp(rhs.m_host);
		if( m_path != rhs.m_path ) return m_path.opCmp(rhs.m_path);
		// All compared components are equal.
		return 0;
	}
}
255 
unittest {
	// Simple URL: schema, host and path are extracted.
	{
		auto simple = URL.parse("https://www.example.net/index.html");
		assert(simple.schema == "https", simple.schema);
		assert(simple.host == "www.example.net", simple.host);
		assert(simple.path == NativePath("/index.html"), simple.path.toString());
	}

	// URL using every component: credentials, port, path, query and anchor.
	{
		auto full = URL.parse("http://jo.doe:password@sub.www.example.net:4711/sub2/index.html?query#anchor");
		assert(full.schema == "http", full.schema);
		assert(full.username == "jo.doe", full.username);
		assert(full.password == "password", full.password);
		assert(full.port == 4711, to!string(full.port));
		assert(full.host == "sub.www.example.net", full.host);
		assert(full.path.toString() == "/sub2/index.html", full.path.toString());
		assert(full.queryString == "query", full.queryString);
		assert(full.anchor == "anchor", full.anchor);
	}

	// Appending a path yields the same result with and without a trailing slash.
	{
		auto noSlash = URL("http://localhost")~NativePath("packages");
		assert(noSlash.toString() == "http://localhost/packages", noSlash.toString());

		auto withSlash = URL("http://localhost/")~NativePath("packages");
		assert(withSlash.toString() == "http://localhost/packages", withSlash.toString());
	}

	// Compound "a+b" schema is kept intact and survives a round trip.
	{
		auto compound = URL.parse("dub+https://code.dlang.org/");
		assert(compound.host == "code.dlang.org");
		assert(compound.toString() == "dub+https://code.dlang.org/");
		assert(compound.schema == "dub+https");
	}
}