/**
	URL parsing routines.

	Copyright: © 2012 RejectedSoftware e.K.
	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
	Authors: Sönke Ludwig
*/
module dub.internal.vibecompat.inet.url;

public import dub.internal.vibecompat.inet.path;

import std.algorithm;
import std.array;
import std.conv;
import std.exception;
import std.string;
import std.uri;


/**
	Represents a URL decomposed into its components.

	The path is stored twice: once as the raw (still encoded) string and once
	as a decoded `Path` value. All mutators in this struct keep both
	representations in sync.
*/
struct Url {
	private {
		string m_schema;
		string m_pathString;
		Path m_path;
		string m_host;
		ushort m_port;
		string m_username;
		string m_password;
		string m_queryString;
		string m_anchor;
	}

	/// Constructs a new URL object from its components.
	this(string schema, string host, ushort port, Path path)
	{
		m_schema = schema;
		m_host = host;
		m_port = port;
		m_path = path;
		m_pathString = path.toString();
	}
	/// ditto
	this(string schema, Path path)
	{
		this(schema, null, 0, path);
	}

	/** Constructs a URL from its string representation.

		A string starting with '/' is treated as a schema-less local URI
		(path, query string and anchor only). Otherwise everything up to the
		first ':' is taken as the schema, followed by an optional
		`user[:password]@` part, the host, an optional `:port` and the local
		URI.

		Throws: An exception (via `enforce`) if the string is empty, has no
			schema, lacks the "//" prefix for the known `proto://` schemas,
			has an empty user name, an empty port or - for schemas other
			than "file" that require one - an empty host. `to!ushort` throws
			if the port is not a valid number.

		TODO: additional validation required (e.g. valid host and user names and port)
	*/
	this(string url_string)
	{
		auto str = url_string;
		enforce(str.length > 0, "Empty URL.");
		if( str[0] != '/' ){
			auto idx = str.countUntil(':');
			enforce(idx > 0, "No schema in URL:"~str);
			m_schema = str[0 .. idx];
			str = str[idx+1 .. $];
			bool requires_host = false;

			switch(m_schema){
				case "http":
				case "https":
				case "ftp":
				case "spdy":
				case "sftp":
				case "file":
					// proto://server/path style
					enforce(str.startsWith("//"), "URL must start with proto://...");
					requires_host = true;
					str = str[2 .. $];
					goto default;
				default:
					// The authority part ends at the first '/' (or at the end of the string).
					auto si = str.countUntil('/');
					if( si < 0 ) si = str.length;

					// Optional "user[:password]@" prefix of the authority part.
					auto ai = str[0 .. si].countUntil('@');
					sizediff_t hs = 0;
					if( ai >= 0 ){
						hs = ai+1;
						auto ci = str[0 .. ai].countUntil(':');
						if( ci >= 0 ){
							m_username = str[0 .. ci];
							m_password = str[ci+1 .. ai];
						} else m_username = str[0 .. ai];
						enforce(m_username.length > 0, "Empty user name in URL.");
					}

					// Host with optional ":port" suffix.
					m_host = str[hs .. si];
					auto pi = m_host.countUntil(':');
					if(pi > 0) {
						enforce(pi < m_host.length-1, "Empty port in URL.");
						m_port = to!ushort(m_host[pi+1..$]);
						m_host = m_host[0 .. pi];
					}

					// "file" URLs are allowed to have an empty host (file:///path).
					enforce(!requires_host || m_schema == "file" || m_host.length > 0,
						"Empty server name in URL.");
					str = str[si .. $];
			}
		}

		// Whatever remains is path + query string + anchor.
		this.localURI = str;
	}
	/// ditto
	static Url parse(string url_string)
	{
		return Url(url_string);
	}

	/// The schema/protocol part of the URL
	@property string schema() const { return m_schema; }
	/// ditto
	@property void schema(string v) { m_schema = v; }

	/// The path part of the URL in the original string form
	@property string pathString() const { return m_pathString; }

	/// The path part of the URL
	@property Path path() const { return m_path; }
	/// ditto
	@property void path(Path p)
	{
		m_path = p;
		auto pstr = p.toString();
		m_pathString = pstr;
	}

	/// The host part of the URL (depends on the schema)
	@property string host() const { return m_host; }
	/// ditto
	@property void host(string v) { m_host = v; }

	/// The port part of the URL (optional)
	@property ushort port() const { return m_port; }
	/// ditto
	@property void port(ushort v) { m_port = v; }

	/// The user name part of the URL (optional)
	@property string username() const { return m_username; }
	/// ditto
	@property void username(string v) { m_username = v; }

	/// The password part of the URL (optional)
	@property string password() const { return m_password; }
	/// ditto
	@property void password(string v) { m_password = v; }

	/// The query string part of the URL (optional)
	@property string queryString() const { return m_queryString; }
	/// ditto
	@property void queryString(string v) { m_queryString = v; }

	/// The anchor part of the URL (optional)
	@property string anchor() const { return m_anchor; }

	/** The path part plus query string and anchor.

		The getter encodes the path and appends "?query" and "#anchor" only
		when the respective part is non-empty. The setter splits the string
		at the first '#' and then at the first '?', and stores the remaining
		path both in raw form and decoded as a `Path`.
	*/
	@property string localURI()
	const {
		auto str = appender!string();
		str.reserve(m_pathString.length + 2 + queryString.length + anchor.length);
		str.put(encode(path.toString()));
		if( queryString.length ) {
			str.put("?");
			str.put(queryString);
		}
		if( anchor.length ) {
			str.put("#");
			str.put(anchor);
		}
		return str.data;
	}
	/// ditto
	@property void localURI(string str)
	{
		// The anchor is stripped first, so a '?' inside the anchor is not
		// mistaken for the start of the query string.
		auto ai = str.countUntil('#');
		if( ai >= 0 ){
			m_anchor = str[ai+1 .. $];
			str = str[0 .. ai];
		}

		auto qi = str.countUntil('?');
		if( qi >= 0 ){
			m_queryString = str[qi+1 .. $];
			str = str[0 .. qi];
		}

		m_pathString = str;
		m_path = Path(decode(str));
	}

	/// The URL to the parent path with query string and anchor stripped.
	@property Url parentUrl() const {
		Url ret;
		ret.schema = schema;
		ret.host = host;
		ret.port = port;
		ret.username = username;
		ret.password = password;
		ret.path = path.parentPath;
		return ret;
	}

	/** Converts this URL object to its string representation.

		The output consists of schema, host, port (if non-zero) and the
		local URI. User name and password are not part of the output.
	*/
	string toString()
	const {
		import std.format;
		auto dst = appender!string();
		dst.put(schema);
		dst.put(":");
		switch(schema){
			default: break;
			case "file":
			case "http":
			case "https":
			case "ftp":
			case "spdy":
			case "sftp":
				dst.put("//");
				break;
		}
		dst.put(host);
		if( m_port > 0 ) formattedWrite(dst, ":%d", m_port);
		dst.put(localURI);
		return dst.data;
	}

	/** Tests whether this URL has the same schema and host as `rhs` and a
		path that starts with the path of `rhs`.
	*/
	bool startsWith(const Url rhs) const {
		if( m_schema != rhs.m_schema ) return false;
		if( m_host != rhs.m_host ) return false;
		// FIXME: also consider user, port, querystring, anchor etc
		return path.startsWith(rhs.m_path);
	}

	/// Returns a new URL (schema, host and port only) with `rhs` appended to the path.
	Url opBinary(string OP)(Path rhs) const if( OP == "~" ) { return Url(m_schema, m_host, m_port, m_path ~ rhs); }
	/// ditto
	Url opBinary(string OP)(PathEntry rhs) const if( OP == "~" ) { return Url(m_schema, m_host, m_port, m_path ~ rhs); }

	/// Appends `rhs` to the path of this URL in place.
	void opOpAssign(string OP)(Path rhs) if( OP == "~" )
	{
		m_path ~= rhs;
		// keep the string form in sync, just like the `path` setter does
		m_pathString = m_path.toString();
	}
	/// ditto
	void opOpAssign(string OP)(PathEntry rhs) if( OP == "~" )
	{
		m_path ~= rhs;
		// keep the string form in sync, just like the `path` setter does
		m_pathString = m_path.toString();
	}

	/** Tests two URLs for equality using '=='.

		Only schema, host and path take part in the comparison.
	*/
	bool opEquals(ref const Url rhs) const {
		if( m_schema != rhs.m_schema ) return false;
		if( m_host != rhs.m_host ) return false;
		if( m_path != rhs.m_path ) return false;
		return true;
	}
	/// ditto
	bool opEquals(const Url other) const {
		// `other` is an lvalue here, so this resolves to the `ref const` overload.
		return opEquals(other);
	}

	/** Orders two URLs by schema, then host, then path.

		Returns: A negative value, zero or a positive value, consistent with
			`opEquals` (zero exactly when schema, host and path are equal).
	*/
	int opCmp(ref const Url rhs) const {
		if( m_schema != rhs.m_schema ) return m_schema.cmp(rhs.m_schema);
		if( m_host != rhs.m_host ) return m_host.cmp(rhs.m_host);
		if( m_path != rhs.m_path ) return m_path.opCmp(rhs.m_path);
		return 0;
	}
}

unittest {
	auto url = Url.parse("https://www.example.net/index.html");
	assert(url.schema == "https", url.schema);
	assert(url.host == "www.example.net", url.host);
	assert(url.path == Path("/index.html"), url.path.toString());

	url = Url.parse("http://jo.doe:password@sub.www.example.net:4711/sub2/index.html?query#anchor");
	assert(url.schema == "http", url.schema);
	assert(url.username == "jo.doe", url.username);
	assert(url.password == "password", url.password);
	assert(url.port == 4711, to!string(url.port));
	assert(url.host == "sub.www.example.net", url.host);
	assert(url.path.toString() == "/sub2/index.html", url.path.toString());
	assert(url.queryString == "query", url.queryString);
	assert(url.anchor == "anchor", url.anchor);

	// equal URLs must compare as equal
	auto same = url;
	assert(url.opCmp(same) == 0);

	// in-place appending keeps the string form of the path in sync
	url ~= Path("more");
	assert(url.pathString == url.path.toString(), url.pathString);
}