HttpListenerRequestUriBuilder.cs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.Globalization;
  5. using System.Text;
  6. namespace SocketHttpListener.Net
  7. {
  8. // We don't use the cooked URL because http.sys unescapes all percent-encoded values. However,
  9. // we also can't just use the raw Uri, since http.sys supports not only Utf-8, but also ANSI/DBCS and
  10. // Unicode code points. System.Uri only supports Utf-8.
  11. // The purpose of this class is to convert all ANSI, DBCS, and Unicode code points into percent encoded
  12. // Utf-8 characters.
  internal sealed class HttpListenerRequestUriBuilder
  {
    // Strict UTF-8 (no BOM, throws on invalid byte sequences) so that a failed decode is detectable.
    private static readonly Encoding s_utf8Encoding = new UTF8Encoding(false, true);

    // Code page 0 requests the platform's default encoding. The exception fallbacks make invalid
    // input surface as Decoder-/EncoderFallbackException instead of being silently replaced.
    private static readonly Encoding s_ansiEncoding = Encoding.GetEncoding(0, new EncoderExceptionFallback(), new DecoderExceptionFallback());

    private readonly string _rawUri;
    private readonly string _cookedUriScheme;
    private readonly string _cookedUriHost;
    private readonly string _cookedUriPath;
    private readonly string _cookedUriQuery;

    // Accumulates the final request Uri string from the Uri parts passed to the ctor.
    private StringBuilder _requestUriString;

    // The raw path is parsed by looping through all characters from left to right. '_rawOctets'
    // stores consecutive percent encoded octets as actual byte values: e.g. for path /pa%C3%84th%2F/
    // it holds { 0xC3, 0x84 } when we reach character 't' and { 0x2F } when we reach the final '/'.
    // After a sequence of percent encoded octets ends, the octets are decoded with the current
    // encoding and the resulting string is percent encoded as UTF-8.
    //
    // When parsing ANSI (Latin 1) encoded path '/pa%C4th/', %C4 is added to '_rawOctets' and when
    // we reach 't', { 0xC4 } is fed into the ANSI encoding. The resulting string 'Ä' is percent
    // encoded into UTF-8 octets, so the final path is '/pa%C3%84th/'.
    private List<byte> _rawOctets;

    // Path portion of '_rawUri' (no scheme/authority, no query).
    private string _rawPath;

    // Holds the final request Uri; null until one of the build strategies succeeds.
    private Uri _requestUri;

    private HttpListenerRequestUriBuilder(string rawUri, string cookedUriScheme, string cookedUriHost,
      string cookedUriPath, string cookedUriQuery)
    {
      _rawUri = rawUri;
      _cookedUriScheme = cookedUriScheme;
      _cookedUriHost = cookedUriHost;
      _cookedUriPath = AddSlashToAsteriskOnlyPath(cookedUriPath);
      _cookedUriQuery = cookedUriQuery ?? string.Empty;
    }

    /// <summary>
    /// Builds the request Uri. The raw request target is preferred (its non-UTF-8 escapes are
    /// re-encoded as percent encoded UTF-8); if it cannot be converted, the cooked parts are used.
    /// </summary>
    /// <param name="rawUri">Request target as received (absolute path, absolute Uri, or "*").</param>
    /// <param name="cookedUriScheme">Scheme of the cooked Uri, without delimiter (e.g. "http").</param>
    /// <param name="cookedUriHost">Host (and optional port) of the cooked Uri.</param>
    /// <param name="cookedUriPath">Path of the cooked Uri; used only as a fallback.</param>
    /// <param name="cookedUriQuery">Query of the cooked Uri including '?'; null is treated as empty.</param>
    /// <returns>The request Uri, or null if neither the raw nor the cooked parts form a valid absolute Uri.</returns>
    public static Uri GetRequestUri(string rawUri, string cookedUriScheme, string cookedUriHost,
      string cookedUriPath, string cookedUriQuery)
    {
      var builder = new HttpListenerRequestUriBuilder(rawUri,
        cookedUriScheme, cookedUriHost, cookedUriPath, cookedUriQuery);
      return builder.Build();
    }

    // Tries the raw path first and falls back to the cooked path when that yields no Uri.
    private Uri Build()
    {
      // Without a raw Uri there is nothing to parse; go straight to the cooked parts.
      if (!string.IsNullOrEmpty(_rawUri))
      {
        BuildRequestUriUsingRawPath();
      }

      if (_requestUri == null)
      {
        BuildRequestUriUsingCookedPath();
      }

      return _requestUri;
    }

    // Composes scheme + "://" + host + cooked path + query. On failure '_requestUri' is set to
    // null by Uri.TryCreate. (Failure logging is currently disabled in this port.)
    private void BuildRequestUriUsingCookedPath()
    {
      string cookedUri = _cookedUriScheme + Uri.SchemeDelimiter + _cookedUriHost + _cookedUriPath + _cookedUriQuery;
      Uri.TryCreate(cookedUri, UriKind.Absolute, out _requestUri);
    }

    // Extracts the raw path and converts it using the primary encoding; only an encoding error
    // triggers a second attempt with the secondary encoding. If no attempt succeeds,
    // '_requestUri' remains null and Build() falls back to the cooked parts.
    // (Failure logging is currently disabled in this port.)
    private void BuildRequestUriUsingRawPath()
    {
      _rawPath = GetPath(_rawUri);

      ParsingResult result = BuildRequestUriUsingRawPath(GetEncoding(EncodingType.Primary));
      if (result == ParsingResult.EncodingError)
      {
        BuildRequestUriUsingRawPath(GetEncoding(EncodingType.Secondary));
      }
    }

    // Primary is strict UTF-8, secondary is the ANSI/default code page encoding.
    private static Encoding GetEncoding(EncodingType type)
    {
      Debug.Assert((type == EncodingType.Primary) || (type == EncodingType.Secondary),
        "Unknown 'EncodingType' value: " + type.ToString());

      return type == EncodingType.Secondary ? s_ansiEncoding : s_utf8Encoding;
    }

    // Performs one conversion attempt of '_rawPath' with the given encoding. All per-attempt state
    // is reset first, so a failed attempt leaves nothing behind for the next one.
    private ParsingResult BuildRequestUriUsingRawPath(Encoding encoding)
    {
      Debug.Assert(encoding != null, "'encoding' must be assigned.");
      // An empty raw path is legitimate (e.g. request target 'http://myserver'), so only null is a bug.
      Debug.Assert(_rawPath != null, "'rawPath' must be assigned.");

      _rawOctets = new List<byte>();
      _requestUriString = new StringBuilder();
      _requestUriString.Append(_cookedUriScheme)
        .Append(Uri.SchemeDelimiter)
        .Append(_cookedUriHost);

      ParsingResult result = ParseRawPath(encoding);
      if (result != ParsingResult.Success)
      {
        return result;
      }

      _requestUriString.Append(_cookedUriQuery);
      Debug.Assert(_rawOctets.Count == 0,
        "Still raw octets left. They must be added to the result path.");

      // If we can't create a Uri from the string, the string itself is invalid and it doesn't make
      // sense to try another encoding.
      return Uri.TryCreate(_requestUriString.ToString(), UriKind.Absolute, out _requestUri)
        ? ParsingResult.Success
        : ParsingResult.InvalidString;
    }

    // Walks '_rawPath' and appends its normalized form to '_requestUriString':
    //  - plain characters are copied as-is,
    //  - runs of %XX octets are decoded with 'encoding' and re-emitted as percent encoded UTF-8,
    //  - %uXXXX code points are emitted as percent encoded UTF-8.
    // Returns EncodingError if an octet run is not valid in 'encoding' (another encoding may work)
    // and InvalidString if the escape syntax itself is malformed (no encoding can fix that).
    private ParsingResult ParseRawPath(Encoding encoding)
    {
      Debug.Assert(encoding != null, "'encoding' must be assigned.");

      int index = 0;
      while (index < _rawPath.Length)
      {
        char current = _rawPath[index];
        if (current != '%')
        {
          // A non-'%' character ends any pending octet run: flush it, then copy the character.
          if (!EmptyDecodeAndAppendRawOctetsList(encoding))
          {
            return ParsingResult.EncodingError;
          }

          _requestUriString.Append(current);
          index++;
          continue;
        }

        // Do not rely on the raw path having been validated upstream: a truncated escape must be
        // reported as invalid instead of indexing past the end of the string.
        int charsAfterPercent = _rawPath.Length - index - 1;
        if (charsAfterPercent < 2)
        {
          return ParsingResult.InvalidString;
        }

        char marker = _rawPath[index + 1];
        if (marker == 'u' || marker == 'U')
        {
          // "%uXXXX": 'u' plus exactly four hex digits.
          if (charsAfterPercent < 5)
          {
            return ParsingResult.InvalidString;
          }

          // The code point ends any pending octet run.
          if (!EmptyDecodeAndAppendRawOctetsList(encoding))
          {
            return ParsingResult.EncodingError;
          }

          if (!AppendUnicodeCodePointValuePercentEncoded(_rawPath.Substring(index + 2, 4)))
          {
            return ParsingResult.InvalidString;
          }

          index += 6;
        }
        else
        {
          // '%' not followed by 'u', i.e. a percent encoded octet: %XX
          if (!AddPercentEncodedOctetToRawOctetsList(encoding, _rawPath.Substring(index + 1, 2)))
          {
            return ParsingResult.InvalidString;
          }

          index += 3;
        }
      }

      // If the raw path ends with a sequence of percent encoded octets, make sure those get added
      // to the result as well.
      return EmptyDecodeAndAppendRawOctetsList(encoding)
        ? ParsingResult.Success
        : ParsingResult.EncodingError;
    }

    // Appends the code point given as hex digits (the XXXX of %uXXXX) as percent encoded UTF-8.
    // Returns false if 'codePoint' is not pure hex or does not denote a Unicode scalar value.
    private bool AppendUnicodeCodePointValuePercentEncoded(string codePoint)
    {
      // AllowHexSpecifier (unlike HexNumber) rejects surrounding whitespace, so only hex digits pass.
      if (!int.TryParse(codePoint, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out int codePointValue))
      {
        return false;
      }

      // char.ConvertFromUtf32 throws for values outside the scalar value range (negative,
      // > 0x10FFFF, or a surrogate); reject those up front instead of catching the exception.
      bool isSurrogate = codePointValue >= 0xD800 && codePointValue <= 0xDFFF;
      if (codePointValue < 0 || codePointValue > 0x10FFFF || isSurrogate)
      {
        return false;
      }

      string unicodeString = char.ConvertFromUtf32(codePointValue);
      AppendOctetsPercentEncoded(_requestUriString, s_utf8Encoding.GetBytes(unicodeString));
      return true;
    }

    // Parses the two hex digits of a %XX escape and queues the byte in '_rawOctets'.
    // 'encoding' is unused; the parameter is kept so the signature stays as it was.
    private bool AddPercentEncodedOctetToRawOctetsList(Encoding encoding, string escapedCharacter)
    {
      // AllowHexSpecifier (unlike HexNumber) rejects surrounding whitespace, so only hex digits pass.
      if (!byte.TryParse(escapedCharacter, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out byte encodedValue))
      {
        return false;
      }

      _rawOctets.Add(encodedValue);
      return true;
    }

    // Flushes '_rawOctets': verifies the octets form a valid string in 'encoding', appends them to
    // '_requestUriString' as percent encoded UTF-8 and clears the list. Returns false (leaving the
    // list untouched) if the octets cannot be decoded or the result cannot be encoded as UTF-8.
    private bool EmptyDecodeAndAppendRawOctetsList(Encoding encoding)
    {
      if (_rawOctets.Count == 0)
      {
        return true;
      }

      byte[] octets = _rawOctets.ToArray();
      try
      {
        // Decoding doubles as validation: the exception fallback throws on invalid input.
        string decodedString = encoding.GetString(octets);

        // Octets that are already valid UTF-8 are emitted unchanged; anything else is transcoded.
        byte[] utf8Octets = ReferenceEquals(encoding, s_utf8Encoding)
          ? octets
          : s_utf8Encoding.GetBytes(decodedString);

        AppendOctetsPercentEncoded(_requestUriString, utf8Octets);
        _rawOctets.Clear();
        return true;
      }
      catch (DecoderFallbackException)
      {
        // The octets are not valid in 'encoding'.
        return false;
      }
      catch (EncoderFallbackException)
      {
        // The decoded string could not be encoded as UTF-8.
        return false;
      }
    }

    // Appends each octet as '%' followed by two upper-case hex digits.
    private static void AppendOctetsPercentEncoded(StringBuilder target, IEnumerable<byte> octets)
    {
      foreach (byte octet in octets)
      {
        target.Append('%');
        target.Append(octet.ToString("X2", CultureInfo.InvariantCulture));
      }
    }

    // Formats octets as space separated upper-case hex pairs (e.g. "C3 84"). Within this class it
    // is only referenced by the currently disabled diagnostics.
    private static string GetOctetsAsString(IEnumerable<byte> octets)
    {
      var octetString = new StringBuilder();
      foreach (byte octet in octets)
      {
        if (octetString.Length > 0)
        {
          octetString.Append(' ');
        }

        octetString.Append(octet.ToString("X2", CultureInfo.InvariantCulture));
      }

      return octetString.ToString();
    }

    // Returns the path portion of a request target: everything from the first '/' after the
    // authority (or from position 0 for an absolute path) up to, but excluding, the first '?'.
    // Never returns null; returns an empty string when there is no path.
    private static string GetPath(string uriString)
    {
      if (string.IsNullOrEmpty(uriString))
      {
        return string.Empty;
      }

      int pathStartIndex = 0;

      // Perf. improvement: nearly all strings are relative Uris. So just look if the string starts
      // with '/'. If so, we have a relative Uri and the path starts at position 0.
      if (uriString[0] != '/')
      {
        // We can't check against the cooked scheme, since a request to http://myserver/ may use
        // a request line 'GET https://myserver/' (note http vs. https). Therefore check if the
        // Uri starts with either http:// or https://.
        int authorityStartIndex = 0;
        if (uriString.StartsWith("http://", StringComparison.OrdinalIgnoreCase))
        {
          authorityStartIndex = 7;
        }
        else if (uriString.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
        {
          authorityStartIndex = 8;
        }

        if (authorityStartIndex > 0)
        {
          // Absolute Uri: the path begins at the first '/' after the authority.
          pathStartIndex = uriString.IndexOf('/', authorityStartIndex);
          if (pathStartIndex == -1)
          {
            // e.g. for request lines like: 'GET http://myserver' (no final '/')
            pathStartIndex = uriString.Length;
          }
        }
        else
        {
          // RFC2616: Request-URI = "*" | absoluteURI | abs_path | authority
          // Neither an absolute path nor an absolute http(s) Uri: expected to be "*". Whatever it
          // is, add an initial slash and treat it as a path.
          uriString = "/" + uriString;
        }
      }

      // The path is terminated by the first '?' or by the end of the string.
      int queryIndex = uriString.IndexOf('?');
      if (queryIndex == -1)
      {
        queryIndex = uriString.Length;
      }

      // For targets like 'http://server?query=value/1/2' the first '/' after the authority lies
      // inside the query (or there is none at all). Such a target has an empty path; clamp the
      // start so the Substring length below can never become negative.
      if (pathStartIndex > queryIndex)
      {
        pathStartIndex = queryIndex;
      }

      return AddSlashToAsteriskOnlyPath(uriString.Substring(pathStartIndex, queryIndex - pathStartIndex));
    }

    // If a request like "OPTIONS * HTTP/1.1" is sent to the listener, then the request Uri should
    // be "http[s]://server[:port]/*": a path consisting only of '*' becomes "/*". Every other path
    // is returned unchanged.
    private static string AddSlashToAsteriskOnlyPath(string path)
    {
      Debug.Assert(path != null, "'path' must not be null");

      return path == "*" ? "/*" : path;
    }

    // Outcome of one attempt to convert the raw path.
    private enum ParsingResult
    {
      // The raw path was converted and a Uri was created.
      Success,
      // The raw path (or the resulting Uri string) is malformed; retrying with another encoding is pointless.
      InvalidString,
      // The percent encoded octets are not valid in the encoding used; another encoding may succeed.
      EncodingError
    }

    // Selects which encoding to use for decoding percent encoded octets.
    private enum EncodingType
    {
      Primary,
      Secondary
    }
  }
  382. }