123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443 |
- using System;
- using System.Collections.Generic;
- using System.Diagnostics;
- using System.Globalization;
- using System.Text;
- namespace SocketHttpListener.Net
- {
/// <summary>
/// Builds the request <see cref="Uri"/> from the raw request-line target and the already
/// unescaped ("cooked") Uri parts.
/// </summary>
/// <remarks>
/// We don't use the cooked URL because http.sys unescapes all percent-encoded values. However,
/// we also can't just use the raw Uri, since http.sys supports not only Utf-8, but also ANSI/DBCS and
/// Unicode code points. System.Uri only supports Utf-8.
/// The purpose of this class is to convert all ANSI, DBCS, and Unicode code points into percent encoded
/// Utf-8 characters.
///
/// NOTE(review): the original comments assume http.sys has validated the raw string. This class does
/// not rely on that: malformed raw input is reported as <see cref="ParsingResult.InvalidString"/> and
/// the cooked Uri parts are used instead of throwing.
/// </remarks>
internal sealed class HttpListenerRequestUriBuilder
{
    // Strict UTF-8 (no BOM, throws on invalid byte sequences) so that decoding failures are detectable.
    private static readonly Encoding s_utf8Encoding = new UTF8Encoding(false, true);

    // Code page 0 requests the default encoding; exception fallbacks make invalid input detectable.
    private static readonly Encoding s_ansiEncoding = Encoding.GetEncoding(0, new EncoderExceptionFallback(), new DecoderExceptionFallback());

    // Characters that end the authority part of an absolute request Uri.
    private static readonly char[] s_authorityTerminators = { '/', '?' };

    private readonly string _rawUri;
    private readonly string _cookedUriScheme;
    private readonly string _cookedUriHost;
    private readonly string _cookedUriPath;
    private readonly string _cookedUriQuery;

    // This field is used to build the final request Uri string from the Uri parts passed to the ctor.
    private StringBuilder _requestUriString;

    // The raw path is parsed by looping through all characters from left to right. 'rawOctets'
    // is used to store consecutive percent encoded octets as actual byte values: e.g. for path /pa%C3%84th%2F/
    // rawOctets will be set to { 0xC3, 0x84 } when we reach character 't' and it will be { 0x2F } when
    // we reach the final '/'. I.e. after a sequence of percent encoded octets ends, we use rawOctets as
    // input to the encoding and percent encode the resulting string into UTF-8 octets.
    //
    // When parsing ANSI (Latin 1) encoded path '/pa%C4th/', %C4 will be added to rawOctets and when
    // we reach 't', the content of rawOctets { 0xC4 } will be fed into the ANSI encoding. The resulting
    // string 'Ä' will be percent encoded into UTF-8 octets and appended to requestUriString. The final
    // path will be '/pa%C3%84th/', where '%C3%84' is the UTF-8 percent encoded character 'Ä'.
    private List<byte> _rawOctets;

    // Path portion extracted from the raw Uri; only set when the raw Uri is usable.
    private string _rawPath;

    // Holds the final request Uri.
    private Uri _requestUri;

    /// <summary>Captures the Uri parts; a null cooked path or query is treated as empty.</summary>
    private HttpListenerRequestUriBuilder(string rawUri, string cookedUriScheme, string cookedUriHost,
        string cookedUriPath, string cookedUriQuery)
    {
        _rawUri = rawUri;
        _cookedUriScheme = cookedUriScheme;
        _cookedUriHost = cookedUriHost;
        _cookedUriPath = AddSlashToAsteriskOnlyPath(cookedUriPath ?? string.Empty);
        _cookedUriQuery = cookedUriQuery ?? string.Empty;
    }

    /// <summary>
    /// Creates the request Uri, preferring the raw path (re-encoded as percent-encoded UTF-8) and
    /// falling back to the cooked path when the raw path cannot be used.
    /// </summary>
    /// <param name="rawUri">Request target as received (absolute path, absolute http/https Uri, or "*").</param>
    /// <param name="cookedUriScheme">Uri scheme, without the scheme delimiter.</param>
    /// <param name="cookedUriHost">Host (and optional port) part of the Uri.</param>
    /// <param name="cookedUriPath">Unescaped path used as fallback.</param>
    /// <param name="cookedUriQuery">Query string appended verbatim after the path; may be null.</param>
    /// <returns>The request Uri, or null if no absolute Uri could be created from either source.</returns>
    public static Uri GetRequestUri(string rawUri, string cookedUriScheme, string cookedUriHost,
        string cookedUriPath, string cookedUriQuery)
    {
        var builder = new HttpListenerRequestUriBuilder(rawUri,
            cookedUriScheme, cookedUriHost, cookedUriPath, cookedUriQuery);
        return builder.Build();
    }

    /// <summary>Tries the raw path first and uses the cooked path only if that produced no Uri.</summary>
    private Uri Build()
    {
        BuildRequestUriUsingRawPath();
        if (_requestUri == null)
        {
            BuildRequestUriUsingCookedPath();
        }

        return _requestUri;
    }

    /// <summary>Builds the Uri by concatenating the cooked parts; leaves the result null on failure.</summary>
    private void BuildRequestUriUsingCookedPath()
    {
        // Creating a Uri from the cooked Uri should really always work. If it does not, the result
        // stays null and the caller of GetRequestUri receives null.
        Uri.TryCreate(_cookedUriScheme + Uri.SchemeDelimiter + _cookedUriHost + _cookedUriPath +
            _cookedUriQuery, UriKind.Absolute, out _requestUri);
    }

    /// <summary>
    /// Builds the Uri from the raw path, trying the primary encoding first and the secondary
    /// encoding only if the primary one could not decode the percent-encoded octets.
    /// </summary>
    private void BuildRequestUriUsingRawPath()
    {
        // Without a raw Uri there is nothing to parse; Build() falls back to the cooked path.
        if (string.IsNullOrEmpty(_rawUri))
        {
            return;
        }

        // Initialize 'rawPath' only if really needed; i.e. if we build the request Uri from the raw Uri.
        _rawPath = GetPath(_rawUri);

        ParsingResult result = BuildRequestUriUsingRawPath(GetEncoding(EncodingType.Primary));
        if (result == ParsingResult.EncodingError)
        {
            // Only an encoding error justifies a retry; an invalid string is invalid in any encoding.
            // A failure here is not reported: Build() notices the missing Uri and uses the cooked path.
            BuildRequestUriUsingRawPath(GetEncoding(EncodingType.Secondary));
        }
    }

    /// <summary>Maps an encoding slot to the encoding instance: strict UTF-8 first, then ANSI.</summary>
    private static Encoding GetEncoding(EncodingType type)
    {
        Debug.Assert((type == EncodingType.Primary) || (type == EncodingType.Secondary),
            "Unknown 'EncodingType' value: " + type.ToString());

        return type == EncodingType.Secondary ? s_ansiEncoding : s_utf8Encoding;
    }

    /// <summary>
    /// Re-encodes the raw path using <paramref name="encoding"/> and tries to create the request Uri
    /// from scheme, host, re-encoded path and cooked query.
    /// </summary>
    /// <returns>
    /// Success if the Uri was created; EncodingError if the octets are not valid in
    /// <paramref name="encoding"/>; InvalidString if the raw path is malformed or does not form a Uri.
    /// </returns>
    private ParsingResult BuildRequestUriUsingRawPath(Encoding encoding)
    {
        Debug.Assert(encoding != null, "'encoding' must be assigned.");
        // The path may legitimately be empty, e.g. for the request target 'http://myserver'.
        Debug.Assert(_rawPath != null, "'rawPath' must be assigned.");

        // Start from scratch on every attempt so a failed attempt leaves no partial output behind.
        _rawOctets = new List<byte>();
        _requestUriString = new StringBuilder();
        _requestUriString.Append(_cookedUriScheme);
        _requestUriString.Append(Uri.SchemeDelimiter);
        _requestUriString.Append(_cookedUriHost);

        ParsingResult result = ParseRawPath(encoding);
        if (result != ParsingResult.Success)
        {
            return result;
        }

        _requestUriString.Append(_cookedUriQuery);
        Debug.Assert(_rawOctets.Count == 0,
            "Still raw octets left. They must be added to the result path.");

        if (!Uri.TryCreate(_requestUriString.ToString(), UriKind.Absolute, out _requestUri))
        {
            // If we can't create a Uri from the string, this is an invalid string and it doesn't make
            // sense to try another encoding.
            return ParsingResult.InvalidString;
        }

        return ParsingResult.Success;
    }

    /// <summary>
    /// Walks the raw path left to right, copying plain characters and converting "%XX" octet runs
    /// and "%uXXXX" code points into percent-encoded UTF-8 appended to the request Uri string.
    /// </summary>
    /// <returns>
    /// Success, EncodingError (octets not decodable with <paramref name="encoding"/>), or
    /// InvalidString (truncated or non-hexadecimal escape sequence).
    /// </returns>
    private ParsingResult ParseRawPath(Encoding encoding)
    {
        Debug.Assert(encoding != null, "'encoding' must be assigned.");

        int index = 0;
        while (index < _rawPath.Length)
        {
            char current = _rawPath[index];
            if (current != '%')
            {
                // We found a non-'%' character: decode the content of rawOctets into percent encoded
                // UTF-8 characters and append it to the result, then append the character itself.
                if (!EmptyDecodeAndAppendRawOctetsList(encoding))
                {
                    return ParsingResult.EncodingError;
                }

                _requestUriString.Append(current);
                index++;
                continue;
            }

            // A '%' needs at least two more characters (e.g. %2F). Checked at runtime rather than
            // asserted, so a truncated escape yields InvalidString instead of an out-of-range exception.
            if (index + 2 >= _rawPath.Length)
            {
                return ParsingResult.InvalidString;
            }

            index++;
            current = _rawPath[index];
            if (current == 'u' || current == 'U')
            {
                // We found "%u" which means, we have a Unicode code point of the form "%uXXXX":
                // four more characters are required after the 'u'.
                if (index + 4 >= _rawPath.Length)
                {
                    return ParsingResult.InvalidString;
                }

                // Flush pending octets first so the output keeps the order of the input.
                if (!EmptyDecodeAndAppendRawOctetsList(encoding))
                {
                    return ParsingResult.EncodingError;
                }

                if (!AppendUnicodeCodePointValuePercentEncoded(_rawPath.Substring(index + 1, 4)))
                {
                    return ParsingResult.InvalidString;
                }

                index += 5; // skip 'u' and the four hex digits
            }
            else
            {
                // We found '%', but not followed by 'u', i.e. we have a percent encoded octet: %XX
                if (!AddPercentEncodedOctetToRawOctetsList(encoding, _rawPath.Substring(index, 2)))
                {
                    return ParsingResult.InvalidString;
                }

                index += 2;
            }
        }

        // If the raw path ends with a sequence of percent encoded octets, make sure those get added
        // to the result (requestUriString).
        if (!EmptyDecodeAndAppendRawOctetsList(encoding))
        {
            return ParsingResult.EncodingError;
        }

        return ParsingResult.Success;
    }

    /// <summary>
    /// Parses the four hex digits of a "%uXXXX" sequence and appends the code point as
    /// percent-encoded UTF-8.
    /// </summary>
    /// <param name="codePoint">The four characters following "%u".</param>
    /// <returns>false if the text is not pure hexadecimal or is not an encodable code point.</returns>
    private bool AppendUnicodeCodePointValuePercentEncoded(string codePoint)
    {
        // http.sys only supports %uXXXX (4 hex-digits), even though unicode code points could have up to
        // 6 hex digits. Therefore we parse always 4 characters after %u and convert them to an int.
        // AllowHexSpecifier (unlike HexNumber) rejects surrounding white space.
        if (!int.TryParse(codePoint, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out int codePointValue))
        {
            return false;
        }

        try
        {
            string unicodeString = char.ConvertFromUtf32(codePointValue);
            AppendOctetsPercentEncoded(_requestUriString, s_utf8Encoding.GetBytes(unicodeString));
            return true;
        }
        catch (ArgumentOutOfRangeException)
        {
            // ConvertFromUtf32 rejects values that are not valid code points (e.g. surrogates).
        }
        catch (EncoderFallbackException)
        {
            // The strict UTF-8 encoder could not encode the string.
        }

        return false;
    }

    /// <summary>Parses the two hex digits of a "%XX" escape and queues the byte in the raw octet list.</summary>
    /// <param name="encoding">Not used by this method.</param>
    /// <param name="escapedCharacter">The two characters following '%'.</param>
    /// <returns>false if the text is not a two-digit hexadecimal value.</returns>
    private bool AddPercentEncodedOctetToRawOctetsList(Encoding encoding, string escapedCharacter)
    {
        // AllowHexSpecifier (unlike HexNumber) rejects white space, so "% 2" is not read as 0x02.
        if (!byte.TryParse(escapedCharacter, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out byte encodedValue))
        {
            return false;
        }

        _rawOctets.Add(encodedValue);
        return true;
    }

    /// <summary>
    /// Decodes the queued octets with <paramref name="encoding"/>, appends them as percent-encoded
    /// UTF-8 and clears the queue.
    /// </summary>
    /// <returns>true if the queue was empty or was converted; false if decoding or encoding failed.</returns>
    private bool EmptyDecodeAndAppendRawOctetsList(Encoding encoding)
    {
        if (_rawOctets.Count == 0)
        {
            return true;
        }

        byte[] octets = _rawOctets.ToArray();
        try
        {
            // If the encoding can get a string out of the byte array, this is a valid string in the
            // 'encoding' encoding. Decoding is done even for UTF-8, purely as validation.
            string decodedString = encoding.GetString(octets);

            // Octets that are already valid UTF-8 are emitted unchanged; anything else is transcoded.
            byte[] utf8Octets = ReferenceEquals(encoding, s_utf8Encoding)
                ? octets
                : s_utf8Encoding.GetBytes(decodedString);

            AppendOctetsPercentEncoded(_requestUriString, utf8Octets);
            _rawOctets.Clear();
            return true;
        }
        catch (DecoderFallbackException)
        {
            // The octets are not valid in 'encoding'.
        }
        catch (EncoderFallbackException)
        {
            // The decoded string could not be encoded as UTF-8.
        }

        return false;
    }

    /// <summary>Appends each octet as '%' followed by two upper-case hex digits.</summary>
    private static void AppendOctetsPercentEncoded(StringBuilder target, IEnumerable<byte> octets)
    {
        foreach (byte octet in octets)
        {
            target.Append('%');
            target.Append(octet.ToString("X2", CultureInfo.InvariantCulture));
        }
    }

    /// <summary>Formats octets as space separated two-digit upper-case hex values (diagnostics helper).</summary>
    private static string GetOctetsAsString(IEnumerable<byte> octets)
    {
        var octetString = new StringBuilder();
        foreach (byte octet in octets)
        {
            if (octetString.Length > 0)
            {
                octetString.Append(' ');
            }

            octetString.Append(octet.ToString("X2", CultureInfo.InvariantCulture));
        }

        return octetString.ToString();
    }

    /// <summary>
    /// Extracts the path (without query) from a raw request target that is an absolute path, an
    /// absolute http/https Uri, or "*".
    /// </summary>
    /// <param name="uriString">Non-null, non-empty raw request target.</param>
    /// <returns>The path; never null, empty when an absolute Uri has no path.</returns>
    private static string GetPath(string uriString)
    {
        Debug.Assert(uriString != null, "uriString must not be null");
        Debug.Assert(uriString.Length > 0, "uriString must not be empty");

        int pathStartIndex = 0;

        // Perf. improvement: nearly all strings are relative Uris. So just look if the
        // string starts with '/'. If so, we have a relative Uri and the path starts at position 0.
        // (http.sys already trimmed leading whitespaces)
        if (uriString[0] != '/')
        {
            // We can't check against cookedUriScheme, since http.sys allows for request http://myserver/ to
            // use a request line 'GET https://myserver/' (note http vs. https). Therefore check if the
            // Uri starts with either http:// or https://.
            int authorityStartIndex = 0;
            if (uriString.StartsWith("http://", StringComparison.OrdinalIgnoreCase))
            {
                authorityStartIndex = 7;
            }
            else if (uriString.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
            {
                authorityStartIndex = 8;
            }

            if (authorityStartIndex > 0)
            {
                // We have an absolute Uri. The authority ends at the first '/' (start of the path) or
                // at the first '?' (no path at all, e.g. "http://server?query=value/1/2"). Looking
                // for '/' only would pick up a slash inside the query and produce a negative length.
                int authorityEndIndex = uriString.IndexOfAny(s_authorityTerminators, authorityStartIndex);

                // No terminator: e.g. for request lines like 'GET http://myserver' (no final '/').
                pathStartIndex = authorityEndIndex == -1 ? uriString.Length : authorityEndIndex;
            }
            else
            {
                // RFC2616: Request-URI = "*" | absoluteURI | abs_path | authority
                // 'authority' can only be used with CONNECT which is never received by HttpListener.
                // I.e. if we don't have an absolute path (must start with '/') and we don't have
                // an absolute Uri (must start with http:// or https://), then 'uriString' must be '*'.
                Debug.Assert((uriString.Length == 1) && (uriString[0] == '*'), "Unknown request Uri string format",
                    "Request Uri string is not an absolute Uri, absolute path, or '*': {0}", uriString);

                // Should we ever get here, be consistent with 2.0/3.5 behavior: just add an initial
                // slash to the string and treat it as a path:
                uriString = "/" + uriString;
            }
        }

        // Find end of path: The path is terminated by
        // - the first '?' character at or after the start of the path
        // - the first '#' character: not handled here; RFC2616 doesn't allow fragments in request Uris.
        // - end of Uri string
        int queryIndex = uriString.IndexOf('?', pathStartIndex);
        if (queryIndex == -1)
        {
            queryIndex = uriString.Length;
        }

        return AddSlashToAsteriskOnlyPath(uriString.Substring(pathStartIndex, queryIndex - pathStartIndex));
    }

    /// <summary>Returns "/*" for the path "*", otherwise the path unchanged.</summary>
    private static string AddSlashToAsteriskOnlyPath(string path)
    {
        Debug.Assert(path != null, "'path' must not be null");

        // If a request like "OPTIONS * HTTP/1.1" is sent to the listener, then the request Uri
        // should be "http[s]://server[:port]/*" to be compatible with pre-4.0 behavior.
        return (path.Length == 1) && (path[0] == '*') ? "/*" : path;
    }

    // Outcome of one attempt to convert the raw path.
    private enum ParsingResult
    {
        Success,
        InvalidString,
        EncodingError
    }

    // Selects which encoding to try: Primary is strict UTF-8, Secondary is the ANSI encoding.
    private enum EncodingType
    {
        Primary,
        Secondary
    }
}
- }
|