WebHeaderEncoding.cs 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using System.Threading.Tasks;
  6. namespace SocketHttpListener.Net
  7. {
  8. // we use this static class as a helper class to encode/decode HTTP headers.
  9. // what we need is a 1-1 correspondence between a char in the range U+0000-U+00FF
  10. // and a byte in the range 0x00-0xFF (which is the range that can hit the network).
  11. // The Latin-1 encoding (ISO-8859-1) (GetEncoding(28591)) works for byte[] to string, but is a little slow.
  12. // It doesn't work for string -> byte[] because of best-fit-mapping problems.
  /// <summary>
  /// Helper for encoding/decoding HTTP header text using a 1-1 mapping between
  /// chars in the range U+0000-U+00FF and bytes in the range 0x00-0xFF.
  /// </summary>
  internal static class WebHeaderEncoding
  {
      // We don't want '?' replacement characters, just fail.
      private static readonly Encoding s_utf8Decoder = Encoding.GetEncoding("utf-8", EncoderFallback.ExceptionFallback, DecoderFallback.ExceptionFallback);

      /// <summary>
      /// Converts a slice of <paramref name="bytes"/> to a string by widening each byte to a char.
      /// </summary>
      /// <param name="bytes">Source buffer. Not inspected when <paramref name="byteCount"/> is less than 1.</param>
      /// <param name="byteIndex">Offset of the first byte to convert.</param>
      /// <param name="byteCount">Number of bytes to convert; values below 1 yield an empty string.</param>
      /// <returns>A string of <paramref name="byteCount"/> chars, or "" when nothing is requested.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null and bytes were requested.</exception>
      /// <exception cref="ArgumentOutOfRangeException">The requested slice does not lie inside <paramref name="bytes"/>.</exception>
      internal static unsafe string GetString(byte[] bytes, int byteIndex, int byteCount)
      {
          // Keep the historical contract: a non-positive count is an empty result,
          // regardless of the other arguments.
          if (byteCount < 1)
          {
              return "";
          }

          if (bytes == null)
          {
              throw new ArgumentNullException(nameof(bytes));
          }

          // byteCount >= 1 here, so the subtraction cannot overflow. Validating up front
          // prevents the pointer overload from reading outside the array.
          if (byteIndex < 0 || byteIndex > bytes.Length - byteCount)
          {
              throw new ArgumentOutOfRangeException(nameof(byteIndex));
          }

          fixed (byte* pBytes = bytes)
          {
              return GetString(pBytes + byteIndex, byteCount);
          }
      }

      /// <summary>
      /// Converts <paramref name="byteCount"/> bytes starting at <paramref name="pBytes"/> to a string
      /// by widening each byte to a char.
      /// </summary>
      /// <param name="pBytes">Pointer to the first byte. The caller must guarantee that
      /// <paramref name="byteCount"/> bytes are readable from it; this cannot be verified here.</param>
      /// <param name="byteCount">Number of bytes to convert; values below 1 yield an empty string.</param>
      /// <returns>A string of <paramref name="byteCount"/> chars, or "" when nothing is requested.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="pBytes"/> is null and bytes were requested.</exception>
      internal static unsafe string GetString(byte* pBytes, int byteCount)
      {
          if (byteCount < 1)
          {
              return "";
          }

          if (pBytes == null)
          {
              throw new ArgumentNullException(nameof(pBytes));
          }

          // Allocate the result at its final length and fill it in place; the string
          // is not visible to any other code until it is returned.
          string s = new string('\0', byteCount);
          fixed (char* pStr = s)
          {
              char* pString = pStr;

              // Manually unrolled: copy eight bytes per iteration.
              while (byteCount >= 8)
              {
                  pString[0] = (char)pBytes[0];
                  pString[1] = (char)pBytes[1];
                  pString[2] = (char)pBytes[2];
                  pString[3] = (char)pBytes[3];
                  pString[4] = (char)pBytes[4];
                  pString[5] = (char)pBytes[5];
                  pString[6] = (char)pBytes[6];
                  pString[7] = (char)pBytes[7];
                  pString += 8;
                  pBytes += 8;
                  byteCount -= 8;
              }

              // Remaining 0-7 bytes.
              for (int i = 0; i < byteCount; i++)
              {
                  pString[i] = (char)pBytes[i];
              }
          }

          return s;
      }

      /// <summary>
      /// Returns the number of bytes <see cref="GetBytes(string)"/> produces for
      /// <paramref name="myString"/>, which is one byte per char.
      /// </summary>
      /// <exception cref="ArgumentNullException"><paramref name="myString"/> is null.</exception>
      internal static int GetByteCount(string myString)
      {
          if (myString == null)
          {
              throw new ArgumentNullException(nameof(myString));
          }

          return myString.Length;
      }

      /// <summary>
      /// Writes <paramref name="charCount"/> chars of <paramref name="myString"/>, starting at
      /// <paramref name="charIndex"/>, into <paramref name="bytes"/> at <paramref name="byteIndex"/>,
      /// one byte per char.
      /// </summary>
      /// <remarks>
      /// Each char is narrowed with a plain <c>(byte)</c> cast; chars above U+00FF are not rejected here.
      /// Does nothing when <paramref name="myString"/> is empty or <paramref name="charCount"/> is not positive.
      /// </remarks>
      /// <exception cref="ArgumentNullException"><paramref name="myString"/> is null, or
      /// <paramref name="bytes"/> is null while there is something to copy.</exception>
      /// <exception cref="ArgumentOutOfRangeException">The source range lies outside
      /// <paramref name="myString"/>, or the destination range lies outside <paramref name="bytes"/>.</exception>
      internal static void GetBytes(string myString, int charIndex, int charCount, byte[] bytes, int byteIndex)
      {
          if (myString == null)
          {
              throw new ArgumentNullException(nameof(myString));
          }

          // Historical no-op cases: empty source, or nothing requested.
          if (myString.Length == 0 || charCount <= 0)
          {
              return;
          }

          if (bytes == null)
          {
              throw new ArgumentNullException(nameof(bytes));
          }

          // charCount >= 1 here, so neither subtraction can overflow. Validating both
          // ranges before copying means a bad argument never causes a partial write.
          if (charIndex < 0 || charIndex > myString.Length - charCount)
          {
              throw new ArgumentOutOfRangeException(nameof(charIndex));
          }

          if (byteIndex < 0 || byteIndex > bytes.Length - charCount)
          {
              throw new ArgumentOutOfRangeException(nameof(byteIndex));
          }

          for (int i = 0; i < charCount; i++)
          {
              bytes[byteIndex + i] = (byte)myString[charIndex + i];
          }
      }

      /// <summary>
      /// Returns a new array holding one byte per char of <paramref name="myString"/>.
      /// </summary>
      /// <exception cref="ArgumentNullException"><paramref name="myString"/> is null.</exception>
      internal static byte[] GetBytes(string myString)
      {
          if (myString == null)
          {
              throw new ArgumentNullException(nameof(myString));
          }

          byte[] bytes = new byte[myString.Length];
          if (myString.Length != 0)
          {
              GetBytes(myString, 0, myString.Length, bytes, 0);
          }

          return bytes;
      }

      // The normal client header parser just casts bytes to chars (see GetString).
      // Check if those bytes were actually utf-8 instead of ASCII.
      // If not, just return the input value.
      /// <summary>
      /// Re-interprets a string whose chars are raw bytes (as produced by GetString) as UTF-8.
      /// </summary>
      /// <param name="input">The string to inspect; may be null.</param>
      /// <returns>
      /// The UTF-8 decoded text when <paramref name="input"/> contains a char above 127, no char
      /// above 255, and its bytes form valid UTF-8; otherwise <paramref name="input"/> unchanged.
      /// </returns>
      internal static string DecodeUtf8FromString(string input)
      {
          if (string.IsNullOrWhiteSpace(input))
          {
              return input;
          }

          // Scan up to the first non-ASCII char to decide whether decoding is worth attempting.
          bool possibleUtf8 = false;
          for (int i = 0; i < input.Length; i++)
          {
              if (input[i] > (char)255)
              {
                  return input; // This couldn't have come from the wire, someone assigned it directly.
              }
              else if (input[i] > (char)127)
              {
                  possibleUtf8 = true;
                  break;
              }
          }

          if (!possibleUtf8)
          {
              return input;
          }

          // Recover the original bytes. The scan above stopped at the first non-ASCII char,
          // so the > 255 check has to be repeated for the rest of the string.
          byte[] rawBytes = new byte[input.Length];
          for (int i = 0; i < input.Length; i++)
          {
              if (input[i] > (char)255)
              {
                  return input; // This couldn't have come from the wire, someone assigned it directly.
              }

              rawBytes[i] = (byte)input[i];
          }

          try
          {
              return s_utf8Decoder.GetString(rawBytes);
          }
          catch (ArgumentException)
          {
              // Not actually Utf-8. The decoder uses the exception fallback, and
              // DecoderFallbackException derives from ArgumentException.
              return input;
          }
      }
  }
  122. }