// EscSM.cs
  1. /* ***** BEGIN LICENSE BLOCK *****
  2. * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  3. *
  4. * The contents of this file are subject to the Mozilla Public License Version
  5. * 1.1 (the "License"); you may not use this file except in compliance with
  6. * the License. You may obtain a copy of the License at
  7. * http://www.mozilla.org/MPL/
  8. *
  9. * Software distributed under the License is distributed on an "AS IS" basis,
  10. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  11. * for the specific language governing rights and limitations under the
  12. * License.
  13. *
  14. * The Original Code is mozilla.org code.
  15. *
  16. * The Initial Developer of the Original Code is
  17. * Netscape Communications Corporation.
  18. * Portions created by the Initial Developer are Copyright (C) 1998
  19. * the Initial Developer. All Rights Reserved.
  20. *
  21. * Contributor(s):
  22. * Kohei TAKETA <k-tak@void.in> (Java port)
  23. * Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
  24. *
  25. * Alternatively, the contents of this file may be used under the terms of
  26. * either the GNU General Public License Version 2 or later (the "GPL"), or
  27. * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  28. * in which case the provisions of the GPL or the LGPL are applicable instead
  29. * of those above. If you wish to allow use of your version of this file only
  30. * under the terms of either the GPL or the LGPL, and not to allow others to
  31. * use your version of this file under the terms of the MPL, indicate your
  32. * decision by deleting the provisions above and replace them with the notice
  33. * and other provisions required by the GPL or the LGPL. If you do not delete
  34. * the provisions above, a recipient may use your version of this file under
  35. * the terms of any one of the MPL, the GPL or the LGPL.
  36. *
  37. * ***** END LICENSE BLOCK ***** */
  /// <summary>
  /// State machine models for the escaped charsets HZ-GB-2312, ISO-2022-CN,
  /// ISO-2022-JP and ISO-2022-KR.
  /// </summary>
  41. namespace UniversalDetector.Core
  42. {
  /// <summary>
  /// State machine model that registers itself with the base class under the
  /// charset name "HZ-GB-2312".
  /// </summary>
  /// <remarks>
  /// Both lookup tables store 4-bit entries, packed eight per array element by
  /// <c>BitPackage.Pack4bits</c>. The comment after each row gives the range of
  /// table indices (in hex) covered by that row.
  /// </remarks>
  public class HZSMModel : SMModel
  {
      // Second argument of the base constructor.
      // NOTE(review): presumably the number of byte classes used to index the
      // state table -- confirm against SMModel.
      private const int HzClassFactor = 6;

      // 32 rows x 8 entries: one 4-bit class value for each index 0x00-0xff.
      private static readonly int[] HzClassTable =
      {
          BitPackage.Pack4bits(1, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 08 - 0f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
          BitPackage.Pack4bits(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 20 - 27
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 28 - 2f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 40 - 47
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
          BitPackage.Pack4bits(0, 0, 0, 4, 0, 5, 2, 0), // 78 - 7f
          BitPackage.Pack4bits(1, 1, 1, 1, 1, 1, 1, 1), // 80 - 87
          BitPackage.Pack4bits(1, 1, 1, 1, 1, 1, 1, 1), // 88 - 8f
          BitPackage.Pack4bits(1, 1, 1, 1, 1, 1, 1, 1), // 90 - 97
          BitPackage.Pack4bits(1, 1, 1, 1, 1, 1, 1, 1), // 98 - 9f
          BitPackage.Pack4bits(1, 1, 1, 1, 1, 1, 1, 1), // a0 - a7
          BitPackage.Pack4bits(1, 1, 1, 1, 1, 1, 1, 1), // a8 - af
          BitPackage.Pack4bits(1, 1, 1, 1, 1, 1, 1, 1), // b0 - b7
          BitPackage.Pack4bits(1, 1, 1, 1, 1, 1, 1, 1), // b8 - bf
          BitPackage.Pack4bits(1, 1, 1, 1, 1, 1, 1, 1), // c0 - c7
          BitPackage.Pack4bits(1, 1, 1, 1, 1, 1, 1, 1), // c8 - cf
          BitPackage.Pack4bits(1, 1, 1, 1, 1, 1, 1, 1), // d0 - d7
          BitPackage.Pack4bits(1, 1, 1, 1, 1, 1, 1, 1), // d8 - df
          BitPackage.Pack4bits(1, 1, 1, 1, 1, 1, 1, 1), // e0 - e7
          BitPackage.Pack4bits(1, 1, 1, 1, 1, 1, 1, 1), // e8 - ef
          BitPackage.Pack4bits(1, 1, 1, 1, 1, 1, 1, 1), // f0 - f7
          BitPackage.Pack4bits(1, 1, 1, 1, 1, 1, 1, 1)  // f8 - ff
      };

      // Each entry is START, ERROR, ITSME, or the number of an intermediate state.
      private static readonly int[] HzStateTable =
      {
          BitPackage.Pack4bits(START, ERROR,     3, START, START, START, ERROR, ERROR), // 00-07
          BitPackage.Pack4bits(ERROR, ERROR, ERROR, ERROR, ITSME, ITSME, ITSME, ITSME), // 08-0f
          BitPackage.Pack4bits(ITSME, ITSME, ERROR, ERROR, START, START,     4, ERROR), // 10-17
          BitPackage.Pack4bits(    5, ERROR,     6, ERROR,     5,     5,     4, ERROR), // 18-1f
          BitPackage.Pack4bits(    4, ERROR,     4,     4,     4, ERROR,     4, ERROR), // 20-27
          BitPackage.Pack4bits(    4, ITSME, START, START, START, START, START, START)  // 28-2f
      };

      // Six entries, all zero (a freshly allocated int[] is zero-filled).
      private static readonly int[] HzCharLenTable = new int[HzClassFactor];

      /// <summary>
      /// Hands the packed class table, the class factor, the packed state table,
      /// the char-length table and the charset name to the base constructor.
      /// </summary>
      public HZSMModel()
          : base(Wrap4Bit(HzClassTable),
                 HzClassFactor,
                 Wrap4Bit(HzStateTable),
                 HzCharLenTable,
                 "HZ-GB-2312")
      {
      }

      // Wraps an array of packed values in a BitPackage set up with the 4-bit
      // shift/mask constants.
      private static BitPackage Wrap4Bit(int[] packed)
      {
          return new BitPackage(
              BitPackage.INDEX_SHIFT_4BITS,
              BitPackage.SHIFT_MASK_4BITS,
              BitPackage.BIT_SHIFT_4BITS,
              BitPackage.UNIT_MASK_4BITS,
              packed);
      }
  }
  /// <summary>
  /// State machine model that registers itself with the base class under the
  /// charset name "ISO-2022-CN".
  /// </summary>
  /// <remarks>
  /// Both lookup tables store 4-bit entries, packed eight per array element by
  /// <c>BitPackage.Pack4bits</c>. The comment after each row gives the range of
  /// table indices (in hex) covered by that row.
  /// </remarks>
  public class ISO2022CNSMModel : SMModel
  {
      // Second argument of the base constructor.
      // NOTE(review): presumably the number of byte classes used to index the
      // state table -- confirm against SMModel.
      private const int CnClassFactor = 9;

      // 32 rows x 8 entries: one 4-bit class value for each index 0x00-0xff.
      private static readonly int[] CnClassTable =
      {
          BitPackage.Pack4bits(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 08 - 0f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
          BitPackage.Pack4bits(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 20 - 27
          BitPackage.Pack4bits(0, 3, 0, 0, 0, 0, 0, 0), // 28 - 2f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
          BitPackage.Pack4bits(0, 0, 0, 4, 0, 0, 0, 0), // 40 - 47
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2)  // f8 - ff
      };

      // Each entry is START, ERROR, ITSME, or the number of an intermediate state.
      private static readonly int[] CnStateTable =
      {
          BitPackage.Pack4bits(START,     3, ERROR, START, START, START, START, START), // 00-07
          BitPackage.Pack4bits(START, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR), // 08-0f
          BitPackage.Pack4bits(ERROR, ERROR, ITSME, ITSME, ITSME, ITSME, ITSME, ITSME), // 10-17
          BitPackage.Pack4bits(ITSME, ITSME, ITSME, ERROR, ERROR, ERROR,     4, ERROR), // 18-1f
          BitPackage.Pack4bits(ERROR, ERROR, ERROR, ITSME, ERROR, ERROR, ERROR, ERROR), // 20-27
          BitPackage.Pack4bits(    5,     6, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR), // 28-2f
          BitPackage.Pack4bits(ERROR, ERROR, ERROR, ITSME, ERROR, ERROR, ERROR, ERROR), // 30-37
          BitPackage.Pack4bits(ERROR, ERROR, ERROR, ERROR, ERROR, ITSME, ERROR, START)  // 38-3f
      };

      // Nine entries, all zero (a freshly allocated int[] is zero-filled).
      private static readonly int[] CnCharLenTable = new int[CnClassFactor];

      /// <summary>
      /// Hands the packed class table, the class factor, the packed state table,
      /// the char-length table and the charset name to the base constructor.
      /// </summary>
      public ISO2022CNSMModel()
          : base(Wrap4Bit(CnClassTable),
                 CnClassFactor,
                 Wrap4Bit(CnStateTable),
                 CnCharLenTable,
                 "ISO-2022-CN")
      {
      }

      // Wraps an array of packed values in a BitPackage set up with the 4-bit
      // shift/mask constants.
      private static BitPackage Wrap4Bit(int[] packed)
      {
          return new BitPackage(
              BitPackage.INDEX_SHIFT_4BITS,
              BitPackage.SHIFT_MASK_4BITS,
              BitPackage.BIT_SHIFT_4BITS,
              BitPackage.UNIT_MASK_4BITS,
              packed);
      }
  }
  /// <summary>
  /// State machine model that registers itself with the base class under the
  /// charset name "ISO-2022-JP".
  /// </summary>
  /// <remarks>
  /// Both lookup tables store 4-bit entries, packed eight per array element by
  /// <c>BitPackage.Pack4bits</c>. The comment after each row gives the range of
  /// table indices (in hex) covered by that row.
  /// </remarks>
  public class ISO2022JPSMModel : SMModel
  {
      // Second argument of the base constructor.
      // NOTE(review): presumably the number of byte classes used to index the
      // state table -- confirm against SMModel.
      private const int JpClassFactor = 10;

      // 32 rows x 8 entries: one 4-bit class value for each index 0x00-0xff.
      private static readonly int[] JpClassTable =
      {
          BitPackage.Pack4bits(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 2, 2), // 08 - 0f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
          BitPackage.Pack4bits(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f
          BitPackage.Pack4bits(0, 0, 0, 0, 7, 0, 0, 0), // 20 - 27
          BitPackage.Pack4bits(3, 0, 0, 0, 0, 0, 0, 0), // 28 - 2f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
          BitPackage.Pack4bits(6, 0, 4, 0, 8, 0, 0, 0), // 40 - 47
          BitPackage.Pack4bits(0, 9, 5, 0, 0, 0, 0, 0), // 48 - 4f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2)  // f8 - ff
      };

      // Each entry is START, ERROR, ITSME, or the number of an intermediate state.
      private static readonly int[] JpStateTable =
      {
          BitPackage.Pack4bits(START,     3, ERROR, START, START, START, START, START), // 00-07
          BitPackage.Pack4bits(START, START, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR), // 08-0f
          BitPackage.Pack4bits(ERROR, ERROR, ERROR, ERROR, ITSME, ITSME, ITSME, ITSME), // 10-17
          BitPackage.Pack4bits(ITSME, ITSME, ITSME, ITSME, ITSME, ITSME, ERROR, ERROR), // 18-1f
          BitPackage.Pack4bits(ERROR,     5, ERROR, ERROR, ERROR,     4, ERROR, ERROR), // 20-27
          BitPackage.Pack4bits(ERROR, ERROR, ERROR,     6, ITSME, ERROR, ITSME, ERROR), // 28-2f
          BitPackage.Pack4bits(ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ITSME, ITSME), // 30-37
          BitPackage.Pack4bits(ERROR, ERROR, ERROR, ITSME, ERROR, ERROR, ERROR, ERROR), // 38-3f
          BitPackage.Pack4bits(ERROR, ERROR, ERROR, ERROR, ITSME, ERROR, START, START)  // 40-47
      };

      // Ten entries, all zero (a freshly allocated int[] is zero-filled).
      private static readonly int[] JpCharLenTable = new int[JpClassFactor];

      /// <summary>
      /// Hands the packed class table, the class factor, the packed state table,
      /// the char-length table and the charset name to the base constructor.
      /// </summary>
      public ISO2022JPSMModel()
          : base(Wrap4Bit(JpClassTable),
                 JpClassFactor,
                 Wrap4Bit(JpStateTable),
                 JpCharLenTable,
                 "ISO-2022-JP")
      {
      }

      // Wraps an array of packed values in a BitPackage set up with the 4-bit
      // shift/mask constants.
      private static BitPackage Wrap4Bit(int[] packed)
      {
          return new BitPackage(
              BitPackage.INDEX_SHIFT_4BITS,
              BitPackage.SHIFT_MASK_4BITS,
              BitPackage.BIT_SHIFT_4BITS,
              BitPackage.UNIT_MASK_4BITS,
              packed);
      }
  }
  /// <summary>
  /// State machine model that registers itself with the base class under the
  /// charset name "ISO-2022-KR".
  /// </summary>
  /// <remarks>
  /// Both lookup tables store 4-bit entries, packed eight per array element by
  /// <c>BitPackage.Pack4bits</c>. The comment after each row gives the range of
  /// table indices (in hex) covered by that row.
  /// </remarks>
  public class ISO2022KRSMModel : SMModel
  {
      // Second argument of the base constructor.
      // NOTE(review): presumably the number of byte classes used to index the
      // state table -- confirm against SMModel.
      private const int KrClassFactor = 6;

      // 32 rows x 8 entries: one 4-bit class value for each index 0x00-0xff.
      private static readonly int[] KrClassTable =
      {
          BitPackage.Pack4bits(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 08 - 0f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
          BitPackage.Pack4bits(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f
          BitPackage.Pack4bits(0, 0, 0, 0, 3, 0, 0, 0), // 20 - 27
          BitPackage.Pack4bits(0, 4, 0, 0, 0, 0, 0, 0), // 28 - 2f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
          BitPackage.Pack4bits(0, 0, 0, 5, 0, 0, 0, 0), // 40 - 47
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
          BitPackage.Pack4bits(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7
          BitPackage.Pack4bits(2, 2, 2, 2, 2, 2, 2, 2)  // f8 - ff
      };

      // Each entry is START, ERROR, ITSME, or the number of an intermediate state.
      private static readonly int[] KrStateTable =
      {
          BitPackage.Pack4bits(START,     3, ERROR, START, START, START, ERROR, ERROR), // 00-07
          BitPackage.Pack4bits(ERROR, ERROR, ERROR, ERROR, ITSME, ITSME, ITSME, ITSME), // 08-0f
          BitPackage.Pack4bits(ITSME, ITSME, ERROR, ERROR, ERROR,     4, ERROR, ERROR), // 10-17
          BitPackage.Pack4bits(ERROR, ERROR, ERROR, ERROR,     5, ERROR, ERROR, ERROR), // 18-1f
          BitPackage.Pack4bits(ERROR, ERROR, ERROR, ITSME, START, START, START, START)  // 20-27
      };

      // Six entries, all zero (a freshly allocated int[] is zero-filled).
      private static readonly int[] KrCharLenTable = new int[KrClassFactor];

      /// <summary>
      /// Hands the packed class table, the class factor, the packed state table,
      /// the char-length table and the charset name to the base constructor.
      /// </summary>
      public ISO2022KRSMModel()
          : base(Wrap4Bit(KrClassTable),
                 KrClassFactor,
                 Wrap4Bit(KrStateTable),
                 KrCharLenTable,
                 "ISO-2022-KR")
      {
      }

      // Wraps an array of packed values in a BitPackage set up with the 4-bit
      // shift/mask constants.
      private static BitPackage Wrap4Bit(int[] packed)
      {
          return new BitPackage(
              BitPackage.INDEX_SHIFT_4BITS,
              BitPackage.SHIFT_MASK_4BITS,
              BitPackage.BIT_SHIFT_4BITS,
              BitPackage.UNIT_MASK_4BITS,
              packed);
      }
  }
  283. }