trivialjson.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. """trivialjson (https://github.com/phihag/trivialjson)"""
  2. import re
  3. def loads(s):
  4. s = s.decode('UTF-8')
  5. def raiseError(msg, i):
  6. raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
  7. def skipSpace(i, expectMore=True):
  8. while i < len(s) and s[i] in ' \t\r\n':
  9. i += 1
  10. if expectMore:
  11. if i >= len(s):
  12. raiseError('Premature end', i)
  13. return i
  14. def decodeEscape(match):
  15. esc = match.group(1)
  16. _STATIC = {
  17. '"': '"',
  18. '\\': '\\',
  19. '/': '/',
  20. 'b': unichr(0x8),
  21. 'f': unichr(0xc),
  22. 'n': '\n',
  23. 'r': '\r',
  24. 't': '\t',
  25. }
  26. if esc in _STATIC:
  27. return _STATIC[esc]
  28. if esc[0] == 'u':
  29. if len(esc) == 1+4:
  30. return unichr(int(esc[1:5], 16))
  31. if len(esc) == 5+6 and esc[5:7] == '\\u':
  32. hi = int(esc[1:5], 16)
  33. low = int(esc[7:11], 16)
  34. return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
  35. raise ValueError('Unknown escape ' + str(esc))
  36. def parseString(i):
  37. i += 1
  38. e = i
  39. while True:
  40. e = s.index('"', e)
  41. bslashes = 0
  42. while s[e-bslashes-1] == '\\':
  43. bslashes += 1
  44. if bslashes % 2 == 1:
  45. e += 1
  46. continue
  47. break
  48. rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)')
  49. stri = rexp.sub(decodeEscape, s[i:e])
  50. return (e+1,stri)
  51. def parseObj(i):
  52. i += 1
  53. res = {}
  54. i = skipSpace(i)
  55. if s[i] == '}': # Empty dictionary
  56. return (i+1,res)
  57. while True:
  58. if s[i] != '"':
  59. raiseError('Expected a string object key', i)
  60. i,key = parseString(i)
  61. i = skipSpace(i)
  62. if i >= len(s) or s[i] != ':':
  63. raiseError('Expected a colon', i)
  64. i,val = parse(i+1)
  65. res[key] = val
  66. i = skipSpace(i)
  67. if s[i] == '}':
  68. return (i+1, res)
  69. if s[i] != ',':
  70. raiseError('Expected comma or closing curly brace', i)
  71. i = skipSpace(i+1)
  72. def parseArray(i):
  73. res = []
  74. i = skipSpace(i+1)
  75. if s[i] == ']': # Empty array
  76. return (i+1,res)
  77. while True:
  78. i,val = parse(i)
  79. res.append(val)
  80. i = skipSpace(i) # Raise exception if premature end
  81. if s[i] == ']':
  82. return (i+1, res)
  83. if s[i] != ',':
  84. raiseError('Expected a comma or closing bracket', i)
  85. i = skipSpace(i+1)
  86. def parseDiscrete(i):
  87. for k,v in {'true': True, 'false': False, 'null': None}.items():
  88. if s.startswith(k, i):
  89. return (i+len(k), v)
  90. raiseError('Not a boolean (or null)', i)
  91. def parseNumber(i):
  92. mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:])
  93. if mobj is None:
  94. raiseError('Not a number', i)
  95. nums = mobj.group(1)
  96. if '.' in nums or 'e' in nums or 'E' in nums:
  97. return (i+len(nums), float(nums))
  98. return (i+len(nums), int(nums))
  99. CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
  100. def parse(i):
  101. i = skipSpace(i)
  102. i,res = CHARMAP.get(s[i], parseNumber)(i)
  103. i = skipSpace(i, False)
  104. return (i,res)
  105. i,res = parse(0)
  106. if i < len(s):
  107. raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
  108. return res