json_util.py 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. # Copyright 2009-2015 MongoDB, Inc.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """Tools for using Python's :mod:`json` module with BSON documents.
  15. This module provides two helper methods `dumps` and `loads` that wrap the
  16. native :mod:`json` methods and provide explicit BSON conversion to and from
  17. json. This allows for specialized encoding and decoding of BSON documents
  18. into `Mongo Extended JSON
  19. <http://www.mongodb.org/display/DOCS/Mongo+Extended+JSON>`_'s *Strict*
  20. mode. This lets you encode / decode BSON documents to JSON even when
  21. they use special BSON types.
  22. Example usage (serialization):
  23. .. doctest::
  24. >>> from bson import Binary, Code
  25. >>> from bson.json_util import dumps
  26. >>> dumps([{'foo': [1, 2]},
  27. ... {'bar': {'hello': 'world'}},
  28. ... {'code': Code("function x() { return 1; }")},
  29. ... {'bin': Binary("\x01\x02\x03\x04")}])
  30. '[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }", "$scope": {}}}, {"bin": {"$binary": "AQIDBA==", "$type": "00"}}]'
  31. Example usage (deserialization):
  32. .. doctest::
  33. >>> from bson.json_util import loads
  34. >>> loads('[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$scope": {}, "$code": "function x() { return 1; }"}}, {"bin": {"$type": "00", "$binary": "AQIDBA=="}}]')
  35. [{u'foo': [1, 2]}, {u'bar': {u'hello': u'world'}}, {u'code': Code('function x() { return 1; }', {})}, {u'bin': Binary('...', 0)}]
  36. Alternatively, you can manually pass the `default` to :func:`json.dumps`.
  37. It won't handle :class:`~bson.binary.Binary` and :class:`~bson.code.Code`
  38. instances (as they are extended strings you can't provide custom defaults),
  39. but it will be faster as there is less recursion.
  40. .. versionchanged:: 2.8
  41. The output format for :class:`~bson.timestamp.Timestamp` has changed from
  42. '{"t": <int>, "i": <int>}' to '{"$timestamp": {"t": <int>, "i": <int>}}'.
  43. This new format will be decoded to an instance of
  44. :class:`~bson.timestamp.Timestamp`. The old format will continue to be
  45. decoded to a python dict as before. Encoding to the old format is no longer
  46. supported as it was never correct and loses type information.
  47. Added support for $numberLong and $undefined - new in MongoDB 2.6 - and
  48. parsing $date in ISO-8601 format.
  49. .. versionchanged:: 2.7
  50. Preserves order when rendering SON, Timestamp, Code, Binary, and DBRef
  51. instances.
  52. .. versionchanged:: 2.3
  53. Added dumps and loads helpers to automatically handle conversion to and
  54. from json and supports :class:`~bson.binary.Binary` and
  55. :class:`~bson.code.Code`
  56. """
import base64
import calendar
import collections
import datetime
import json
import re
import uuid

try:
    from collections.abc import Mapping
except ImportError:  # Python 2
    from collections import Mapping

from bson import EPOCH_AWARE, RE_TYPE, SON
from bson.binary import Binary
from bson.code import Code
from bson.dbref import DBRef
from bson.int64 import Int64
from bson.max_key import MaxKey
from bson.min_key import MinKey
from bson.objectid import ObjectId
from bson.regex import Regex
from bson.timestamp import Timestamp
from bson.tz_util import utc
from bson.py3compat import PY3, iteritems, string_type, text_type
  76. _RE_OPT_TABLE = {
  77. "i": re.I,
  78. "l": re.L,
  79. "m": re.M,
  80. "s": re.S,
  81. "u": re.U,
  82. "x": re.X,
  83. }
  84. def dumps(obj, *args, **kwargs):
  85. """Helper function that wraps :class:`json.dumps`.
  86. Recursive function that handles all BSON types including
  87. :class:`~bson.binary.Binary` and :class:`~bson.code.Code`.
  88. .. versionchanged:: 2.7
  89. Preserves order when rendering SON, Timestamp, Code, Binary, and DBRef
  90. instances.
  91. """
  92. return json.dumps(_json_convert(obj), *args, **kwargs)
  93. def loads(s, *args, **kwargs):
  94. """Helper function that wraps :class:`json.loads`.
  95. Automatically passes the object_hook for BSON type conversion.
  96. """
  97. kwargs['object_hook'] = lambda dct: object_hook(dct)
  98. return json.loads(s, *args, **kwargs)
  99. def _json_convert(obj):
  100. """Recursive helper method that converts BSON types so they can be
  101. converted into json.
  102. """
  103. if hasattr(obj, 'iteritems') or hasattr(obj, 'items'): # PY3 support
  104. return SON(((k, _json_convert(v)) for k, v in iteritems(obj)))
  105. elif hasattr(obj, '__iter__') and not isinstance(obj, (text_type, bytes)):
  106. return list((_json_convert(v) for v in obj))
  107. try:
  108. return default(obj)
  109. except TypeError:
  110. return obj
  111. def object_hook(dct):
  112. if "$oid" in dct:
  113. return ObjectId(str(dct["$oid"]))
  114. if "$ref" in dct:
  115. return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
  116. if "$date" in dct:
  117. dtm = dct["$date"]
  118. # mongoexport 2.6 and newer
  119. if isinstance(dtm, string_type):
  120. aware = datetime.datetime.strptime(
  121. dtm[:23], "%Y-%m-%dT%H:%M:%S.%f").replace(tzinfo=utc)
  122. offset = dtm[23:]
  123. if not offset or offset == 'Z':
  124. # UTC
  125. return aware
  126. else:
  127. if len(offset) == 5:
  128. # Offset from mongoexport is in format (+|-)HHMM
  129. secs = (int(offset[1:3]) * 3600 + int(offset[3:]) * 60)
  130. elif ':' in offset and len(offset) == 6:
  131. # RFC-3339 format (+|-)HH:MM
  132. hours, minutes = offset[1:].split(':')
  133. secs = (int(hours) * 3600 + int(minutes) * 60)
  134. else:
  135. # Not RFC-3339 compliant or mongoexport output.
  136. raise ValueError("invalid format for offset")
  137. if offset[0] == "-":
  138. secs *= -1
  139. return aware - datetime.timedelta(seconds=secs)
  140. # mongoexport 2.6 and newer, time before the epoch (SERVER-15275)
  141. elif isinstance(dtm, collections.Mapping):
  142. secs = float(dtm["$numberLong"]) / 1000.0
  143. # mongoexport before 2.6
  144. else:
  145. secs = float(dtm) / 1000.0
  146. return EPOCH_AWARE + datetime.timedelta(seconds=secs)
  147. if "$regex" in dct:
  148. flags = 0
  149. # PyMongo always adds $options but some other tools may not.
  150. for opt in dct.get("$options", ""):
  151. flags |= _RE_OPT_TABLE.get(opt, 0)
  152. return Regex(dct["$regex"], flags)
  153. if "$minKey" in dct:
  154. return MinKey()
  155. if "$maxKey" in dct:
  156. return MaxKey()
  157. if "$binary" in dct:
  158. if isinstance(dct["$type"], int):
  159. dct["$type"] = "%02x" % dct["$type"]
  160. subtype = int(dct["$type"], 16)
  161. if subtype >= 0xffffff80: # Handle mongoexport values
  162. subtype = int(dct["$type"][6:], 16)
  163. return Binary(base64.b64decode(dct["$binary"].encode()), subtype)
  164. if "$code" in dct:
  165. return Code(dct["$code"], dct.get("$scope"))
  166. if "$uuid" in dct:
  167. return uuid.UUID(dct["$uuid"])
  168. if "$undefined" in dct:
  169. return None
  170. if "$numberLong" in dct:
  171. return Int64(dct["$numberLong"])
  172. if "$timestamp" in dct:
  173. tsp = dct["$timestamp"]
  174. return Timestamp(tsp["t"], tsp["i"])
  175. return dct
  176. def default(obj):
  177. # We preserve key order when rendering SON, DBRef, etc. as JSON by
  178. # returning a SON for those types instead of a dict.
  179. if isinstance(obj, ObjectId):
  180. return {"$oid": str(obj)}
  181. if isinstance(obj, DBRef):
  182. return _json_convert(obj.as_doc())
  183. if isinstance(obj, datetime.datetime):
  184. # TODO share this code w/ bson.py?
  185. if obj.utcoffset() is not None:
  186. obj = obj - obj.utcoffset()
  187. millis = int(calendar.timegm(obj.timetuple()) * 1000 +
  188. obj.microsecond / 1000)
  189. return {"$date": millis}
  190. if isinstance(obj, (RE_TYPE, Regex)):
  191. flags = ""
  192. if obj.flags & re.IGNORECASE:
  193. flags += "i"
  194. if obj.flags & re.LOCALE:
  195. flags += "l"
  196. if obj.flags & re.MULTILINE:
  197. flags += "m"
  198. if obj.flags & re.DOTALL:
  199. flags += "s"
  200. if obj.flags & re.UNICODE:
  201. flags += "u"
  202. if obj.flags & re.VERBOSE:
  203. flags += "x"
  204. if isinstance(obj.pattern, text_type):
  205. pattern = obj.pattern
  206. else:
  207. pattern = obj.pattern.decode('utf-8')
  208. return SON([("$regex", pattern), ("$options", flags)])
  209. if isinstance(obj, MinKey):
  210. return {"$minKey": 1}
  211. if isinstance(obj, MaxKey):
  212. return {"$maxKey": 1}
  213. if isinstance(obj, Timestamp):
  214. return {"$timestamp": SON([("t", obj.time), ("i", obj.inc)])}
  215. if isinstance(obj, Code):
  216. return SON([('$code', str(obj)), ('$scope', obj.scope)])
  217. if isinstance(obj, Binary):
  218. return SON([
  219. ('$binary', base64.b64encode(obj).decode()),
  220. ('$type', "%02x" % obj.subtype)])
  221. if PY3 and isinstance(obj, bytes):
  222. return SON([
  223. ('$binary', base64.b64encode(obj).decode()),
  224. ('$type', "00")])
  225. if isinstance(obj, uuid.UUID):
  226. return {"$uuid": obj.hex}
  227. raise TypeError("%r is not JSON serializable" % obj)