son.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. # Copyright 2009-2015 MongoDB, Inc.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """Tools for creating and manipulating SON, the Serialized Ocument Notation.
  15. Regular dictionaries can be used instead of SON objects, but not when the order
  16. of keys is important. A SON object can be used just like a normal Python
  17. dictionary."""
  18. import collections
  19. import copy
  20. import re
  21. from bson.py3compat import iteritems
  22. # This sort of sucks, but seems to be as good as it gets...
  23. # This is essentially the same as re._pattern_type
  24. RE_TYPE = type(re.compile(""))
  25. class SON(dict):
  26. """SON data.
  27. A subclass of dict that maintains ordering of keys and provides a
  28. few extra niceties for dealing with SON. SON objects can be
  29. converted to and from BSON.
  30. The mapping from Python types to BSON types is as follows:
  31. ======================================= ============= ===================
  32. Python Type BSON Type Supported Direction
  33. ======================================= ============= ===================
  34. None null both
  35. bool boolean both
  36. int [#int]_ int32 / int64 py -> bson
  37. long int64 py -> bson
  38. `bson.int64.Int64` int64 both
  39. float number (real) both
  40. string string py -> bson
  41. unicode string both
  42. list array both
  43. dict / `SON` object both
  44. datetime.datetime [#dt]_ [#dt2]_ date both
  45. `bson.regex.Regex` regex both
  46. compiled re [#re]_ regex py -> bson
  47. `bson.binary.Binary` binary both
  48. `bson.objectid.ObjectId` oid both
  49. `bson.dbref.DBRef` dbref both
  50. None undefined bson -> py
  51. unicode code bson -> py
  52. `bson.code.Code` code py -> bson
  53. unicode symbol bson -> py
  54. bytes (Python 3) [#bytes]_ binary both
  55. ======================================= ============= ===================
  56. Note that to save binary data it must be wrapped as an instance of
  57. `bson.binary.Binary`. Otherwise it will be saved as a BSON string
  58. and retrieved as unicode.
  59. .. [#int] A Python int will be saved as a BSON int32 or BSON int64 depending
  60. on its size. A BSON int32 will always decode to a Python int. A BSON
  61. int64 will always decode to a :class:`~bson.int64.Int64`.
  62. .. [#dt] datetime.datetime instances will be rounded to the nearest
  63. millisecond when saved
  64. .. [#dt2] all datetime.datetime instances are treated as *naive*. clients
  65. should always use UTC.
  66. .. [#re] :class:`~bson.regex.Regex` instances and regular expression
  67. objects from ``re.compile()`` are both saved as BSON regular expressions.
  68. BSON regular expressions are decoded as :class:`~bson.regex.Regex`
  69. instances.
  70. .. [#bytes] The bytes type from Python 3.x is encoded as BSON binary with
  71. subtype 0. In Python 3.x it will be decoded back to bytes. In Python 2.x
  72. it will be decoded to an instance of :class:`~bson.binary.Binary` with
  73. subtype 0.
  74. """
  75. def __init__(self, data=None, **kwargs):
  76. self.__keys = []
  77. dict.__init__(self)
  78. self.update(data)
  79. self.update(kwargs)
  80. def __new__(cls, *args, **kwargs):
  81. instance = super(SON, cls).__new__(cls, *args, **kwargs)
  82. instance.__keys = []
  83. return instance
  84. def __repr__(self):
  85. result = []
  86. for key in self.__keys:
  87. result.append("(%r, %r)" % (key, self[key]))
  88. return "SON([%s])" % ", ".join(result)
  89. def __setitem__(self, key, value):
  90. if key not in self.__keys:
  91. self.__keys.append(key)
  92. dict.__setitem__(self, key, value)
  93. def __delitem__(self, key):
  94. self.__keys.remove(key)
  95. dict.__delitem__(self, key)
  96. def keys(self):
  97. return list(self.__keys)
  98. def copy(self):
  99. other = SON()
  100. other.update(self)
  101. return other
  102. # TODO this is all from UserDict.DictMixin. it could probably be made more
  103. # efficient.
  104. # second level definitions support higher levels
  105. def __iter__(self):
  106. for k in self.__keys:
  107. yield k
  108. def has_key(self, key):
  109. return key in self.__keys
  110. # third level takes advantage of second level definitions
  111. def iteritems(self):
  112. for k in self:
  113. yield (k, self[k])
  114. def iterkeys(self):
  115. return self.__iter__()
  116. # fourth level uses definitions from lower levels
  117. def itervalues(self):
  118. for _, v in self.iteritems():
  119. yield v
  120. def values(self):
  121. return [v for _, v in self.iteritems()]
  122. def items(self):
  123. return [(key, self[key]) for key in self]
  124. def clear(self):
  125. self.__keys = []
  126. super(SON, self).clear()
  127. def setdefault(self, key, default=None):
  128. try:
  129. return self[key]
  130. except KeyError:
  131. self[key] = default
  132. return default
  133. def pop(self, key, *args):
  134. if len(args) > 1:
  135. raise TypeError("pop expected at most 2 arguments, got "\
  136. + repr(1 + len(args)))
  137. try:
  138. value = self[key]
  139. except KeyError:
  140. if args:
  141. return args[0]
  142. raise
  143. del self[key]
  144. return value
  145. def popitem(self):
  146. try:
  147. k, v = next(self.iteritems())
  148. except StopIteration:
  149. raise KeyError('container is empty')
  150. del self[k]
  151. return (k, v)
  152. def update(self, other=None, **kwargs):
  153. # Make progressively weaker assumptions about "other"
  154. if other is None:
  155. pass
  156. elif hasattr(other, 'iteritems'): # iteritems saves memory and lookups
  157. for k, v in other.iteritems():
  158. self[k] = v
  159. elif hasattr(other, 'keys'):
  160. for k in other.keys():
  161. self[k] = other[k]
  162. else:
  163. for k, v in other:
  164. self[k] = v
  165. if kwargs:
  166. self.update(kwargs)
  167. def get(self, key, default=None):
  168. try:
  169. return self[key]
  170. except KeyError:
  171. return default
  172. def __eq__(self, other):
  173. """Comparison to another SON is order-sensitive while comparison to a
  174. regular dictionary is order-insensitive.
  175. """
  176. if isinstance(other, SON):
  177. return len(self) == len(other) and self.items() == other.items()
  178. return self.to_dict() == other
  179. def __ne__(self, other):
  180. return not self == other
  181. def __len__(self):
  182. return len(self.__keys)
  183. def to_dict(self):
  184. """Convert a SON document to a normal Python dictionary instance.
  185. This is trickier than just *dict(...)* because it needs to be
  186. recursive.
  187. """
  188. def transform_value(value):
  189. if isinstance(value, list):
  190. return [transform_value(v) for v in value]
  191. elif isinstance(value, collections.Mapping):
  192. return dict([
  193. (k, transform_value(v))
  194. for k, v in iteritems(value)])
  195. else:
  196. return value
  197. return transform_value(dict(self))
  198. def __deepcopy__(self, memo):
  199. out = SON()
  200. val_id = id(self)
  201. if val_id in memo:
  202. return memo.get(val_id)
  203. memo[val_id] = out
  204. for k, v in self.iteritems():
  205. if not isinstance(v, RE_TYPE):
  206. v = copy.deepcopy(v, memo)
  207. out[k] = v
  208. return out