uri_parser.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. # Copyright 2011-2015 MongoDB, Inc.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License"); you
  4. # may not use this file except in compliance with the License. You
  5. # may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  12. # implied. See the License for the specific language governing
  13. # permissions and limitations under the License.
  14. """Tools to parse and validate a MongoDB URI."""
  15. from bson.py3compat import PY3, iteritems, string_type
  16. if PY3:
  17. from urllib.parse import unquote_plus
  18. else:
  19. from urllib import unquote_plus
  20. from pymongo.common import validate as _validate
  21. from pymongo.errors import ConfigurationError, InvalidURI
  22. SCHEME = 'mongodb://'
  23. SCHEME_LEN = len(SCHEME)
  24. DEFAULT_PORT = 27017
  25. def _partition(entity, sep):
  26. """Python2.4 doesn't have a partition method so we provide
  27. our own that mimics str.partition from later releases.
  28. Split the string at the first occurrence of sep, and return a
  29. 3-tuple containing the part before the separator, the separator
  30. itself, and the part after the separator. If the separator is not
  31. found, return a 3-tuple containing the string itself, followed
  32. by two empty strings.
  33. """
  34. parts = entity.split(sep, 1)
  35. if len(parts) == 2:
  36. return parts[0], sep, parts[1]
  37. else:
  38. return entity, '', ''
  39. def _rpartition(entity, sep):
  40. """Python2.4 doesn't have an rpartition method so we provide
  41. our own that mimics str.rpartition from later releases.
  42. Split the string at the last occurrence of sep, and return a
  43. 3-tuple containing the part before the separator, the separator
  44. itself, and the part after the separator. If the separator is not
  45. found, return a 3-tuple containing two empty strings, followed
  46. by the string itself.
  47. """
  48. idx = entity.rfind(sep)
  49. if idx == -1:
  50. return '', '', entity
  51. return entity[:idx], sep, entity[idx + 1:]
  52. def parse_userinfo(userinfo):
  53. """Validates the format of user information in a MongoDB URI.
  54. Reserved characters like ':', '/', '+' and '@' must be escaped
  55. following RFC 2396.
  56. Returns a 2-tuple containing the unescaped username followed
  57. by the unescaped password.
  58. :Paramaters:
  59. - `userinfo`: A string of the form <username>:<password>
  60. .. versionchanged:: 2.2
  61. Now uses `urllib.unquote_plus` so `+` characters must be escaped.
  62. """
  63. if '@' in userinfo or userinfo.count(':') > 1:
  64. raise InvalidURI("':' or '@' characters in a username or password "
  65. "must be escaped according to RFC 2396.")
  66. user, _, passwd = _partition(userinfo, ":")
  67. # No password is expected with GSSAPI authentication.
  68. if not user:
  69. raise InvalidURI("The empty string is not valid username.")
  70. user = unquote_plus(user)
  71. passwd = unquote_plus(passwd)
  72. return user, passwd
  73. def parse_ipv6_literal_host(entity, default_port):
  74. """Validates an IPv6 literal host:port string.
  75. Returns a 2-tuple of IPv6 literal followed by port where
  76. port is default_port if it wasn't specified in entity.
  77. :Parameters:
  78. - `entity`: A string that represents an IPv6 literal enclosed
  79. in braces (e.g. '[::1]' or '[::1]:27017').
  80. - `default_port`: The port number to use when one wasn't
  81. specified in entity.
  82. """
  83. if entity.find(']') == -1:
  84. raise ValueError("an IPv6 address literal must be "
  85. "enclosed in '[' and ']' according "
  86. "to RFC 2732.")
  87. i = entity.find(']:')
  88. if i == -1:
  89. return entity[1:-1], default_port
  90. return entity[1: i], entity[i + 2:]
  91. def parse_host(entity, default_port=DEFAULT_PORT):
  92. """Validates a host string
  93. Returns a 2-tuple of host followed by port where port is default_port
  94. if it wasn't specified in the string.
  95. :Parameters:
  96. - `entity`: A host or host:port string where host could be a
  97. hostname or IP address.
  98. - `default_port`: The port number to use when one wasn't
  99. specified in entity.
  100. """
  101. host = entity
  102. port = default_port
  103. if entity[0] == '[':
  104. host, port = parse_ipv6_literal_host(entity, default_port)
  105. elif entity.find(':') != -1:
  106. if entity.count(':') > 1:
  107. raise ValueError("Reserved characters such as ':' must be "
  108. "escaped according RFC 2396. An IPv6 "
  109. "address literal must be enclosed in '[' "
  110. "and ']' according to RFC 2732.")
  111. host, port = host.split(':', 1)
  112. if isinstance(port, string_type):
  113. if not port.isdigit():
  114. raise ValueError("Port number must be an integer.")
  115. port = int(port)
  116. # Normalize hostname to lowercase, since DNS is case-insensitive:
  117. # http://tools.ietf.org/html/rfc4343
  118. # This prevents useless rediscovery if "foo.com" is in the seed list but
  119. # "FOO.com" is in the ismaster response.
  120. return host.lower(), port
  121. def validate_options(opts):
  122. """Validates and normalizes options passed in a MongoDB URI.
  123. Returns a new dictionary of validated and normalized options.
  124. :Parameters:
  125. - `opts`: A dict of MongoDB URI options.
  126. """
  127. return dict([_validate(opt, val) for opt, val in iteritems(opts)])
  128. def _parse_options(opts, delim):
  129. """Helper method for split_options which creates the options dict.
  130. Also handles the creation of a list for the URI tag_sets/
  131. readpreferencetags portion."""
  132. options = {}
  133. for opt in opts.split(delim):
  134. key, val = opt.split("=")
  135. if key.lower() == 'readpreferencetags':
  136. options.setdefault('readpreferencetags', []).append(val)
  137. else:
  138. # str(option) to ensure that a unicode URI results in plain 'str'
  139. # option names. 'normalized' is then suitable to be passed as
  140. # kwargs in all Python versions.
  141. options[str(key)] = val
  142. return options
  143. def split_options(opts, validate=True):
  144. """Takes the options portion of a MongoDB URI, validates each option
  145. and returns the options in a dictionary.
  146. :Parameters:
  147. - `opt`: A string representing MongoDB URI options.
  148. - `validate`: If ``True`` (the default), validate and normalize all
  149. options.
  150. """
  151. and_idx = opts.find("&")
  152. semi_idx = opts.find(";")
  153. try:
  154. if and_idx >= 0 and semi_idx >= 0:
  155. raise InvalidURI("Can not mix '&' and ';' for option separators.")
  156. elif and_idx >= 0:
  157. options = _parse_options(opts, "&")
  158. elif semi_idx >= 0:
  159. options = _parse_options(opts, ";")
  160. elif opts.find("=") != -1:
  161. options = _parse_options(opts, None)
  162. else:
  163. raise ValueError
  164. except ValueError:
  165. raise InvalidURI("MongoDB URI options are key=value pairs.")
  166. if validate:
  167. return validate_options(options)
  168. return options
  169. def split_hosts(hosts, default_port=DEFAULT_PORT):
  170. """Takes a string of the form host1[:port],host2[:port]... and
  171. splits it into (host, port) tuples. If [:port] isn't present the
  172. default_port is used.
  173. Returns a set of 2-tuples containing the host name (or IP) followed by
  174. port number.
  175. :Parameters:
  176. - `hosts`: A string of the form host1[:port],host2[:port],...
  177. - `default_port`: The port number to use when one wasn't specified
  178. for a host.
  179. """
  180. nodes = []
  181. for entity in hosts.split(','):
  182. if not entity:
  183. raise ConfigurationError("Empty host "
  184. "(or extra comma in host list).")
  185. port = default_port
  186. # Unix socket entities don't have ports
  187. if entity.endswith('.sock'):
  188. port = None
  189. nodes.append(parse_host(entity, port))
  190. return nodes
  191. def parse_uri(uri, default_port=DEFAULT_PORT, validate=True):
  192. """Parse and validate a MongoDB URI.
  193. Returns a dict of the form::
  194. {
  195. 'nodelist': <list of (host, port) tuples>,
  196. 'username': <username> or None,
  197. 'password': <password> or None,
  198. 'database': <database name> or None,
  199. 'collection': <collection name> or None,
  200. 'options': <dict of MongoDB URI options>
  201. }
  202. :Parameters:
  203. - `uri`: The MongoDB URI to parse.
  204. - `default_port`: The port number to use when one wasn't specified
  205. for a host in the URI.
  206. - `validate`: If ``True`` (the default), validate and normalize all
  207. options.
  208. """
  209. if not uri.startswith(SCHEME):
  210. raise InvalidURI("Invalid URI scheme: URI "
  211. "must begin with '%s'" % (SCHEME,))
  212. scheme_free = uri[SCHEME_LEN:]
  213. if not scheme_free:
  214. raise InvalidURI("Must provide at least one hostname or IP.")
  215. nodes = None
  216. user = None
  217. passwd = None
  218. dbase = None
  219. collection = None
  220. options = {}
  221. # Check for unix domain sockets in the uri
  222. if '.sock' in scheme_free:
  223. host_part, _, path_part = _rpartition(scheme_free, '/')
  224. try:
  225. parse_uri('%s%s' % (SCHEME, host_part))
  226. except (ConfigurationError, InvalidURI):
  227. host_part = scheme_free
  228. path_part = ""
  229. else:
  230. host_part, _, path_part = _partition(scheme_free, '/')
  231. if not path_part and '?' in host_part:
  232. raise InvalidURI("A '/' is required between "
  233. "the host list and any options.")
  234. if '@' in host_part:
  235. userinfo, _, hosts = _rpartition(host_part, '@')
  236. user, passwd = parse_userinfo(userinfo)
  237. else:
  238. hosts = host_part
  239. nodes = split_hosts(hosts, default_port=default_port)
  240. if path_part:
  241. if path_part[0] == '?':
  242. opts = path_part[1:]
  243. else:
  244. dbase, _, opts = _partition(path_part, '?')
  245. if '.' in dbase:
  246. dbase, collection = dbase.split('.', 1)
  247. if opts:
  248. options = split_options(opts, validate)
  249. return {
  250. 'nodelist': nodes,
  251. 'username': user,
  252. 'password': passwd,
  253. 'database': dbase,
  254. 'collection': collection,
  255. 'options': options
  256. }
  257. if __name__ == '__main__':
  258. import pprint
  259. import sys
  260. try:
  261. pprint.pprint(parse_uri(sys.argv[1]))
  262. except InvalidURI as e:
  263. print(e)
  264. sys.exit(0)