# Copyright 2014-2015 MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License. You
# may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.
  14. """Internal class to monitor a topology of one or more servers."""
  15. import random
  16. import threading
  17. from bson.py3compat import itervalues
  18. from pymongo import common
  19. from pymongo.pool import PoolOptions
  20. from pymongo.topology_description import (updated_topology_description,
  21. TOPOLOGY_TYPE,
  22. TopologyDescription)
  23. from pymongo.errors import ServerSelectionTimeoutError, InvalidOperation
  24. from pymongo.monotonic import time as _time
  25. from pymongo.server import Server
  26. from pymongo.server_selectors import (any_server_selector,
  27. apply_local_threshold,
  28. arbiter_server_selector,
  29. secondary_server_selector,
  30. writable_server_selector)
  31. class Topology(object):
  32. """Monitor a topology of one or more servers."""
  33. def __init__(self, topology_settings):
  34. self._settings = topology_settings
  35. topology_description = TopologyDescription(
  36. topology_settings.get_topology_type(),
  37. topology_settings.get_server_descriptions(),
  38. topology_settings.replica_set_name,
  39. None)
  40. self._description = topology_description
  41. # Store the seed list to help diagnose errors in _error_message().
  42. self._seed_addresses = list(topology_description.server_descriptions())
  43. self._opened = False
  44. self._lock = threading.Lock()
  45. self._condition = self._settings.condition_class(self._lock)
  46. self._servers = {}
  47. def open(self):
  48. """Start monitoring, or restart after a fork.
  49. No effect if called multiple times.
  50. """
  51. with self._lock:
  52. self._ensure_opened()
  53. def select_servers(self,
  54. selector,
  55. server_selection_timeout=None,
  56. address=None):
  57. """Return a list of Servers matching selector, or time out.
  58. :Parameters:
  59. - `selector`: function that takes a list of Servers and returns
  60. a subset of them.
  61. - `server_selection_timeout` (optional): maximum seconds to wait.
  62. If not provided, the default value common.SERVER_SELECTION_TIMEOUT
  63. is used.
  64. - `address`: optional server address to select.
  65. Calls self.open() if needed.
  66. Raises exc:`ServerSelectionTimeoutError` after
  67. `server_selection_timeout` if no matching servers are found.
  68. """
  69. if server_selection_timeout is None:
  70. server_timeout = self._settings.server_selection_timeout
  71. else:
  72. server_timeout = server_selection_timeout
  73. with self._lock:
  74. self._description.check_compatible()
  75. now = _time()
  76. end_time = now + server_timeout
  77. server_descriptions = self._apply_selector(selector, address)
  78. while not server_descriptions:
  79. # No suitable servers.
  80. if server_timeout == 0 or now > end_time:
  81. raise ServerSelectionTimeoutError(
  82. self._error_message(selector))
  83. self._ensure_opened()
  84. self._request_check_all()
  85. # Release the lock and wait for the topology description to
  86. # change, or for a timeout. We won't miss any changes that
  87. # came after our most recent _apply_selector call, since we've
  88. # held the lock until now.
  89. self._condition.wait(common.MIN_HEARTBEAT_INTERVAL)
  90. self._description.check_compatible()
  91. now = _time()
  92. server_descriptions = self._apply_selector(selector, address)
  93. return [self.get_server_by_address(sd.address)
  94. for sd in server_descriptions]
  95. def select_server(self,
  96. selector,
  97. server_selection_timeout=None,
  98. address=None):
  99. """Like select_servers, but choose a random server if several match."""
  100. return random.choice(self.select_servers(selector,
  101. server_selection_timeout,
  102. address))
  103. def select_server_by_address(self, address,
  104. server_selection_timeout=None):
  105. """Return a Server for "address", reconnecting if necessary.
  106. If the server's type is not known, request an immediate check of all
  107. servers. Time out after "server_selection_timeout" if the server
  108. cannot be reached.
  109. :Parameters:
  110. - `address`: A (host, port) pair.
  111. - `server_selection_timeout` (optional): maximum seconds to wait.
  112. If not provided, the default value
  113. common.SERVER_SELECTION_TIMEOUT is used.
  114. Calls self.open() if needed.
  115. Raises exc:`ServerSelectionTimeoutError` after
  116. `server_selection_timeout` if no matching servers are found.
  117. """
  118. return self.select_server(any_server_selector,
  119. server_selection_timeout,
  120. address)
  121. def on_change(self, server_description):
  122. """Process a new ServerDescription after an ismaster call completes."""
  123. # We do no I/O holding the lock.
  124. with self._lock:
  125. # Any monitored server was definitely in the topology description
  126. # once. Check if it's still in the description or if some state-
  127. # change removed it. E.g., we got a host list from the primary
  128. # that didn't include this server.
  129. if self._description.has_server(server_description.address):
  130. self._description = updated_topology_description(
  131. self._description, server_description)
  132. self._update_servers()
  133. # Wake waiters in select_servers().
  134. self._condition.notify_all()
  135. def get_server_by_address(self, address):
  136. """Get a Server or None.
  137. Returns the current version of the server immediately, even if it's
  138. Unknown or absent from the topology. Only use this in unittests.
  139. In driver code, use select_server_by_address, since then you're
  140. assured a recent view of the server's type and wire protocol version.
  141. """
  142. return self._servers.get(address)
  143. def has_server(self, address):
  144. return address in self._servers
  145. def get_primary(self):
  146. """Return primary's address or None."""
  147. # Implemented here in Topology instead of MongoClient, so it can lock.
  148. with self._lock:
  149. topology_type = self._description.topology_type
  150. if topology_type != TOPOLOGY_TYPE.ReplicaSetWithPrimary:
  151. return None
  152. description = writable_server_selector(
  153. self._description.known_servers)[0]
  154. return description.address
  155. def _get_replica_set_members(self, selector):
  156. """Return set of replica set member addresses."""
  157. # Implemented here in Topology instead of MongoClient, so it can lock.
  158. with self._lock:
  159. topology_type = self._description.topology_type
  160. if topology_type not in (TOPOLOGY_TYPE.ReplicaSetWithPrimary,
  161. TOPOLOGY_TYPE.ReplicaSetNoPrimary):
  162. return set()
  163. descriptions = selector(self._description.known_servers)
  164. return set([d.address for d in descriptions])
  165. def get_direct_or_primary(self):
  166. """Return the address of a connected primary or standalone, or None.
  167. Raise InvalidOperation for Sharded topologies.
  168. """
  169. # Implemented here in Topology instead of MongoClient, so it can lock.
  170. with self._lock:
  171. topology_type = self._description.topology_type
  172. if topology_type == TOPOLOGY_TYPE.Sharded:
  173. raise InvalidOperation()
  174. if topology_type not in (TOPOLOGY_TYPE.ReplicaSetWithPrimary,
  175. TOPOLOGY_TYPE.Single):
  176. return None
  177. descriptions = writable_server_selector(
  178. self._description.known_servers)
  179. return descriptions[0].address if descriptions else None
  180. def get_secondaries(self):
  181. """Return set of secondary addresses."""
  182. return self._get_replica_set_members(secondary_server_selector)
  183. def get_arbiters(self):
  184. """Return set of arbiter addresses."""
  185. return self._get_replica_set_members(arbiter_server_selector)
  186. def request_check_all(self, wait_time=5):
  187. """Wake all monitors, wait for at least one to check its server."""
  188. with self._lock:
  189. self._request_check_all()
  190. self._condition.wait(wait_time)
  191. def reset_pool(self, address):
  192. with self._lock:
  193. server = self._servers.get(address)
  194. if server:
  195. server.pool.reset()
  196. def reset_server(self, address):
  197. """Clear our pool for a server and mark it Unknown.
  198. Do *not* request an immediate check.
  199. """
  200. with self._lock:
  201. self._reset_server(address)
  202. def reset_server_and_request_check(self, address):
  203. """Clear our pool for a server, mark it Unknown, and check it soon."""
  204. with self._lock:
  205. self._reset_server(address)
  206. self._request_check(address)
  207. def close(self):
  208. """Clear pools and terminate monitors. Topology reopens on demand."""
  209. with self._lock:
  210. for server in self._servers.values():
  211. server.close()
  212. # Mark all servers Unknown.
  213. self._description = self._description.reset()
  214. self._update_servers()
  215. @property
  216. def description(self):
  217. return self._description
  218. def _ensure_opened(self):
  219. """Start monitors, or restart after a fork.
  220. Hold the lock when calling this.
  221. """
  222. if not self._opened:
  223. self._opened = True
  224. self._update_servers()
  225. else:
  226. # Restart monitors if we forked since previous call.
  227. for server in itervalues(self._servers):
  228. server.open()
  229. def _reset_server(self, address):
  230. """Clear our pool for a server and mark it Unknown.
  231. Hold the lock when calling this. Does *not* request an immediate check.
  232. """
  233. server = self._servers.get(address)
  234. # "server" is None if another thread removed it from the topology.
  235. if server:
  236. server.reset()
  237. # Mark this server Unknown.
  238. self._description = self._description.reset_server(address)
  239. self._update_servers()
  240. def _request_check(self, address):
  241. """Wake one monitor. Hold the lock when calling this."""
  242. server = self._servers.get(address)
  243. # "server" is None if another thread removed it from the topology.
  244. if server:
  245. server.request_check()
  246. def _request_check_all(self):
  247. """Wake all monitors. Hold the lock when calling this."""
  248. for server in self._servers.values():
  249. server.request_check()
  250. def _apply_selector(self, selector, address):
  251. if self._description.topology_type == TOPOLOGY_TYPE.Single:
  252. # Ignore the selector.
  253. return self._description.known_servers
  254. elif address:
  255. sd = self._description.server_descriptions().get(address)
  256. return [sd] if sd else []
  257. elif self._description.topology_type == TOPOLOGY_TYPE.Sharded:
  258. return apply_local_threshold(self._settings.local_threshold_ms,
  259. self._description.known_servers)
  260. else:
  261. sds = selector(self._description.known_servers)
  262. return apply_local_threshold(
  263. self._settings.local_threshold_ms, sds)
  264. def _update_servers(self):
  265. """Sync our Servers from TopologyDescription.server_descriptions.
  266. Hold the lock while calling this.
  267. """
  268. for address, sd in self._description.server_descriptions().items():
  269. if address not in self._servers:
  270. monitor = self._settings.monitor_class(
  271. server_description=sd,
  272. topology=self,
  273. pool=self._create_pool_for_monitor(address),
  274. topology_settings=self._settings)
  275. server = Server(
  276. server_description=sd,
  277. pool=self._create_pool_for_server(address),
  278. monitor=monitor)
  279. self._servers[address] = server
  280. server.open()
  281. else:
  282. self._servers[address].description = sd
  283. for address, server in list(self._servers.items()):
  284. if not self._description.has_server(address):
  285. server.close()
  286. self._servers.pop(address)
  287. def _create_pool_for_server(self, address):
  288. return self._settings.pool_class(address, self._settings.pool_options)
  289. def _create_pool_for_monitor(self, address):
  290. options = self._settings.pool_options
  291. # According to the Server Discovery And Monitoring Spec, monitors use
  292. # connect_timeout for both connect_timeout and socket_timeout. The
  293. # pool only has one socket so maxPoolSize and so on aren't needed.
  294. monitor_pool_options = PoolOptions(
  295. connect_timeout=options.connect_timeout,
  296. socket_timeout=options.connect_timeout,
  297. ssl_context=options.ssl_context,
  298. ssl_match_hostname=options.ssl_match_hostname,
  299. socket_keepalive=True)
  300. return self._settings.pool_class(address, monitor_pool_options,
  301. handshake=False)
  302. def _error_message(self, selector):
  303. """Format an error message if server selection fails.
  304. Hold the lock when calling this.
  305. """
  306. is_replica_set = self._description.topology_type in (
  307. TOPOLOGY_TYPE.ReplicaSetWithPrimary,
  308. TOPOLOGY_TYPE.ReplicaSetNoPrimary)
  309. if is_replica_set:
  310. server_plural = 'replica set members'
  311. elif self._description.topology_type == TOPOLOGY_TYPE.Sharded:
  312. server_plural = 'mongoses'
  313. else:
  314. server_plural = 'servers'
  315. if self._description.known_servers:
  316. # We've connected, but no servers match the selector.
  317. if selector is writable_server_selector:
  318. if is_replica_set:
  319. return 'No primary available for writes'
  320. else:
  321. return 'No %s available for writes' % server_plural
  322. else:
  323. return 'No %s match selector "%s"' % (server_plural, selector)
  324. else:
  325. addresses = list(self._description.server_descriptions())
  326. servers = list(self._description.server_descriptions().values())
  327. if not servers:
  328. if is_replica_set:
  329. # We removed all servers because of the wrong setName?
  330. return 'No %s available for replica set name "%s"' % (
  331. server_plural, self._settings.replica_set_name)
  332. else:
  333. return 'No %s available' % server_plural
  334. # 1 or more servers, all Unknown. Are they unknown for one reason?
  335. error = servers[0].error
  336. same = all(server.error == error for server in servers[1:])
  337. if same:
  338. if error is None:
  339. # We're still discovering.
  340. return 'No %s found yet' % server_plural
  341. if (is_replica_set and not
  342. set(addresses).intersection(self._seed_addresses)):
  343. # We replaced our seeds with new hosts but can't reach any.
  344. return (
  345. 'Could not reach any servers in %s. Replica set is'
  346. ' configured with internal hostnames or IPs?' %
  347. addresses)
  348. return str(error)
  349. else:
  350. return ','.join(str(server.error) for server in servers
  351. if server.error)