svn_utils.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583
  1. import os
  2. import re
  3. import sys
  4. from distutils import log
  5. import xml.dom.pulldom
  6. import shlex
  7. import locale
  8. import codecs
  9. import unicodedata
  10. import warnings
  11. from setuptools.compat import unicode
  12. from setuptools.py31compat import TemporaryDirectory
  13. from xml.sax.saxutils import unescape
  14. try:
  15. import urlparse
  16. except ImportError:
  17. import urllib.parse as urlparse
  18. from subprocess import Popen as _Popen, PIPE as _PIPE
# NOTE: Use of the command line options requires SVN 1.3 or newer
# (December 2005), and SVN 1.3 hasn't been supported by its developers
# since mid 2008.
# subprocess is called several times with shell=(sys.platform == 'win32');
# see the following for more information:
#       http://bugs.python.org/issue8557
#       http://stackoverflow.com/questions/5658622/
#           python-subprocess-popen-environment-path
  26. def _run_command(args, stdout=_PIPE, stderr=_PIPE, encoding=None, stream=0):
  27. #regarding the shell argument, see: http://bugs.python.org/issue8557
  28. try:
  29. proc = _Popen(args, stdout=stdout, stderr=stderr,
  30. shell=(sys.platform == 'win32'))
  31. data = proc.communicate()[stream]
  32. except OSError:
  33. return 1, ''
  34. #doubled checked and
  35. data = decode_as_string(data, encoding)
  36. #communciate calls wait()
  37. return proc.returncode, data
  38. def _get_entry_schedule(entry):
  39. schedule = entry.getElementsByTagName('schedule')[0]
  40. return "".join([t.nodeValue
  41. for t in schedule.childNodes
  42. if t.nodeType == t.TEXT_NODE])
  43. def _get_target_property(target):
  44. property_text = target.getElementsByTagName('property')[0]
  45. return "".join([t.nodeValue
  46. for t in property_text.childNodes
  47. if t.nodeType == t.TEXT_NODE])
  48. def _get_xml_data(decoded_str):
  49. if sys.version_info < (3, 0):
  50. #old versions want an encoded string
  51. data = decoded_str.encode('utf-8')
  52. else:
  53. data = decoded_str
  54. return data
  55. def joinpath(prefix, *suffix):
  56. if not prefix or prefix == '.':
  57. return os.path.join(*suffix)
  58. return os.path.join(prefix, *suffix)
  59. def determine_console_encoding():
  60. try:
  61. #try for the preferred encoding
  62. encoding = locale.getpreferredencoding()
  63. #see if the locale.getdefaultlocale returns null
  64. #some versions of python\platforms return US-ASCII
  65. #when it cannot determine an encoding
  66. if not encoding or encoding == "US-ASCII":
  67. encoding = locale.getdefaultlocale()[1]
  68. if encoding:
  69. codecs.lookup(encoding) # make sure a lookup error is not made
  70. except (locale.Error, LookupError):
  71. encoding = None
  72. is_osx = sys.platform == "darwin"
  73. if not encoding:
  74. return ["US-ASCII", "utf-8"][is_osx]
  75. elif encoding.startswith("mac-") and is_osx:
  76. #certain versions of python would return mac-roman as default
  77. #OSX as a left over of earlier mac versions.
  78. return "utf-8"
  79. else:
  80. return encoding
  81. _console_encoding = determine_console_encoding()
  82. def decode_as_string(text, encoding=None):
  83. """
  84. Decode the console or file output explicitly using getpreferredencoding.
  85. The text paraemeter should be a encoded string, if not no decode occurs
  86. If no encoding is given, getpreferredencoding is used. If encoding is
  87. specified, that is used instead. This would be needed for SVN --xml
  88. output. Unicode is explicitly put in composed NFC form.
  89. --xml should be UTF-8 (SVN Issue 2938) the discussion on the Subversion
  90. DEV List from 2007 seems to indicate the same.
  91. """
  92. #text should be a byte string
  93. if encoding is None:
  94. encoding = _console_encoding
  95. if not isinstance(text, unicode):
  96. text = text.decode(encoding)
  97. text = unicodedata.normalize('NFC', text)
  98. return text
  99. def parse_dir_entries(decoded_str):
  100. '''Parse the entries from a recursive info xml'''
  101. doc = xml.dom.pulldom.parseString(_get_xml_data(decoded_str))
  102. entries = list()
  103. for event, node in doc:
  104. if event == 'START_ELEMENT' and node.nodeName == 'entry':
  105. doc.expandNode(node)
  106. if not _get_entry_schedule(node).startswith('delete'):
  107. entries.append((node.getAttribute('path'),
  108. node.getAttribute('kind')))
  109. return entries[1:] # do not want the root directory
  110. def parse_externals_xml(decoded_str, prefix=''):
  111. '''Parse a propget svn:externals xml'''
  112. prefix = os.path.normpath(prefix)
  113. prefix = os.path.normcase(prefix)
  114. doc = xml.dom.pulldom.parseString(_get_xml_data(decoded_str))
  115. externals = list()
  116. for event, node in doc:
  117. if event == 'START_ELEMENT' and node.nodeName == 'target':
  118. doc.expandNode(node)
  119. path = os.path.normpath(node.getAttribute('path'))
  120. if os.path.normcase(path).startswith(prefix):
  121. path = path[len(prefix)+1:]
  122. data = _get_target_property(node)
  123. #data should be decoded already
  124. for external in parse_external_prop(data):
  125. externals.append(joinpath(path, external))
  126. return externals # do not want the root directory
  127. def parse_external_prop(lines):
  128. """
  129. Parse the value of a retrieved svn:externals entry.
  130. possible token setups (with quotng and backscaping in laters versions)
  131. URL[@#] EXT_FOLDERNAME
  132. [-r#] URL EXT_FOLDERNAME
  133. EXT_FOLDERNAME [-r#] URL
  134. """
  135. externals = []
  136. for line in lines.splitlines():
  137. line = line.lstrip() # there might be a "\ "
  138. if not line:
  139. continue
  140. if sys.version_info < (3, 0):
  141. #shlex handles NULLs just fine and shlex in 2.7 tries to encode
  142. #as ascii automatiically
  143. line = line.encode('utf-8')
  144. line = shlex.split(line)
  145. if sys.version_info < (3, 0):
  146. line = [x.decode('utf-8') for x in line]
  147. #EXT_FOLDERNAME is either the first or last depending on where
  148. #the URL falls
  149. if urlparse.urlsplit(line[-1])[0]:
  150. external = line[0]
  151. else:
  152. external = line[-1]
  153. external = decode_as_string(external, encoding="utf-8")
  154. externals.append(os.path.normpath(external))
  155. return externals
  156. def parse_prop_file(filename, key):
  157. found = False
  158. f = open(filename, 'rt')
  159. data = ''
  160. try:
  161. for line in iter(f.readline, ''): # can't use direct iter!
  162. parts = line.split()
  163. if len(parts) == 2:
  164. kind, length = parts
  165. data = f.read(int(length))
  166. if kind == 'K' and data == key:
  167. found = True
  168. elif kind == 'V' and found:
  169. break
  170. finally:
  171. f.close()
  172. return data
  173. class SvnInfo(object):
  174. '''
  175. Generic svn_info object. No has little knowledge of how to extract
  176. information. Use cls.load to instatiate according svn version.
  177. Paths are not filesystem encoded.
  178. '''
  179. @staticmethod
  180. def get_svn_version():
  181. # Temp config directory should be enough to check for repository
  182. # This is needed because .svn always creates .subversion and
  183. # some operating systems do not handle dot directory correctly.
  184. # Real queries in real svn repos with be concerned with it creation
  185. with TemporaryDirectory() as tempdir:
  186. code, data = _run_command(['svn',
  187. '--config-dir', tempdir,
  188. '--version',
  189. '--quiet'])
  190. if code == 0 and data:
  191. return data.strip()
  192. else:
  193. return ''
  194. #svnversion return values (previous implementations return max revision)
  195. # 4123:4168 mixed revision working copy
  196. # 4168M modified working copy
  197. # 4123S switched working copy
  198. # 4123:4168MS mixed revision, modified, switched working copy
  199. revision_re = re.compile(r'(?:([\-0-9]+):)?(\d+)([a-z]*)\s*$', re.I)
  200. @classmethod
  201. def load(cls, dirname=''):
  202. normdir = os.path.normpath(dirname)
  203. # Temp config directory should be enough to check for repository
  204. # This is needed because .svn always creates .subversion and
  205. # some operating systems do not handle dot directory correctly.
  206. # Real queries in real svn repos with be concerned with it creation
  207. with TemporaryDirectory() as tempdir:
  208. code, data = _run_command(['svn',
  209. '--config-dir', tempdir,
  210. 'info', normdir])
  211. # Must check for some contents, as some use empty directories
  212. # in testcases, however only enteries is needed also the info
  213. # command above MUST have worked
  214. svn_dir = os.path.join(normdir, '.svn')
  215. is_svn_wd = (not code or
  216. os.path.isfile(os.path.join(svn_dir, 'entries')))
  217. svn_version = tuple(cls.get_svn_version().split('.'))
  218. try:
  219. base_svn_version = tuple(int(x) for x in svn_version[:2])
  220. except ValueError:
  221. base_svn_version = tuple()
  222. if not is_svn_wd:
  223. #return an instance of this NO-OP class
  224. return SvnInfo(dirname)
  225. if code or not base_svn_version or base_svn_version < (1, 3):
  226. warnings.warn(("No SVN 1.3+ command found: falling back "
  227. "on pre 1.7 .svn parsing"), DeprecationWarning)
  228. return SvnFileInfo(dirname)
  229. if base_svn_version < (1, 5):
  230. return Svn13Info(dirname)
  231. return Svn15Info(dirname)
  232. def __init__(self, path=''):
  233. self.path = path
  234. self._entries = None
  235. self._externals = None
  236. def get_revision(self):
  237. 'Retrieve the directory revision informatino using svnversion'
  238. code, data = _run_command(['svnversion', '-c', self.path])
  239. if code:
  240. log.warn("svnversion failed")
  241. return 0
  242. parsed = self.revision_re.match(data)
  243. if parsed:
  244. return int(parsed.group(2))
  245. else:
  246. return 0
  247. @property
  248. def entries(self):
  249. if self._entries is None:
  250. self._entries = self.get_entries()
  251. return self._entries
  252. @property
  253. def externals(self):
  254. if self._externals is None:
  255. self._externals = self.get_externals()
  256. return self._externals
  257. def iter_externals(self):
  258. '''
  259. Iterate over the svn:external references in the repository path.
  260. '''
  261. for item in self.externals:
  262. yield item
  263. def iter_files(self):
  264. '''
  265. Iterate over the non-deleted file entries in the repository path
  266. '''
  267. for item, kind in self.entries:
  268. if kind.lower() == 'file':
  269. yield item
  270. def iter_dirs(self, include_root=True):
  271. '''
  272. Iterate over the non-deleted file entries in the repository path
  273. '''
  274. if include_root:
  275. yield self.path
  276. for item, kind in self.entries:
  277. if kind.lower() == 'dir':
  278. yield item
  279. def get_entries(self):
  280. return []
  281. def get_externals(self):
  282. return []
  283. class Svn13Info(SvnInfo):
  284. def get_entries(self):
  285. code, data = _run_command(['svn', 'info', '-R', '--xml', self.path],
  286. encoding="utf-8")
  287. if code:
  288. log.debug("svn info failed")
  289. return []
  290. return parse_dir_entries(data)
  291. def get_externals(self):
  292. #Previous to 1.5 --xml was not supported for svn propget and the -R
  293. #output format breaks the shlex compatible semantics.
  294. cmd = ['svn', 'propget', 'svn:externals']
  295. result = []
  296. for folder in self.iter_dirs():
  297. code, lines = _run_command(cmd + [folder], encoding="utf-8")
  298. if code != 0:
  299. log.warn("svn propget failed")
  300. return []
  301. #lines should a str
  302. for external in parse_external_prop(lines):
  303. if folder:
  304. external = os.path.join(folder, external)
  305. result.append(os.path.normpath(external))
  306. return result
  307. class Svn15Info(Svn13Info):
  308. def get_externals(self):
  309. cmd = ['svn', 'propget', 'svn:externals', self.path, '-R', '--xml']
  310. code, lines = _run_command(cmd, encoding="utf-8")
  311. if code:
  312. log.debug("svn propget failed")
  313. return []
  314. return parse_externals_xml(lines, prefix=os.path.abspath(self.path))
  315. class SvnFileInfo(SvnInfo):
  316. def __init__(self, path=''):
  317. super(SvnFileInfo, self).__init__(path)
  318. self._directories = None
  319. self._revision = None
  320. def _walk_svn(self, base):
  321. entry_file = joinpath(base, '.svn', 'entries')
  322. if os.path.isfile(entry_file):
  323. entries = SVNEntriesFile.load(base)
  324. yield (base, False, entries.parse_revision())
  325. for path in entries.get_undeleted_records():
  326. path = decode_as_string(path)
  327. path = joinpath(base, path)
  328. if os.path.isfile(path):
  329. yield (path, True, None)
  330. elif os.path.isdir(path):
  331. for item in self._walk_svn(path):
  332. yield item
  333. def _build_entries(self):
  334. entries = list()
  335. rev = 0
  336. for path, isfile, dir_rev in self._walk_svn(self.path):
  337. if isfile:
  338. entries.append((path, 'file'))
  339. else:
  340. entries.append((path, 'dir'))
  341. rev = max(rev, dir_rev)
  342. self._entries = entries
  343. self._revision = rev
  344. def get_entries(self):
  345. if self._entries is None:
  346. self._build_entries()
  347. return self._entries
  348. def get_revision(self):
  349. if self._revision is None:
  350. self._build_entries()
  351. return self._revision
  352. def get_externals(self):
  353. prop_files = [['.svn', 'dir-prop-base'],
  354. ['.svn', 'dir-props']]
  355. externals = []
  356. for dirname in self.iter_dirs():
  357. prop_file = None
  358. for rel_parts in prop_files:
  359. filename = joinpath(dirname, *rel_parts)
  360. if os.path.isfile(filename):
  361. prop_file = filename
  362. if prop_file is not None:
  363. ext_prop = parse_prop_file(prop_file, 'svn:externals')
  364. #ext_prop should be utf-8 coming from svn:externals
  365. ext_prop = decode_as_string(ext_prop, encoding="utf-8")
  366. externals.extend(parse_external_prop(ext_prop))
  367. return externals
  368. def svn_finder(dirname=''):
  369. #combined externals due to common interface
  370. #combined externals and entries due to lack of dir_props in 1.7
  371. info = SvnInfo.load(dirname)
  372. for path in info.iter_files():
  373. yield path
  374. for path in info.iter_externals():
  375. sub_info = SvnInfo.load(path)
  376. for sub_path in sub_info.iter_files():
  377. yield sub_path
  378. class SVNEntriesFile(object):
  379. def __init__(self, data):
  380. self.data = data
  381. @classmethod
  382. def load(class_, base):
  383. filename = os.path.join(base, '.svn', 'entries')
  384. f = open(filename)
  385. try:
  386. result = SVNEntriesFile.read(f)
  387. finally:
  388. f.close()
  389. return result
  390. @classmethod
  391. def read(class_, fileobj):
  392. data = fileobj.read()
  393. is_xml = data.startswith('<?xml')
  394. class_ = [SVNEntriesFileText, SVNEntriesFileXML][is_xml]
  395. return class_(data)
  396. def parse_revision(self):
  397. all_revs = self.parse_revision_numbers() + [0]
  398. return max(all_revs)
  399. class SVNEntriesFileText(SVNEntriesFile):
  400. known_svn_versions = {
  401. '1.4.x': 8,
  402. '1.5.x': 9,
  403. '1.6.x': 10,
  404. }
  405. def __get_cached_sections(self):
  406. return self.sections
  407. def get_sections(self):
  408. SECTION_DIVIDER = '\f\n'
  409. sections = self.data.split(SECTION_DIVIDER)
  410. sections = [x for x in map(str.splitlines, sections)]
  411. try:
  412. # remove the SVN version number from the first line
  413. svn_version = int(sections[0].pop(0))
  414. if not svn_version in self.known_svn_versions.values():
  415. log.warn("Unknown subversion verson %d", svn_version)
  416. except ValueError:
  417. return
  418. self.sections = sections
  419. self.get_sections = self.__get_cached_sections
  420. return self.sections
  421. def is_valid(self):
  422. return bool(self.get_sections())
  423. def get_url(self):
  424. return self.get_sections()[0][4]
  425. def parse_revision_numbers(self):
  426. revision_line_number = 9
  427. rev_numbers = [
  428. int(section[revision_line_number])
  429. for section in self.get_sections()
  430. if (len(section) > revision_line_number
  431. and section[revision_line_number])
  432. ]
  433. return rev_numbers
  434. def get_undeleted_records(self):
  435. undeleted = lambda s: s and s[0] and (len(s) < 6 or s[5] != 'delete')
  436. result = [
  437. section[0]
  438. for section in self.get_sections()
  439. if undeleted(section)
  440. ]
  441. return result
  442. class SVNEntriesFileXML(SVNEntriesFile):
  443. def is_valid(self):
  444. return True
  445. def get_url(self):
  446. "Get repository URL"
  447. urlre = re.compile('url="([^"]+)"')
  448. return urlre.search(self.data).group(1)
  449. def parse_revision_numbers(self):
  450. revre = re.compile(r'committed-rev="(\d+)"')
  451. return [
  452. int(m.group(1))
  453. for m in revre.finditer(self.data)
  454. ]
  455. def get_undeleted_records(self):
  456. entries_pattern = \
  457. re.compile(r'name="([^"]+)"(?![^>]+deleted="true")', re.I)
  458. results = [
  459. unescape(match.group(1))
  460. for match in entries_pattern.finditer(self.data)
  461. ]
  462. return results
  463. if __name__ == '__main__':
  464. for name in svn_finder(sys.argv[1]):
  465. print(name)