metadata.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. """
  2. Tools for converting old- to new-style metadata.
  3. """
  4. from collections import defaultdict, namedtuple
  5. from .pkginfo import read_pkg_info
  6. import re
  7. import os.path
  8. import textwrap
  9. import pkg_resources
  10. import email.parser
  11. import wheel
  12. METADATA_VERSION = "2.0"
  13. PLURAL_FIELDS = { "classifier" : "classifiers",
  14. "provides_dist" : "provides",
  15. "provides_extra" : "extras" }
  16. SKIP_FIELDS = set()
  17. CONTACT_FIELDS = (({"email":"author_email", "name": "author"},
  18. "author"),
  19. ({"email":"maintainer_email", "name": "maintainer"},
  20. "maintainer"))
  21. # commonly filled out as "UNKNOWN" by distutils:
  22. UNKNOWN_FIELDS = set(("author", "author_email", "platform", "home_page",
  23. "license"))
  24. # Wheel itself is probably the only program that uses non-extras markers
  25. # in METADATA/PKG-INFO. Support its syntax with the extra at the end only.
  26. EXTRA_RE = re.compile("""^(?P<package>.*?)(;\s*(?P<condition>.*?)(extra == '(?P<extra>.*?)')?)$""")
  27. KEYWORDS_RE = re.compile("[\0-,]+")
  28. MayRequiresKey = namedtuple('MayRequiresKey', ('condition', 'extra'))
  29. def unique(iterable):
  30. """
  31. Yield unique values in iterable, preserving order.
  32. """
  33. seen = set()
  34. for value in iterable:
  35. if not value in seen:
  36. seen.add(value)
  37. yield value
  38. def handle_requires(metadata, pkg_info, key):
  39. """
  40. Place the runtime requirements from pkg_info into metadata.
  41. """
  42. may_requires = defaultdict(list)
  43. for value in pkg_info.get_all(key):
  44. extra_match = EXTRA_RE.search(value)
  45. if extra_match:
  46. groupdict = extra_match.groupdict()
  47. condition = groupdict['condition']
  48. extra = groupdict['extra']
  49. package = groupdict['package']
  50. if condition.endswith(' and '):
  51. condition = condition[:-5]
  52. else:
  53. condition, extra = None, None
  54. package = value
  55. key = MayRequiresKey(condition, extra)
  56. may_requires[key].append(package)
  57. if may_requires:
  58. metadata['run_requires'] = []
  59. for key, value in may_requires.items():
  60. may_requirement = {'requires':value}
  61. if key.extra:
  62. may_requirement['extra'] = key.extra
  63. if key.condition:
  64. may_requirement['environment'] = key.condition
  65. metadata['run_requires'].append(may_requirement)
  66. if not 'extras' in metadata:
  67. metadata['extras'] = []
  68. metadata['extras'].extend([key.extra for key in may_requires.keys() if key.extra])
  69. def pkginfo_to_dict(path, distribution=None):
  70. """
  71. Convert PKG-INFO to a prototype Metadata 2.0 (PEP 426) dict.
  72. The description is included under the key ['description'] rather than
  73. being written to a separate file.
  74. path: path to PKG-INFO file
  75. distribution: optional distutils Distribution()
  76. """
  77. metadata = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
  78. metadata["generator"] = "bdist_wheel (" + wheel.__version__ + ")"
  79. try:
  80. unicode
  81. pkg_info = read_pkg_info(path)
  82. except NameError:
  83. pkg_info = email.parser.Parser().parsestr(open(path, 'rb').read().decode('utf-8'))
  84. description = None
  85. if pkg_info['Summary']:
  86. metadata['summary'] = pkginfo_unicode(pkg_info, 'Summary')
  87. del pkg_info['Summary']
  88. if pkg_info['Description']:
  89. description = dedent_description(pkg_info)
  90. del pkg_info['Description']
  91. else:
  92. payload = pkg_info.get_payload()
  93. if isinstance(payload, bytes):
  94. # Avoid a Python 2 Unicode error.
  95. # We still suffer ? glyphs on Python 3.
  96. payload = payload.decode('utf-8')
  97. if payload:
  98. description = payload
  99. if description:
  100. pkg_info['description'] = description
  101. for key in unique(k.lower() for k in pkg_info.keys()):
  102. low_key = key.replace('-', '_')
  103. if low_key in SKIP_FIELDS:
  104. continue
  105. if low_key in UNKNOWN_FIELDS and pkg_info.get(key) == 'UNKNOWN':
  106. continue
  107. if low_key in PLURAL_FIELDS:
  108. metadata[PLURAL_FIELDS[low_key]] = pkg_info.get_all(key)
  109. elif low_key == "requires_dist":
  110. handle_requires(metadata, pkg_info, key)
  111. elif low_key == 'provides_extra':
  112. if not 'extras' in metadata:
  113. metadata['extras'] = []
  114. metadata['extras'].extend(pkg_info.get_all(key))
  115. elif low_key == 'home_page':
  116. metadata['extensions']['python.details']['project_urls'] = {'Home':pkg_info[key]}
  117. elif low_key == 'keywords':
  118. metadata['keywords'] = KEYWORDS_RE.split(pkg_info[key])
  119. else:
  120. metadata[low_key] = pkg_info[key]
  121. metadata['metadata_version'] = METADATA_VERSION
  122. if 'extras' in metadata:
  123. metadata['extras'] = sorted(set(metadata['extras']))
  124. # include more information if distribution is available
  125. if distribution:
  126. for requires, attr in (('test_requires', 'tests_require'),):
  127. try:
  128. requirements = getattr(distribution, attr)
  129. if isinstance(requirements, list):
  130. new_requirements = list(convert_requirements(requirements))
  131. metadata[requires] = [{'requires':new_requirements}]
  132. except AttributeError:
  133. pass
  134. # handle contacts
  135. contacts = []
  136. for contact_type, role in CONTACT_FIELDS:
  137. contact = {}
  138. for key in contact_type:
  139. if contact_type[key] in metadata:
  140. contact[key] = metadata.pop(contact_type[key])
  141. if contact:
  142. contact['role'] = role
  143. contacts.append(contact)
  144. if contacts:
  145. metadata['extensions']['python.details']['contacts'] = contacts
  146. # convert entry points to exports
  147. try:
  148. with open(os.path.join(os.path.dirname(path), "entry_points.txt"), "r") as ep_file:
  149. ep_map = pkg_resources.EntryPoint.parse_map(ep_file.read())
  150. exports = {}
  151. for group, items in ep_map.items():
  152. exports[group] = {}
  153. for item in items.values():
  154. name, export = str(item).split(' = ', 1)
  155. exports[group][name] = export
  156. if exports:
  157. metadata['extensions']['python.exports'] = exports
  158. except IOError:
  159. pass
  160. # copy console_scripts entry points to commands
  161. if 'python.exports' in metadata['extensions']:
  162. for (ep_script, wrap_script) in (('console_scripts', 'wrap_console'),
  163. ('gui_scripts', 'wrap_gui')):
  164. if ep_script in metadata['extensions']['python.exports']:
  165. metadata['extensions']['python.commands'][wrap_script] = \
  166. metadata['extensions']['python.exports'][ep_script]
  167. return metadata
  168. def requires_to_requires_dist(requirement):
  169. """Compose the version predicates for requirement in PEP 345 fashion."""
  170. requires_dist = []
  171. for op, ver in requirement.specs:
  172. requires_dist.append(op + ver)
  173. if not requires_dist:
  174. return ''
  175. return " (%s)" % ','.join(requires_dist)
  176. def convert_requirements(requirements):
  177. """Yield Requires-Dist: strings for parsed requirements strings."""
  178. for req in requirements:
  179. parsed_requirement = pkg_resources.Requirement.parse(req)
  180. spec = requires_to_requires_dist(parsed_requirement)
  181. extras = ",".join(parsed_requirement.extras)
  182. if extras:
  183. extras = "[%s]" % extras
  184. yield (parsed_requirement.project_name + extras + spec)
  185. def pkginfo_to_metadata(egg_info_path, pkginfo_path):
  186. """
  187. Convert .egg-info directory with PKG-INFO to the Metadata 1.3 aka
  188. old-draft Metadata 2.0 format.
  189. """
  190. pkg_info = read_pkg_info(pkginfo_path)
  191. pkg_info.replace_header('Metadata-Version', '2.0')
  192. requires_path = os.path.join(egg_info_path, 'requires.txt')
  193. if os.path.exists(requires_path):
  194. requires = open(requires_path).read()
  195. for extra, reqs in pkg_resources.split_sections(requires):
  196. condition = ''
  197. if extra and ':' in extra: # setuptools extra:condition syntax
  198. extra, condition = extra.split(':', 1)
  199. if extra:
  200. pkg_info['Provides-Extra'] = extra
  201. if condition:
  202. condition += " and "
  203. condition += 'extra == %s' % repr(extra)
  204. if condition:
  205. condition = '; ' + condition
  206. for new_req in convert_requirements(reqs):
  207. pkg_info['Requires-Dist'] = new_req + condition
  208. description = pkg_info['Description']
  209. if description:
  210. pkg_info.set_payload(dedent_description(pkg_info))
  211. del pkg_info['Description']
  212. return pkg_info
  213. def pkginfo_unicode(pkg_info, field):
  214. """Hack to coax Unicode out of an email Message() - Python 3.3+"""
  215. text = pkg_info[field]
  216. field = field.lower()
  217. if not isinstance(text, str):
  218. if not hasattr(pkg_info, 'raw_items'): # Python 3.2
  219. return str(text)
  220. for item in pkg_info.raw_items():
  221. if item[0].lower() == field:
  222. text = item[1].encode('ascii', 'surrogateescape')\
  223. .decode('utf-8')
  224. break
  225. return text
  226. def dedent_description(pkg_info):
  227. """
  228. Dedent and convert pkg_info['Description'] to Unicode.
  229. """
  230. description = pkg_info['Description']
  231. # Python 3 Unicode handling, sorta.
  232. surrogates = False
  233. if not isinstance(description, str):
  234. surrogates = True
  235. description = pkginfo_unicode(pkg_info, 'Description')
  236. description_lines = description.splitlines()
  237. description_dedent = '\n'.join(
  238. # if the first line of long_description is blank,
  239. # the first line here will be indented.
  240. (description_lines[0].lstrip(),
  241. textwrap.dedent('\n'.join(description_lines[1:])),
  242. '\n'))
  243. if surrogates:
  244. description_dedent = description_dedent\
  245. .encode("utf8")\
  246. .decode("ascii", "surrogateescape")
  247. return description_dedent
  248. if __name__ == "__main__":
  249. import sys, pprint
  250. pprint.pprint(pkginfo_to_dict(sys.argv[1]))