IptcImagePlugin.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. #
  2. # The Python Imaging Library.
  3. # $Id$
  4. #
  5. # IPTC/NAA file handling
  6. #
  7. # history:
  8. # 1995-10-01 fl Created
  9. # 1998-03-09 fl Cleaned up and added to PIL
  10. # 2002-06-18 fl Added getiptcinfo helper
  11. #
  12. # Copyright (c) Secret Labs AB 1997-2002.
  13. # Copyright (c) Fredrik Lundh 1995.
  14. #
  15. # See the README file for information on usage and redistribution.
  16. #
  17. from __future__ import annotations
  18. from io import BytesIO
  19. from typing import cast
  20. from . import Image, ImageFile
  21. from ._binary import i16be as i16
  22. from ._binary import i32be as i32
  23. COMPRESSION = {1: "raw", 5: "jpeg"}
  24. #
  25. # Helpers
  26. def _i(c: bytes) -> int:
  27. return i32((b"\0\0\0\0" + c)[-4:])
  28. ##
  29. # Image plugin for IPTC/NAA datastreams. To read IPTC/NAA fields
  30. # from TIFF and JPEG files, use the <b>getiptcinfo</b> function.
  31. class IptcImageFile(ImageFile.ImageFile):
  32. format = "IPTC"
  33. format_description = "IPTC/NAA"
  34. def getint(self, key: tuple[int, int]) -> int:
  35. return _i(self.info[key])
  36. def field(self) -> tuple[tuple[int, int] | None, int]:
  37. #
  38. # get a IPTC field header
  39. assert self.fp is not None
  40. s = self.fp.read(5)
  41. if not s.strip(b"\x00"):
  42. return None, 0
  43. tag = s[1], s[2]
  44. # syntax
  45. if s[0] != 0x1C or tag[0] not in [1, 2, 3, 4, 5, 6, 7, 8, 9, 240]:
  46. msg = "invalid IPTC/NAA file"
  47. raise SyntaxError(msg)
  48. # field size
  49. size = s[3]
  50. if size > 132:
  51. msg = "illegal field length in IPTC/NAA file"
  52. raise OSError(msg)
  53. elif size == 128:
  54. size = 0
  55. elif size > 128:
  56. size = _i(self.fp.read(size - 128))
  57. else:
  58. size = i16(s, 3)
  59. return tag, size
  60. def _open(self) -> None:
  61. # load descriptive fields
  62. assert self.fp is not None
  63. while True:
  64. offset = self.fp.tell()
  65. tag, size = self.field()
  66. if not tag or tag == (8, 10):
  67. break
  68. if size:
  69. tagdata = self.fp.read(size)
  70. else:
  71. tagdata = None
  72. if tag in self.info:
  73. if isinstance(self.info[tag], list):
  74. self.info[tag].append(tagdata)
  75. else:
  76. self.info[tag] = [self.info[tag], tagdata]
  77. else:
  78. self.info[tag] = tagdata
  79. # mode
  80. layers = self.info[(3, 60)][0]
  81. component = self.info[(3, 60)][1]
  82. if layers == 1 and not component:
  83. self._mode = "L"
  84. band = None
  85. else:
  86. if layers == 3 and component:
  87. self._mode = "RGB"
  88. elif layers == 4 and component:
  89. self._mode = "CMYK"
  90. if (3, 65) in self.info:
  91. band = self.info[(3, 65)][0] - 1
  92. else:
  93. band = 0
  94. # size
  95. self._size = self.getint((3, 20)), self.getint((3, 30))
  96. # compression
  97. try:
  98. compression = COMPRESSION[self.getint((3, 120))]
  99. except KeyError as e:
  100. msg = "Unknown IPTC image compression"
  101. raise OSError(msg) from e
  102. # tile
  103. if tag == (8, 10):
  104. self.tile = [
  105. ImageFile._Tile("iptc", (0, 0) + self.size, offset, (compression, band))
  106. ]
  107. def load(self) -> Image.core.PixelAccess | None:
  108. if self.tile:
  109. args = self.tile[0].args
  110. assert isinstance(args, tuple)
  111. compression, band = args
  112. assert self.fp is not None
  113. self.fp.seek(self.tile[0].offset)
  114. # Copy image data to temporary file
  115. o = BytesIO()
  116. if compression == "raw":
  117. # To simplify access to the extracted file,
  118. # prepend a PPM header
  119. o.write(b"P5\n%d %d\n255\n" % self.size)
  120. while True:
  121. type, size = self.field()
  122. if type != (8, 10):
  123. break
  124. while size > 0:
  125. s = self.fp.read(min(size, 8192))
  126. if not s:
  127. break
  128. o.write(s)
  129. size -= len(s)
  130. with Image.open(o) as _im:
  131. if band is not None:
  132. bands = [Image.new("L", _im.size)] * Image.getmodebands(self.mode)
  133. bands[band] = _im
  134. im = Image.merge(self.mode, bands)
  135. else:
  136. im = _im
  137. im.load()
  138. self.im = im.im
  139. self.tile = []
  140. return ImageFile.ImageFile.load(self)
  141. Image.register_open(IptcImageFile.format, IptcImageFile)
  142. Image.register_extension(IptcImageFile.format, ".iim")
  143. def getiptcinfo(
  144. im: ImageFile.ImageFile,
  145. ) -> dict[tuple[int, int], bytes | list[bytes]] | None:
  146. """
  147. Get IPTC information from TIFF, JPEG, or IPTC file.
  148. :param im: An image containing IPTC data.
  149. :returns: A dictionary containing IPTC information, or None if
  150. no IPTC information block was found.
  151. """
  152. from . import JpegImagePlugin, TiffImagePlugin
  153. data = None
  154. info: dict[tuple[int, int], bytes | list[bytes]] = {}
  155. if isinstance(im, IptcImageFile):
  156. # return info dictionary right away
  157. for k, v in im.info.items():
  158. if isinstance(k, tuple):
  159. info[k] = v
  160. return info
  161. elif isinstance(im, JpegImagePlugin.JpegImageFile):
  162. # extract the IPTC/NAA resource
  163. photoshop = im.info.get("photoshop")
  164. if photoshop:
  165. data = photoshop.get(0x0404)
  166. elif isinstance(im, TiffImagePlugin.TiffImageFile):
  167. # get raw data from the IPTC/NAA tag (PhotoShop tags the data
  168. # as 4-byte integers, so we cannot use the get method...)
  169. try:
  170. data = im.tag_v2._tagdata[TiffImagePlugin.IPTC_NAA_CHUNK]
  171. except KeyError:
  172. pass
  173. if data is None:
  174. return None # no properties
  175. # create an IptcImagePlugin object without initializing it
  176. class FakeImage:
  177. pass
  178. fake_im = FakeImage()
  179. fake_im.__class__ = IptcImageFile # type: ignore[assignment]
  180. iptc_im = cast(IptcImageFile, fake_im)
  181. # parse the IPTC information chunk
  182. iptc_im.info = {}
  183. iptc_im.fp = BytesIO(data)
  184. try:
  185. iptc_im._open()
  186. except (IndexError, KeyError):
  187. pass # expected failure
  188. for k, v in iptc_im.info.items():
  189. if isinstance(k, tuple):
  190. info[k] = v
  191. return info