DEP: Deprecate support for abbreviations in decode_stream_data (#3617)

stefan6419846 · web-flow · commit 5d6d7f06a5e3 · 2026-01-27T14:06:57.000+01:00
The inline image abbreviations were previously supported in the
public-facing `decode_stream_data` function. An analysis showed that
this support was limited:

  * Our own code would always map to the "proper" names beforehand, so
    this variant was possibly never tested and has remained incomplete.
  * Only abbreviations for the filter names were supported. Using `/F`
    instead of `/Filter` as the key itself, or specifying `/DP` instead
    of `/DecodeParms`, would already fail, as would all other abbreviated
    keys from tables 91 and 92 of the PDF 2.0 specification.

With this in mind, only external API users might have relied on the
abbreviations in this function, although we never seem to have received
specific reports about the unsupported/unmapped parameters. Thus,
deprecating the abbreviations specific to inline images in this generic
function seems like the best way to move forward.

While analyzing the current behavior, I wrote some basic inline image
tests for the filter abbreviations CCF and DCT, which were previously
unsupported (at least in `decode_stream_data`). Support for these
abbreviations was added for completeness (although deprecated
immediately). The tests show that using `PageObject.images` never
errored out and does not trigger the deprecation now, thanks to the
proper mapping in our own code.
diff --git a/pypdf/filters.py b/pypdf/filters.py
@@ -50,6 +50,7 @@
 from ._codecs._codecs import LzwCodec as _LzwCodec
 from ._utils import (
     WHITESPACES_AS_BYTES,
+    deprecate,
     deprecation_with_replacement,
     logger_warning,
 )
@@ -753,7 +754,16 @@ def _is_binary_compatible() -> bool:
         return Version(version) >= Version("0.19")
 
 
-def decode_stream_data(stream: Any) -> bytes:
+def _deprecate_inline_image_filters(filter_name: str, old_name: str, new_name: str) -> None:
+    if filter_name != old_name:
+        return
+    deprecate(
+        f"The filter name {old_name} is deprecated and will be removed in pypdf 7.0.0. Use {new_name} instead.",
+        4,
+    )
+
+
+def decode_stream_data(stream: StreamObject) -> bytes:
     """
     Decode the stream data based on the specified filters.
 
@@ -787,19 +797,26 @@ def decode_stream_data(stream: Any) -> bytes:
         if isinstance(params, NullObject):
             params = {}
         if filter_name in (FT.ASCII_HEX_DECODE, FTA.AHx):
+            _deprecate_inline_image_filters(filter_name=filter_name, old_name=FTA.AHx, new_name=FT.ASCII_HEX_DECODE)
             data = ASCIIHexDecode.decode(data)
         elif filter_name in (FT.ASCII_85_DECODE, FTA.A85):
+            _deprecate_inline_image_filters(filter_name=filter_name, old_name=FTA.A85, new_name=FT.ASCII_85_DECODE)
             data = ASCII85Decode.decode(data)
         elif filter_name in (FT.LZW_DECODE, FTA.LZW):
+            _deprecate_inline_image_filters(filter_name=filter_name, old_name=FTA.LZW, new_name=FT.LZW_DECODE)
             data = LZWDecode.decode(data, params)
         elif filter_name in (FT.FLATE_DECODE, FTA.FL):
+            _deprecate_inline_image_filters(filter_name=filter_name, old_name=FTA.FL, new_name=FT.FLATE_DECODE)
             data = FlateDecode.decode(data, params)
         elif filter_name in (FT.RUN_LENGTH_DECODE, FTA.RL):
+            _deprecate_inline_image_filters(filter_name=filter_name, old_name=FTA.RL, new_name=FT.RUN_LENGTH_DECODE)
             data = RunLengthDecode.decode(data)
-        elif filter_name == FT.CCITT_FAX_DECODE:
+        elif filter_name in (FT.CCITT_FAX_DECODE, FTA.CCF):
+            _deprecate_inline_image_filters(filter_name=filter_name, old_name=FTA.CCF, new_name=FT.CCITT_FAX_DECODE)
             height = stream.get(IA.HEIGHT, ())
             data = CCITTFaxDecode.decode(data, params, height)
-        elif filter_name == FT.DCT_DECODE:
+        elif filter_name in (FT.DCT_DECODE, FTA.DCT):
+            _deprecate_inline_image_filters(filter_name=filter_name, old_name=FTA.DCT, new_name=FT.DCT_DECODE)
             data = DCTDecode.decode(data)
         elif filter_name == FT.JPX_DECODE:
             data = JPXDecode.decode(data)
diff --git a/tests/test_filters.py b/tests/test_filters.py
@@ -13,7 +13,7 @@
 import pytest
 from PIL import Image, ImageOps
 
-from pypdf import PdfReader
+from pypdf import PdfReader, PdfWriter
 from pypdf.errors import DependencyError, DeprecationError, LimitReachedError, PdfReadError, PdfStreamError
 from pypdf.filters import (
     ASCII85Decode,
@@ -24,6 +24,7 @@
     FlateDecode,
     JBIG2Decode,
     RunLengthDecode,
+    decode_stream_data,
     decompress,
 )
 from pypdf.generic import (
@@ -36,6 +37,7 @@
     NullObject,
     NumberObject,
     StreamObject,
+    TextStringObject,
 )
 
 from . import PILContext, get_data_from_url, get_image_data
@@ -902,3 +904,112 @@ def test_decompress__logging_on_invalid_data(caplog):
     assert len(encoded) > 5
     assert codec.decode(encoded[5:]) == b""
     assert caplog.messages == ["Error -3 while decompressing data: incorrect header check"]
+
+
+def test_ccittfaxdecode__ccf_inline():
+    writer = PdfWriter(clone_from=RESOURCE_ROOT / "jpeg.pdf")
+    page = writer.pages[0]
+    writer.remove_images()
+
+    image_data = (
+        b"\nBI\n  /W 16\n  /H 16\n  /CS /G\n  /BPC 1\n  /F [/CCF]\n"
+        b"  /DP [ << /K -1 /BlackIs1 false /Columns 16 /Rows 16 >> ]\nID\n"
+        b"&\xa0\xbf\xcc9\x14|G#\x1f\xff\xf1\xcc9\x18\xfe\xbbX\xfc\x00@\x04"
+        b"\nEI\n"
+    )
+    content_stream = page.get_contents()
+    content_stream.set_data(
+        content_stream.get_data().replace(b"/Im4 Do", b"").replace(b"\nET", image_data)
+    )
+    page.replace_contents(content_stream)
+
+    expected = PdfReader(RESOURCE_ROOT / "imagemagick-CCITTFaxDecode.pdf").pages[0].images[0].image
+    assert get_image_data(expected) == get_image_data(page.images[0].image)
+
+
+def test_dctdecode__dct_inline():
+    writer = PdfWriter(clone_from=RESOURCE_ROOT / "jpeg.pdf")
+    page = writer.pages[0]
+    writer.remove_images()
+
+    image_data = (
+        b"\nBI\n  /W 16\n  /H 16\n  /CS /G\n  /BPC 8\n  /F [/DCT]\nID\n"
+        b"\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x01,\x01,\x00\x00\xff\xfe\x00\x13Created with GIMP\xff\xe2"
+        b"\x02\xb0ICC_PROFILE\x00\x01\x01\x00\x00\x02\xa0lcms\x040\x00\x00mntrRGB XYZ \x07\xe6\x00\x04\x00\x0f\x00"
+        b"\t\x00\x1d\x007acspAPPL\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+        b"\x00\x00\x00\x00\x00\x00\xf6\xd6\x00\x01\x00\x00\x00\x00\xd3-lcms\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+        b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+        b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\rdesc\x00\x00\x01 \x00\x00\x00@cprt\x00\x00\x01`"
+        b"\x00\x00\x006wtpt\x00\x00\x01\x98\x00\x00\x00\x14chad\x00\x00\x01\xac\x00\x00\x00,rXYZ\x00\x00\x01\xd8"
+        b"\x00\x00\x00\x14bXYZ\x00\x00\x01\xec\x00\x00\x00\x14gXYZ\x00\x00\x02\x00\x00\x00\x00\x14rTRC\x00\x00"
+        b"\x02\x14\x00\x00\x00 gTRC\x00\x00\x02\x14\x00\x00\x00 bTRC\x00\x00\x02\x14\x00\x00\x00 chrm\x00\x00"
+        b"\x024\x00\x00\x00$dmnd\x00\x00\x02X\x00\x00\x00$dmdd\x00\x00\x02|\x00\x00\x00$mluc\x00\x00\x00\x00"
+        b"\x00\x00\x00\x01\x00\x00\x00\x0cenUS\x00\x00\x00$\x00\x00\x00\x1c\x00G\x00I\x00M\x00P\x00 \x00b\x00"
+        b"u\x00i\x00l\x00t\x00-\x00i\x00n\x00 \x00s\x00R\x00G\x00Bmluc\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00"
+        b"\x00\x0cenUS\x00\x00\x00\x1a\x00\x00\x00\x1c\x00P\x00u\x00b\x00l\x00i\x00c\x00 \x00D\x00o\x00m\x00a"
+        b"\x00i\x00n\x00\x00XYZ \x00\x00\x00\x00\x00\x00\xf6\xd6\x00\x01\x00\x00\x00\x00\xd3-sf32\x00\x00\x00"
+        b"\x00\x00\x01\x0cB\x00\x00\x05\xde\xff\xff\xf3%\x00\x00\x07\x93\x00\x00\xfd\x90\xff\xff\xfb\xa1\xff"
+        b"\xff\xfd\xa2\x00\x00\x03\xdc\x00\x00\xc0nXYZ \x00\x00\x00\x00\x00\x00o\xa0\x00\x008\xf5\x00\x00\x03"
+        b"\x90XYZ \x00\x00\x00\x00\x00\x00$\x9f\x00\x00\x0f\x84\x00\x00\xb6\xc4XYZ \x00\x00\x00\x00\x00\x00b"
+        b"\x97\x00\x00\xb7\x87\x00\x00\x18\xd9para\x00\x00\x00\x00\x00\x03\x00\x00\x00\x02ff\x00\x00\xf2\xa7"
+        b"\x00\x00\rY\x00\x00\x13\xd0\x00\x00\n[chrm\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\xa3\xd7\x00\x00T|"
+        b"\x00\x00L\xcd\x00\x00\x99\x9a\x00\x00&g\x00\x00\x0f\\mluc\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00"
+        b"\x00\x0cenUS\x00\x00\x00\x08\x00\x00\x00\x1c\x00G\x00I\x00M\x00Pmluc\x00\x00\x00\x00\x00\x00\x00"
+        b"\x01\x00\x00\x00\x0cenUS\x00\x00\x00\x08\x00\x00\x00\x1c\x00s\x00R\x00G\x00B\xff\xdb\x00C\x00\x01"
+        b"\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
+        b"\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
+        b"\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\xff\xc0\x00\x0b\x08\x00\x10\x00\x10"
+        b"\x01\x01\x11\x00\xff\xc4\x00\x17\x00\x00\x03\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+        b"\x00\x06\x07\x08\n\xff\xc4\x00\x1d\x10\x00\x03\x00\x03\x01\x01\x01\x01\x00\x00\x00\x00\x00\x00"
+        b"\x00\x00\x05\x06\x07\x01\x04\x08\x02\x03\x13\x15\xff\xda\x00\x08\x01\x01\x00\x00?\x00\xc4D\x0eA"
+        b"\x8e\x91\xa8\xf3\xcf5N\xb5\x7f\x87k\xbc_\x96\xe3\x83]\x9c\\\xff\x00\x19f1^=:A\x98jm.\x03\x9f\x10"
+        b"mW\xc2\xcbYF\xd2T\x06\xef,OXfX`^\x18\x0ez\xb4U \x91\x17\xd4\xf6\xbe\xc2\xb7\x85s:{\xa1\x8f\xec;}"
+        b"\x8f-l/1|\x19\x86|\x14\xc5+j\x8cm\xf0\xde\x10\xba\x7f\xa5=\xe2\x86\xd8\x18\r\xed$o\xab2h\xbc\xad"
+        b"\x8cS\x18\xba\xd8,\xb2\xa3\xbf\xd9\xd8I\x84+\x07\x9d\x1ay\x1cr\xba\x81\nu\x0f\xa7yk\xa0%5\xf2\xf4"
+        b"\xf4\x9e\x8d\xe6\x19\x90+s;P\xfd\xd1\xb3\x8f\xac\xf8\x0e@5\xf5\x8f(i\xc3\x0e\xf3\xd3\xbc\xf5\xa5"
+        b"\xed:\x85<$\xee\xd1@%i\xde\x1ao\xdaF$\t?Vq\xce\x92\xde\xe1\xbd\x14H\x8a'\"\x8d\xbf75\xaef\x90\xc3|"
+        b"\xe8~\x82\x04\xab+3O.\xdeX&\xac\xf2t\x89\xcf\xd3\xfa\x85\xbdFu=\x8e*\xa9\xfb!\x96\xed\xfa\xe3S\xe5A"
+        b"\xf2\xa8\xf5\xe8\xd7\x85\xa5\x05\t\xf8a\xff\x00\xff\xd9"
+        b"\nEI\n"
+    )
+    content_stream = page.get_contents()
+    content_stream.set_data(
+        content_stream.get_data().replace(b"/Im4 Do", b"").replace(b"\nET", image_data)
+    )
+    page.replace_contents(content_stream)
+
+    expected = PdfReader(RESOURCE_ROOT / "imagemagick-images.pdf").pages[3].images[0].image
+    assert get_image_data(expected) == get_image_data(page.images[0].image)
+
+
+def test_deprecate_inline_image_filters():
+    stream = ContentStream(stream=None, pdf=None)
+    stream.set_data(b"&\xa0\xbf\xcc9\x14|G#\x1f\xff\xf1\xcc9\x18\xfe\xbbX\xfc\x00@\x04")
+
+    # The abbreviations do not work here, which is one of the reasons for the deprecation.
+    stream[NameObject("/Width")] = NumberObject(16)
+    stream[NameObject("/Height")] = NumberObject(16)
+    stream[NameObject("/ColorSpace")] = NameObject("/DeviceGray")
+    stream[NameObject("/BitsPerComponent")] = NumberObject(1)
+    stream[NameObject("/Filter")] = NameObject("/CCF")
+    stream[NameObject("/DecodeParams")] = ArrayObject(
+        [
+            DictionaryObject(
+                {
+                    NameObject("/K"): NumberObject(-1),
+                    NameObject("/BlackIs1"): TextStringObject("false"),
+                    NameObject("/Columns"): NumberObject(16),
+                    NameObject("/Rows"): NumberObject(16),
+                }
+            )
+        ]
+    )
+
+    with pytest.warns(
+            expected_warning=DeprecationWarning,
+            match=r"^The filter name /CCF is deprecated and will be removed in pypdf 7\.0\.0\. Use /CCITTFaxDecode instead\.$"  # noqa: E501
+    ):
+        decode_stream_data(stream)
+
+    stream[NameObject("/Filter")] = NameObject("/CCITTFaxDecode")
+    assert decode_stream_data(stream).startswith(b"II*")