PaddlePaddle · Copilot · Oct 14, 2025 · Oct 14, 2025 · Oct 14, 2025
diff --git a/docs/version3.x/pipeline_usage/seal_recognition.en.md b/docs/version3.x/pipeline_usage/seal_recognition.en.md
@@ -699,6 +699,23 @@ devanagari_PP-OCRv3_mobile_rec_infer.tar">Inference Model</a>/<a href="https://p
 <br />
 <b>If you are more concerned with model accuracy, please choose a model with higher accuracy. If you are more concerned with inference speed, please choose a model with faster inference speed. If you are more concerned with model storage size, please choose a model with smaller storage size</b>.
 
+## 1.1 Known Issues
+
+⚠️ **Multi-page PDF Processing Issue (PaddleX v3.2.0/v3.2.1)**
+
+If you are using PaddleX v3.2.0 or v3.2.1, you may encounter an `IndexError: list index out of range` error when processing multi-page PDF files. This is a known bug in PaddleX; the fix is available on the PaddleX `release/3.2` branch (see the fix commit linked below).
+
+**Solutions:**
+
+1. **Recommended**: Install the PaddleX version with the fix
+   ```bash
+   pip install 'git+https://github.com/PaddlePaddle/PaddleX.git@release/3.2#egg=paddlex[ocr-core]'
+   ```
+
+2. **Temporary workaround**: Split multi-page PDFs into single-page PDFs and process them separately
+
+Related link: [PaddleX Fix Commit](https://github.com/PaddlePaddle/PaddleX/commit/bdcc1f7dc)
+
 
 ## 2. Quick Start
 

diff --git a/docs/version3.x/pipeline_usage/seal_recognition.md b/docs/version3.x/pipeline_usage/seal_recognition.md
@@ -700,7 +700,24 @@ devanagari_PP-OCRv3_mobile_rec_infer.tar">推理模型</a>/<a href="https://padd
 </details>
 
 <br />
-<b>如您更考虑模型精度，请选择精度较高的模型，如您更考虑模型推理速度，请选择推理速度较快的模型，如您更考虑模型存储大小，请选择存储大小较小的模型</b>。
+<b>如您更考虑模型精度，请选择精度较高的模型，如您更考虑模型推理速度，请选择推理速度较快的模型，如您更考虑模型存储大小，请选择存储大小较小的模型</b>。
+
+## 1.1 已知问题
+
+⚠️ **多页PDF处理问题 (PaddleX v3.2.0/v3.2.1)**
+
+如果您使用 PaddleX v3.2.0 或 v3.2.1 版本，在处理多页PDF文件时可能会遇到 `IndexError: list index out of range` 错误。这是 PaddleX 的一个已知bug，修复已合入 PaddleX 的 `release/3.2` 分支（见下方的修复提交链接）。
+
+**解决方案：**
+
+1. **推荐方式**：安装包含修复的 PaddleX 版本
+   ```bash
+   pip install 'git+https://github.com/PaddlePaddle/PaddleX.git@release/3.2#egg=paddlex[ocr-core]'
+   ```
+
+2. **临时方案**：将多页PDF拆分为单页PDF分别处理
+
+相关链接：[PaddleX修复提交](https://github.com/PaddlePaddle/PaddleX/commit/bdcc1f7dc)
 
 ## 2. 快速开始
 

diff --git a/paddleocr/_pipelines/seal_recognition.py b/paddleocr/_pipelines/seal_recognition.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import warnings
+
 from .._utils.cli import (
     add_simple_inference_args,
     get_subcommand_args,
@@ -51,6 +53,8 @@ def __init__(
         seal_rec_score_thresh=None,
         **kwargs,
     ):
+        # Check for known PaddleX bugs and warn users
+        self._check_paddlex_version()
 
         self._params = {
             "doc_orientation_classify_model_name": doc_orientation_classify_model_name,
@@ -80,6 +84,33 @@ def __init__(
         }
         super().__init__(**kwargs)
 
+    def _check_paddlex_version(self):
+        """Warn if the installed PaddleX has the known multi-page PDF bug.
+
+        Advisory only: a missing PaddleX or an unparseable version is ignored.
+        """
+        try:
+            import paddlex
+            from packaging.version import parse
+            paddlex_version = parse(paddlex.__version__)
+            if parse("3.2.0") <= paddlex_version <= parse("3.2.1"):
+                warnings.warn(
+                    f"\nDetected PaddleX version {paddlex.__version__} which contains a known bug "
+                    "that causes 'IndexError: list index out of range' when processing multi-page PDFs "
+                    "with seal recognition enabled.\n\n"
+                    "This bug has been fixed in PaddleX but not yet released. "
+                    "If you encounter this error, you have two options:\n"
+                    "1. Install the fixed version from GitHub:\n"
+                    "   pip install 'git+https://github.com/PaddlePaddle/PaddleX.git@release/3.2#egg=paddlex[ocr-core]'\n"
+                    "2. Process single-page PDFs only, or extract pages individually.\n\n"
+                    "For more details, see: https://github.com/PaddlePaddle/PaddleX/commit/bdcc1f7dc",
+                    UserWarning,
+                    stacklevel=3,  # attribute the warning to the caller of __init__
+                )
+        except (ImportError, AttributeError, ValueError, TypeError):
+            # InvalidVersion is a ValueError; TypeError covers a non-str version.
+            pass
+
     @property
     def _paddlex_pipeline_name(self):
         return "seal_recognition"
@@ -104,24 +135,43 @@ def predict_iter(
         seal_rec_score_thresh=None,
         **kwargs,
     ):
-        return self.paddlex_pipeline.predict(
-            input,
-            use_doc_orientation_classify=use_doc_orientation_classify,
-            use_doc_unwarping=use_doc_unwarping,
-            use_layout_detection=use_layout_detection,
-            layout_det_res=layout_det_res,
-            layout_threshold=layout_threshold,
-            layout_nms=layout_nms,
-            layout_unclip_ratio=layout_unclip_ratio,
-            layout_merge_bboxes_mode=layout_merge_bboxes_mode,
-            seal_det_limit_side_len=seal_det_limit_side_len,
-            seal_det_limit_type=seal_det_limit_type,
-            seal_det_thresh=seal_det_thresh,
-            seal_det_box_thresh=seal_det_box_thresh,
-            seal_det_unclip_ratio=seal_det_unclip_ratio,
-            seal_rec_score_thresh=seal_rec_score_thresh,
-            **kwargs,
-        )
+        try:
+            yield from self.paddlex_pipeline.predict(
+                input,
+                use_doc_orientation_classify=use_doc_orientation_classify,
+                use_doc_unwarping=use_doc_unwarping,
+                use_layout_detection=use_layout_detection,
+                layout_det_res=layout_det_res,
+                layout_threshold=layout_threshold,
+                layout_nms=layout_nms,
+                layout_unclip_ratio=layout_unclip_ratio,
+                layout_merge_bboxes_mode=layout_merge_bboxes_mode,
+                seal_det_limit_side_len=seal_det_limit_side_len,
+                seal_det_limit_type=seal_det_limit_type,
+                seal_det_thresh=seal_det_thresh,
+                seal_det_box_thresh=seal_det_box_thresh,
+                seal_det_unclip_ratio=seal_det_unclip_ratio,
+                seal_rec_score_thresh=seal_rec_score_thresh,
+                **kwargs,
+            )
+        except IndexError as e:
+            # Check if this is the known multi-page PDF bug
+            if "list index out of range" in str(e):
+                import paddlex
+                from packaging.version import parse
+
+                paddlex_version = parse(paddlex.__version__)
+                if parse("3.2.0") <= paddlex_version <= parse("3.2.1"):
+                    raise RuntimeError(
+                        f"Encountered a known bug in PaddleX {paddlex.__version__} when processing multi-page PDFs "
+                        "with seal recognition.\n\n"
+                        "To fix this issue, please install the fixed version:\n"
+                        "  pip install 'git+https://github.com/PaddlePaddle/PaddleX.git@release/3.2#egg=paddlex[ocr-core]'\n\n"
+                        "Alternatively, process single-page PDFs only.\n\n"
+                        "For more details, see: https://github.com/PaddlePaddle/PaddleX/commit/bdcc1f7dc"
+                    ) from e
+            # Re-raise if it's a different error
+            raise
 
     def predict(
         self,

diff --git a/tests/pipelines/test_seal_rec.py b/tests/pipelines/test_seal_rec.py
@@ -68,3 +68,38 @@ def test_predict_params(
         "dummy_path",
         params,
     )
+
+
+def test_paddlex_version_warning(monkeypatch) -> None:
+    """
+    Constructing SealRecognition under an affected PaddleX version (3.2.0 here)
+    must emit the multi-page PDF warning.
+    """
+    import sys
+    import warnings
+
+    # Stand-in module object that reports an affected PaddleX release.
+    class FakePaddleX:
+        __version__ = "3.2.0"
+
+    monkeypatch.setitem(sys.modules, "paddlex", FakePaddleX())
+
+    with warnings.catch_warnings(record=True) as caught:
+        warnings.simplefilter("always")
+        # Imported lazily, after the stand-in is installed in sys.modules.
+        from paddleocr._pipelines.seal_recognition import SealRecognition
+
+        try:
+            _ = SealRecognition()
+        except Exception:
+            # Construction may fail because the stand-in lacks the real PaddleX
+            # API; the version check is the first step of __init__, so it has run.
+            pass
+
+    messages = [str(item.message) for item in caught]
+    # At least one recorded warning must describe the multi-page PDF bug.
+    assert len(messages) >= 1
+    assert any(
+        "PaddleX version" in text and "multi-page PDFs" in text
+        for text in messages
+    ), f"Expected warning about PaddleX version bug, got: {messages}"