update mlx-vlm version and vl docs (#17605) (#17608)

zhang-prog · web-flow · commit 3eb3ad9c5a62 · 2026-02-02T15:28:46.000+08:00
* update mlx-vlm version

* update

* update

* update vl docs
diff --git a/docs/version3.x/pipeline_usage/PaddleOCR-VL-Apple-Silicon.en.md b/docs/version3.x/pipeline_usage/PaddleOCR-VL-Apple-Silicon.en.md
@@ -48,9 +48,8 @@ The inference performance under default configurations is not fully optimized an
 Install the MLX-VLM inference framework:
 
 ```shell
-git clone https://github.com/Blaizzy/mlx-vlm.git
-cd mlx-vlm
-pip install -e .
+python -m pip install -U mlx-vlm
+python -m pip install "transformers<5.0.0"
 ```
 
 Start the MLX-VLM inference service:
diff --git a/docs/version3.x/pipeline_usage/PaddleOCR-VL-Apple-Silicon.md b/docs/version3.x/pipeline_usage/PaddleOCR-VL-Apple-Silicon.md
@@ -48,9 +48,8 @@ python -m pip install -U "paddleocr[doc-parser]"
 安装 MLX-VLM 推理框架：
 
 ```shell
-git clone https://github.com/Blaizzy/mlx-vlm.git
-cd mlx-vlm
-pip install -e .
+python -m pip install -U mlx-vlm
+python -m pip install "transformers<5.0.0"
 ```
 
 启动 MLX-VLM 推理服务：
diff --git a/docs/version3.x/pipeline_usage/PaddleOCR-VL.en.md b/docs/version3.x/pipeline_usage/PaddleOCR-VL.en.md
@@ -238,6 +238,12 @@ paddleocr doc_parser -i ./paddleocr_vl_demo.png --device dcu
 # MetaX GPU
 paddleocr doc_parser -i ./paddleocr_vl_demo.png --device metax_gpu
 
+# Apple Silicon
+paddleocr doc_parser -i ./paddleocr_vl_demo.png --device cpu
+
+# Huawei Ascend NPU
+# For Huawei Ascend NPU, please refer to Chapter 3 to run inference with PaddlePaddle + vLLM
+
 # Use --use_doc_orientation_classify to enable document orientation classification
 paddleocr doc_parser -i ./paddleocr_vl_demo.png --use_doc_orientation_classify True
 
@@ -654,6 +660,10 @@ pipeline = PaddleOCRVL()
 # pipeline = PaddleOCRVL(device="dcu")
 # MetaX GPU
 # pipeline = PaddleOCRVL(device="metax_gpu")
+# Apple Silicon
+# pipeline = PaddleOCRVL(device="cpu")
+# Huawei Ascend NPU
+# For Huawei Ascend NPU, please refer to Chapter 3 to run inference with PaddlePaddle + vLLM
 
 # pipeline = PaddleOCRVL(use_doc_orientation_classify=True) # Use use_doc_orientation_classify to enable/disable document orientation classification model
 # pipeline = PaddleOCRVL(use_doc_unwarping=True) # Use use_doc_unwarping to enable/disable document unwarping module
@@ -666,21 +676,14 @@ for res in output:
     res.save_to_markdown(save_path="output") ## Save the current image's result in Markdown format
 ```
 
-For PDF files, each page will be processed individually, and a separate Markdown file will be generated for each page. If you wish to perform cross-page table merging, reconstruct multi-level labels, or merge multi-page results, you can achieve this using the following method:
+For PDF files, each page will be processed individually, and a separate Markdown file will be generated for each page. If you wish to perform cross-page table merging, reconstruct multi-level headings, or merge multi-page results, you can achieve this using the following method:
 
 ```python
 from paddleocr import PaddleOCRVL
 
 input_file = "./your_pdf_file.pdf"
 
-# NVIDIA GPU
 pipeline = PaddleOCRVL()
-# KUNLUNXIN XPU
-# pipeline = PaddleOCRVL(device="xpu")
-# HYGON DCU
-# pipeline = PaddleOCRVL(device="dcu")
-# MetaX GPU
-# pipeline = PaddleOCRVL(device="metax_gpu")
 
 output = pipeline.predict(input=input_file)
 
diff --git a/docs/version3.x/pipeline_usage/PaddleOCR-VL.md b/docs/version3.x/pipeline_usage/PaddleOCR-VL.md
@@ -240,6 +240,12 @@ paddleocr doc_parser -i ./paddleocr_vl_demo.png --device dcu
 # 沐曦 GPU
 paddleocr doc_parser -i ./paddleocr_vl_demo.png --device metax_gpu
 
+# Apple Silicon
+paddleocr doc_parser -i ./paddleocr_vl_demo.png --device cpu
+
+# 华为昇腾 NPU
+# 华为昇腾 NPU 请参考第 3 章节使用 PaddlePaddle + vLLM 的方式进行推理
+
 # 通过 --use_doc_orientation_classify 指定是否使用文档方向分类模型
 paddleocr doc_parser -i ./paddleocr_vl_demo.png --use_doc_orientation_classify True
 
@@ -632,6 +638,10 @@ pipeline = PaddleOCRVL()
 # pipeline = PaddleOCRVL(device="dcu")
 # 沐曦 GPU
 # pipeline = PaddleOCRVL(device="metax_gpu")
+# Apple Silicon
+# pipeline = PaddleOCRVL(device="cpu")
+# 华为昇腾 NPU
+# 华为昇腾 NPU 请参考第 3 章节使用 PaddlePaddle + vLLM 的方式进行推理
 
 # pipeline = PaddleOCRVL(use_doc_orientation_classify=True) # 通过 use_doc_orientation_classify 指定是否使用文档方向分类模型
 # pipeline = PaddleOCRVL(use_doc_unwarping=True) # 通过 use_doc_unwarping 指定是否使用文本图像矫正模块
@@ -644,22 +654,15 @@ for res in output:
     res.save_to_markdown(save_path="output") ## 保存当前图像的markdown格式的结果
 ```
 
-如果是 PDF 文件，会将 PDF 的每一页单独处理，每一页的 Markdown 文件也会对应单独的结果。如果您希望对多页的推理结果进行跨页表格合并、重建多级标和合并多页结果等需求，可以通过如下方式实现：
+如果是 PDF 文件，会将 PDF 的每一页单独处理，每一页的 Markdown 文件也会对应单独的结果。如果您希望对多页的推理结果进行跨页表格合并、重建多级标题和合并多页结果等需求，可以通过如下方式实现：
 
 ```python
 from paddleocr import PaddleOCRVL
 
 input_file = "./your_pdf_file.pdf"
 output_path = Path("./output")
 
-# 英伟达 GPU
 pipeline = PaddleOCRVL()
-# 昆仑芯 XPU
-# pipeline = PaddleOCRVL(device="xpu")
-# 海光 DCU
-# pipeline = PaddleOCRVL(device="dcu")
-# 沐曦 GPU
-# pipeline = PaddleOCRVL(device="metax_gpu")
 
 output = pipeline.predict(input=input_file)
 
@@ -671,7 +674,6 @@ output = pipeline.restructure_pages(pages_res)
 # output = pipeline.restructure_pages(pages_res, merge_table=True, relevel_titles=True) # 合并跨页表格，重建多级标题
 # output = pipeline.restructure_pages(pages_res, merge_table=True, relevel_titles=True, merge_pages=True) # 合并跨页表格，重建多级标题，合并多页结果为一页
 
-
 for res in output:
     res.print() ## 打印预测的结构化输出
     res.save_to_json(save_path="output") ## 保存当前图像的结构化json结果
@@ -691,19 +693,6 @@ output = pipeline.predict(["imgs/file1.png", "imgs/file2.png", "imgs/file3.png"]
 #     output = pipeline.predict(file)
 ```
 
-如果您需要处理多个文件，**建议将包含文件的目录路径，或者文件路径列表传入 `predict` 方法**，以最大化处理效率。例如：
-
-```python
-# `imgs` 目录中包含多张待处理图像：file1.png、file2.png、file3.png
-# 传入目录路径
-output = pipeline.predict("imgs")
-# 或者传入文件路径列表
-output = pipeline.predict(["imgs/file1.png", "imgs/file2.png", "imgs/file3.png"])
-# 以上两种方式的处理效率高于下列方式：
-# for file in ["imgs/file1.png", "imgs/file2.png", "imgs/file3.png"]:
-#     output = pipeline.predict(file)
-```
-
 **注：**
 
 - 在示例代码中，`use_doc_orientation_classify`、`use_doc_unwarping` 参数默认均设置为 `False`，分别表示关闭文档方向分类、文本图像矫正功能，如果需要使用这些功能，可以手动设置为 `True`。