PaddlePaddle
diff --git a/‎configs/rec/PP-FormuaNet/PP-FormulaNet_plus-L.yaml‎
Lines changed: 122 additions & 0 deletions b/‎configs/rec/PP-FormuaNet/PP-FormulaNet_plus-L.yaml‎
Lines changed: 122 additions & 0 deletions
diff --git a/‎configs/rec/PP-FormuaNet/PP-FormulaNet_plus-M.yaml‎
Lines changed: 119 additions & 0 deletions b/‎configs/rec/PP-FormuaNet/PP-FormulaNet_plus-M.yaml‎
Lines changed: 119 additions & 0 deletions
diff --git a/‎configs/rec/PP-FormuaNet/PP-FormulaNet_plus-S.yaml‎
Lines changed: 120 additions & 0 deletions b/‎configs/rec/PP-FormuaNet/PP-FormulaNet_plus-S.yaml‎
Lines changed: 120 additions & 0 deletions
diff --git a/‎docs/algorithm/formula_recognition/algorithm_rec_ppformulanet.md‎
Lines changed: 3 additions & 0 deletions b/‎docs/algorithm/formula_recognition/algorithm_rec_ppformulanet.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎docs/algorithm/formula_recognition/algorithm_rec_ppformulanet_en.md‎
Lines changed: 3 additions & 0 deletions b/‎docs/algorithm/formula_recognition/algorithm_rec_ppformulanet_en.md‎
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,122 @@
+Global:
+  model_name: PP-FormulaNet_plus-L # To use static model for inference.
+  use_gpu: True
+  epoch_num: 10
+  log_smooth_window: 10
+  print_batch_step: 10
+  save_model_dir: ./output/rec/pp_formulanet_plus_l/
+  save_epoch_step: 2
+  # evaluation is run every 417 iterations (1 epoch) (batch_size = 24)  # max_seq_len: 2560 (see max_new_tokens)
+  eval_batch_step: [0,  417 ]
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/datasets/pme_demo/0000013.png
+  infer_mode: False
+  use_space_char: False
+  rec_char_dict_path: &rec_char_dict_path ppocr/utils/dict/unimernet_tokenizer
+  max_new_tokens: &max_new_tokens 2560
+  input_size: &input_size [768, 768]
+  save_res_path: ./output/rec/predicts_pp_formulanet_plus_l.txt
+  allow_resize_largeImg: False
+  start_ema: True
+  d2s_train_image_shape: [1,768,768]
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  weight_decay: 0.05
+  lr:
+    name: LinearWarmupCosine
+    learning_rate: 0.0001
+
+Architecture:
+  model_type: rec
+  algorithm: PP-FormulaNet_plus-L
+  in_channels: 3
+  Transform:
+  Backbone:
+    name: Vary_VIT_B_Formula
+    image_size: 768 
+    encoder_embed_dim: 768
+    encoder_depth: 12
+    encoder_num_heads: 12
+    encoder_global_attn_indexes: [2, 5, 8, 11]
+  Head:
+    name: PPFormulaNet_Head
+    max_new_tokens: *max_new_tokens
+    decoder_start_token_id: 0
+    decoder_ffn_dim: 2048
+    decoder_hidden_size: 512
+    decoder_layers: 8
+    temperature: 0.2
+    do_sample: False
+    top_p: 0.95 
+    encoder_hidden_size: 1024
+    is_export: False
+    length_aware: False 
+    use_parallel: False
+    parallel_step: 0
+
+Loss:
+  name: PPFormulaNet_L_Loss
+
+PostProcess:
+  name:  UniMERNetDecode
+  rec_char_dict_path:  *rec_char_dict_path
+
+Metric:
+  name: LaTeXOCRMetric
+  main_indicator:  exp_rate
+  cal_bleu_score: True
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./ocr_rec_latexocr_dataset_example
+    label_file_list: ["./ocr_rec_latexocr_dataset_example/train.txt"]
+    transforms:
+      - UniMERNetImgDecode:
+          input_size: *input_size
+          random_padding: True
+          random_resize:  True
+          random_crop: True 
+      - UniMERNetTrainTransform: 
+      - LatexImageFormat:
+      - UniMERNetLabelEncode:
+          rec_char_dict_path: *rec_char_dict_path
+          max_seq_len:  *max_new_tokens
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'attention_mask']
+
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 3
+    num_workers: 0
+    collate_fn: UniMERNetCollator
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./ocr_rec_latexocr_dataset_example
+    label_file_list: ["./ocr_rec_latexocr_dataset_example/val.txt"]
+    transforms:
+      - UniMERNetImgDecode:
+          input_size: *input_size
+      - UniMERNetTestTransform:
+      - LatexImageFormat:
+      - UniMERNetLabelEncode:
+          max_seq_len:  *max_new_tokens
+          rec_char_dict_path: *rec_char_dict_path
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'attention_mask', 'filename']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 10
+    num_workers: 0
+    collate_fn: UniMERNetCollator
@@ -0,0 +1,119 @@
+Global:
+  model_name: PP-FormulaNet_plus-M # To use static model for inference.
+  use_gpu: True
+  epoch_num: 20
+  log_smooth_window: 10
+  print_batch_step: 10
+  save_model_dir: ./output/rec/pp_formulanet_plus_m/
+  save_epoch_step: 2
+  # evaluation is run every 179 iterations (1 epoch) (batch_size = 56)  # max_seq_len: 2560 (see max_new_tokens)
+  eval_batch_step: [0, 179]
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/datasets/pme_demo/0000013.png
+  infer_mode: False
+  use_space_char: False
+  rec_char_dict_path: &rec_char_dict_path  ppocr/utils/dict/unimernet_tokenizer
+  max_new_tokens: &max_new_tokens 2560
+  input_size: &input_size [384, 384]
+  save_res_path: ./output/rec/predicts_pp_formulanet_plus_m.txt
+  allow_resize_largeImg: False
+  start_ema: True
+  d2s_train_image_shape: [1,384,384]
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  weight_decay: 0.05
+  lr:
+    name: LinearWarmupCosine
+    learning_rate: 0.0001
+
+Architecture:
+  model_type: rec
+  algorithm: PP-FormulaNet_plus-M
+  in_channels: 3
+  Transform:
+  Backbone:
+    name: PPHGNetV2_B6_Formula
+    class_num: 1024
+
+  Head:
+    name: PPFormulaNet_Head
+    max_new_tokens: *max_new_tokens
+    decoder_start_token_id: 0
+    decoder_ffn_dim: 2048
+    decoder_hidden_size: 512
+    decoder_layers: 6
+    temperature: 0.2
+    do_sample: False
+    top_p: 0.95 
+    encoder_hidden_size: 2048
+    is_export: False
+    length_aware: False 
+    use_parallel: False
+    parallel_step: 0
+
+Loss:
+  name: PPFormulaNet_L_Loss
+
+PostProcess:
+  name:  UniMERNetDecode
+  rec_char_dict_path: *rec_char_dict_path
+
+Metric:
+  name: LaTeXOCRMetric
+  main_indicator:  exp_rate
+  cal_bleu_score: True
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./ocr_rec_latexocr_dataset_example
+    label_file_list: ["./ocr_rec_latexocr_dataset_example/train.txt"]
+    transforms:
+      - UniMERNetImgDecode:
+          input_size: *input_size
+          random_padding: True
+          random_resize:  True
+          random_crop: True 
+      - UniMERNetTrainTransform: 
+      - LatexImageFormat:
+      - UniMERNetLabelEncode:
+          rec_char_dict_path: *rec_char_dict_path
+          max_seq_len: *max_new_tokens
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'attention_mask']
+
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 14
+    num_workers: 0
+    collate_fn: UniMERNetCollator
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./ocr_rec_latexocr_dataset_example
+    label_file_list: ["./ocr_rec_latexocr_dataset_example/val.txt"]
+    transforms:
+      - UniMERNetImgDecode:
+          input_size:  *input_size
+      - UniMERNetTestTransform:
+      - LatexImageFormat:
+      - UniMERNetLabelEncode:
+          max_seq_len: *max_new_tokens
+          rec_char_dict_path: *rec_char_dict_path
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'attention_mask', 'filename']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 30
+    num_workers: 0
+    collate_fn: UniMERNetCollator
@@ -0,0 +1,120 @@
+Global:
+  model_name: PP-FormulaNet_plus-S # To use static model for inference.
+  use_gpu: True
+  epoch_num: 20
+  log_smooth_window: 10
+  print_batch_step: 10
+  save_model_dir: ./output/rec/pp_formulanet_plus_s/
+  save_epoch_step: 2
+  # evaluation is run every 179 iterations (1 epoch)(batch_size = 56)   # max_seq_len: 1024
+  eval_batch_step: [0, 179]
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/datasets/pme_demo/0000013.png
+  infer_mode: False
+  use_space_char: False
+  rec_char_dict_path: &rec_char_dict_path  ppocr/utils/dict/unimernet_tokenizer
+  max_new_tokens: &max_new_tokens 1024
+  input_size: &input_size [384, 384]
+  save_res_path: ./output/rec/predicts_pp_formulanet_plus_s.txt
+  allow_resize_largeImg: False
+  start_ema: True
+  d2s_train_image_shape: [1,384,384]
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  weight_decay: 0.05
+  lr:
+    name: LinearWarmupCosine
+    learning_rate: 0.0001
+
+Architecture:
+  model_type: rec
+  algorithm: PP-FormulaNet_plus-S
+  in_channels: 3
+  Transform:
+  Backbone:
+    name: PPHGNetV2_B4_Formula
+    class_num: 1024
+
+  Head:  # decoder head settings for the PP-FormulaNet_plus-S variant
+    name: PPFormulaNet_Head
+    max_new_tokens: *max_new_tokens  # alias of Global.max_new_tokens
+    decoder_start_token_id: 0
+    decoder_ffn_dim: 1536
+    decoder_hidden_size: 384
+    decoder_layers: 2
+    temperature: 0.2
+    do_sample: False
+    top_p: 0.95
+    encoder_hidden_size: 2048
+    is_export: False
+    length_aware: True
+    use_parallel: True  # plain boolean; a trailing comma here would load as the string "True,"
+    parallel_step: 3  # same value as Loss.parallel_step in this config
+
+Loss:
+  name: PPFormulaNet_S_Loss
+  parallel_step: 3
+
+PostProcess:
+  name:  UniMERNetDecode
+  rec_char_dict_path: *rec_char_dict_path
+
+Metric:
+  name: LaTeXOCRMetric
+  main_indicator:  exp_rate
+  cal_bleu_score: True
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./ocr_rec_latexocr_dataset_example
+    label_file_list: ["./ocr_rec_latexocr_dataset_example/train.txt"]
+    transforms:
+      - UniMERNetImgDecode:
+          input_size: *input_size
+          random_padding: True
+          random_resize:  True
+          random_crop: True 
+      - UniMERNetTrainTransform: 
+      - LatexImageFormat:
+      - UniMERNetLabelEncode:
+          rec_char_dict_path: *rec_char_dict_path
+          max_seq_len: *max_new_tokens
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'attention_mask']
+
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 14
+    num_workers: 0
+    collate_fn: UniMERNetCollator
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./ocr_rec_latexocr_dataset_example
+    label_file_list: ["./ocr_rec_latexocr_dataset_example/val.txt"]
+    transforms:
+      - UniMERNetImgDecode:
+          input_size:  *input_size
+      - UniMERNetTestTransform:
+      - LatexImageFormat:
+      - UniMERNetLabelEncode:
+          max_seq_len: *max_new_tokens
+          rec_char_dict_path: *rec_char_dict_path
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'attention_mask', 'filename']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 30
+    num_workers: 0
+    collate_fn: UniMERNetCollator
@@ -11,6 +11,9 @@
 | UniMERNet | Donut Swin | [UniMERNet.yaml](../../../configs/rec/UniMERNet.yaml) |     0.9187  |    0.9252       | 0.8658  |    0.8228   | 0.7740 |     0.8613        |[训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_unimernet_train.tar)|
 | PP-FormulaNet-S | PPHGNetV2_B4 | [PP-FormulaNet-S.yaml](../../../configs/rec/PP-FormuaNet/PP-FormulaNet-S.yaml) |    0.8694   |    0.8071       | 0.9294  |    0.9112    | 0.8391 |    0.8712       |[训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_ppformulanet_s_train.tar)|
 | PP-FormulaNet-L | Vary_VIT_B | [PP-FormulaNet-L.yaml](../../../configs/rec/PP-FormuaNet/PP-FormulaNet-L.yaml) |     0.9055   |     0.9206       | 0.9392  |     0.9273    | 0.9141 |     0.9213         |[训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_ppformulanet_l_train.tar )|
+| PP-FormulaNet_plus-S | PPHGNetV2_B4 | [PP-FormulaNet_plus-S.yaml](../../../configs/rec/PP-FormuaNet/PP-FormulaNet_plus-S.yaml) |     -   |     -       | -  |     -    | - |     -         |[训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_ppformulanet_plus_s_train.tar )|
+| PP-FormulaNet_plus-M | PPHGNetV2_B6 | [PP-FormulaNet_plus-M.yaml](../../../configs/rec/PP-FormuaNet/PP-FormulaNet_plus-M.yaml) |     -   |     -       | -  |     -    | - |     -         |[训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_ppformulanet_plus_m_train.tar )|
+| PP-FormulaNet_plus-L | Vary_VIT_B | [PP-FormulaNet_plus-L.yaml](../../../configs/rec/PP-FormuaNet/PP-FormulaNet_plus-L.yaml) |     -   |     -       | -  |     -    | - |     -         |[训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_ppformulanet_plus_l_train.tar )|
 
 其中，SPE、CPE为UniMERNet的简单公式数据集和复杂公式数据集；Easy、Middle、Hard为PaddleX内部自建的简单公式数据集（LaTeX 代码长度 0-64）、中等公式数据集（LaTeX 代码长度  64-256）和复杂公式数据集（LaTeX 代码长度  256+）。
 
 
@@ -10,6 +10,9 @@ PP-FormulaNet is a formula recognition model independently developed by Baidu Pa
 | UniMERNet | Donut Swin | [UniMERNet.yaml](../../../configs/rec/UniMERNet.yaml) |     0.9187  |    0.9252       | 0.8658  |    0.8228   | 0.7740 |     0.8613        |[trained model](https://paddleocr.bj.bcebos.com/contribution/rec_unimernet_train.tar)|
 | PP-FormulaNet-S | PPHGNetV2_B4 | [PP-FormulaNet-S.yaml](../../../configs/rec/PP-FormuaNet/PP-FormulaNet-S.yaml) |    0.8694   |    0.8071       | 0.9294  |    0.9112    | 0.8391 |    0.8712       |[trained model](https://paddleocr.bj.bcebos.com/contribution/rec_ppformulanet_s_train.tar)|
 | PP-FormulaNet-L | Vary_VIT_B | [PP-FormulaNet-L.yaml](../../../configs/rec/PP-FormuaNet/PP-FormulaNet-L.yaml) |     0.9055   |     0.9206       | 0.9392  |     0.9273    | 0.9141 |     0.9213         |[trained model](https://paddleocr.bj.bcebos.com/contribution/rec_ppformulanet_l_train.tar )|
+| PP-FormulaNet_plus-S | PPHGNetV2_B4 | [PP-FormulaNet_plus-S.yaml](../../../configs/rec/PP-FormuaNet/PP-FormulaNet_plus-S.yaml) |     -   |     -       | -  |     -    | - |     -         |[trained model](https://paddleocr.bj.bcebos.com/contribution/rec_ppformulanet_plus_s_train.tar )|
+| PP-FormulaNet_plus-M | PPHGNetV2_B6 | [PP-FormulaNet_plus-M.yaml](../../../configs/rec/PP-FormuaNet/PP-FormulaNet_plus-M.yaml) |     -   |     -       | -  |     -    | - |     -         |[trained model](https://paddleocr.bj.bcebos.com/contribution/rec_ppformulanet_plus_m_train.tar )|
+| PP-FormulaNet_plus-L | Vary_VIT_B | [PP-FormulaNet_plus-L.yaml](../../../configs/rec/PP-FormuaNet/PP-FormulaNet_plus-L.yaml) |     -   |     -       | -  |     -    | - |     -         |[trained model](https://paddleocr.bj.bcebos.com/contribution/rec_ppformulanet_plus_l_train.tar )|
 
 Among them, SPE and CPE refer to the simple and complex formula datasets of UniMERNet, respectively. Easy, Middle, and Hard are simple (LaTeX code length 0-64), medium (LaTeX code length 64-256), and complex formula datasets (LaTeX code length 256+) built internally by PaddleX.