Skip to content

Commit 32726e4

Browse files
natolambert and claude
authored
Fix bibliography citation errors and hallucinations (#211)
Co-authored-by: Claude Opus 4.5 <[email protected]>
1 parent f35069b commit 32726e4

File tree

2 files changed

+38
-41
lines changed

2 files changed

+38
-41
lines changed

chapters/14-reasoning.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ STaR effectively approximates the policy gradient algorithm, but in practice fil
220220
TRICE [@hoffman2023training] also improves upon reasoning by generating traces and then optimizing with a custom Markov chain Monte Carlo inspired expectation maximization algorithm.
221221
VinePPO [@VinePPO] followed these and used a setup that shifted closer to modern reasoning models.
222222
VinePPO uses a PPO-based algorithm with binary rewards for math question correctness, training on GSM8K and MATH.
223-
Other work before OpenAI's o1 and DeepSeek R1 used code execution as a feedback signal for training [@gehring2024rlefgroundingcodellms], [@xudpoppo] or verification for theorem proving (called Reinforcement Learning from Verifier Feedback, RLVF, here) [@amit2024models].
223+
Other work before OpenAI's o1 and DeepSeek R1 used code execution as a feedback signal for training [@gehring2024rlefgroundingcodellms], [@xu2024dpo] or verification for theorem proving (called Reinforcement Learning from Verifier Feedback, RLVF, here) [@amit2024models].
224224
Tülu 3 expanded upon these methods by using a simple PPO trainer to reward completions with correct answers -- most importantly while maintaining the model's overall performance on a broad suite of evaluations.
225225
The binary rewards of Tülu 3 and modern reasoning training techniques can be contrasted to the iterative approach of STaR or the log-likelihood rewards of Quiet-STaR.
226226

chapters/bib.bib

Lines changed: 37 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ @article{bai2022constitutional
128128

129129
@article{dubey2024llama,
130130
title={The llama 3 herd of models},
131-
author={Dubey, Abhimanyu and Jauhri, Abhinav and Pandey, Abhinav and Kadian, Abhishek and Al-Dahle, Ahmad and Letman, Aiesha and Mathur, Akhil and Schelten, Alan and Yang, Amy and Fan, Angela and others},
131+
author={Grattafiori, Aaron and Dubey, Abhimanyu and Jauhri, Abhinav and Pandey, Abhinav and Kadian, Abhishek and Al-Dahle, Ahmad and Letman, Aiesha and Mathur, Akhil and Schelten, Alan and Yang, Amy and Fan, Angela and others},
132132
journal={arXiv preprint arXiv:2407.21783},
133133
year={2024}
134134
}
@@ -138,7 +138,7 @@ @article{rafailov2024direct
138138
author={Rafailov, Rafael and Sharma, Archit and Mitchell, Eric and Manning, Christopher D and Ermon, Stefano and Finn, Chelsea},
139139
journal={Advances in Neural Information Processing Systems},
140140
volume={36},
141-
year={2024}
141+
year={2023}
142142
}
143143

144144
@article{lambert2024t,
@@ -171,8 +171,9 @@ @inproceedings{chu2025sft
171171

172172
@inproceedings{park2024disentangling,
173173
title={Disentangling length from quality in direct preference optimization},
174-
booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL)},
174+
booktitle = {Findings of the Association for Computational Linguistics: ACL 2024},
175175
author={Park, Ryan and Rafailov, Rafael and Ermon, Stefano and Finn, Chelsea},
176+
pages = {4998--5017},
176177
year = {2024}
177178
}
178179

@@ -406,13 +407,12 @@ @inproceedings{Vaswani2017AttentionIA
406407
url={https://api.semanticscholar.org/CorpusID:13756489}
407408
}
408409

409-
@article{Bahdanau2014NeuralMT,
410+
@inproceedings{Bahdanau2014NeuralMT,
410411
title={Neural Machine Translation by Jointly Learning to Align and Translate},
411412
author={Dzmitry Bahdanau and Kyunghyun Cho and Yoshua Bengio},
412-
journal={CoRR},
413-
year={2014},
414-
volume={abs/1409.0473},
415-
url={https://api.semanticscholar.org/CorpusID:11212020}
413+
booktitle={International Conference on Learning Representations (ICLR)},
414+
year={2015},
415+
url={https://arxiv.org/abs/1409.0473}
416416
}
417417

418418
@article{team2024gemma,
@@ -485,11 +485,14 @@ @article{olmo20242
485485
year={2024}
486486
}
487487

488-
@article{seed2025seed,
489-
title={Seed-thinking-v1. 5: Advancing superb reasoning models with reinforcement learning},
490-
author={Seed, ByteDance and Yuan, Yufeng and Yue, Yu and Wang, Mingxuan and Zuo, Xiaochen and Chen, Jiaze and Yan, Lin and Xu, Wenyuan and Zhang, Chi and Liu, Xin and others},
491-
journal={arXiv preprint arXiv:2504.13914},
492-
year={2025}
488+
@misc{seed2025seed,
489+
title={Seed1.5-Thinking: Advancing Superb Reasoning Models with Reinforcement Learning},
490+
author={ByteDance Seed and Jiaze Chen and Tiantian Fan and Xin Liu and Lingjun Liu and Zhiqi Lin and Mingxuan Wang and Chengyi Wang and Xiangpeng Wei and Wenyuan Xu and Yufeng Yuan and Yu Yue and Lin Yan and Qiying Yu and Xiaochen Zuo and Chi Zhang and Ruofei Zhu and Zhecheng An and Zhihao Bai and Yu Bao and Xingyan Bin and Jiangjie Chen and Feng Chen and Hongmin Chen and Riwei Chen and Liangqiang Chen and Zixin Chen and Jinsong Chen and Siyan Chen and Kaiyuan Chen and Zhi Chen and Jin Chen and Jiecao Chen and Jinxin Chi and Weinan Dai and Ning Dai and Jiahui Dai and Shihan Dou and Yantao Du and Zhengyin Du and Jianhui Duan and Chen Dun and Ting-Han Fan and Jiazhan Feng and Junda Feng and Ziyuan Feng and Yuwei Fu and Wenqi Fu and Hanjie Fu and Hao Ge and Hongyi Guo and Mingji Han and Li Han and Wenhao Hao and Xintong Hao and Qianyu He and Jerry He and Feng He and Wen Heng and Zehua Hong and Qi Hou and Liang Hu and Shengding Hu and Nan Hu and Kai Hua and Qi Huang and Ziyue Huang and Hongzhi Huang and Zihao Huang and Ting Huang and Wenhao Huang and Wei Jia and Bin Jia and Xiaoying Jia and Yuhua Jiang and Haobin Jiang and Ziheng Jiang and Kaihua Jiang and Chengquan Jiang and Jianpeng Jiao and Xiaoran Jin and Xing Jin and Xunhao Lai and Zheng Li and Xiang Li and Liyi Li and Hongkai Li and Zheng Li and Shengxian Wan and Ya Wang and Yunshui Li and Chenggang Li and Niuniu Li and Siyu Li and Xi Li and Xiao Li and Aoyan Li and Yuntao Li and Nianning Liang and Xinnian Liang and Haibin Lin and Weijian Lin and Ye Lin and Zhicheng Liu and Guanlin Liu and Guanlin Liu and Chenxiao Liu and Yan Liu and Gaohong Liu and Juncai Liu and Chundian Liu and Deyi Liu and Kaibo Liu and Siyao Liu and Qi Liu and Yongfei Liu and Kang Liu and Gan Liu and Boyi Liu and Rui Long and Chenwei Lou and Weiqiang Lou and Xiang Luo and Yao Luo and Caiping Lv and Heyang Lv and Bole Ma and Qianli Ma and Hongzhi Ma and Yiyuan Ma and Jin Ma and Wenchang Ma and Tingting Ma and Chen Mao and Qiyang Min and Zhe 
Nan and Guanghan Ning and Jinxiang Ou and Haojie Pan and Renming Pang and Yanghua Peng and Tao Peng and Lihua Qian and Lihua Qian and Mu Qiao and Meng Qu and Cheng Ren and Hongbin Ren and Yong Shan and Wei Shen and Ke Shen and Kai Shen and Guangming Sheng and Jinlong Shi and Wenlei Shi and Guang Shi and Shuai Shuai Cao and Yuxin Song and Zuquan Song and Jing Su and Yifan Sun and Tao Sun and Zewei Sun and Borui Wan and Zihan Wang and Xiaohui Wang and Xi Wang and Shuguang Wang and Jun Wang and Qinlong Wang and Chenyuan Wang and Shuai Wang and Zihan Wang and Changbao Wang and Jiaqiang Wang and Shihang Wang and Xuwu Wang and Zaiyuan Wang and Yuxuan Wang and Wenqi Wang and Taiqing Wang and Chengzhi Wei and Houmin Wei and Ziyun Wei and Shufa Wei and Zheng Wu and Yonghui Wu and Yangjun Wu and Bohong Wu and Shuang Wu and Jingqiao Wu and Ning Wu and Shuangzhi Wu and Jianmin Wu and Chenguang Xi and Fan Xia and Yuqiao Xian and Liang Xiang and Boren Xiang and Bowen Xiao and Zhen Xiao and Xia Xiao and Yongsheng Xiao and Chao Xin and Shulin Xin and Yuwen Xiong and Jingjing Xu and Ziwen Xu and Chenyin Xu and Jiayi Xu and Yifan Xu and Wei Xu and Yufei Xu and Shikun Xu and Shipeng Yan and Shen Yan and Qingping Yang and Xi Yang and Tianhao Yang and Yuehang Yang and Yuan Yang and Ximing Yang and Zeyu Yang and Guang Yang and Yifan Yang and Xuesong Yao and Bairen Yi and Fan Yin and Jianian Yin and Ziqiang Ying and Xiangyu Yu and Hongli Yu and Song Yu and Menghan Yu and Huan Yu and Siyu Yuan and Jun Yuan and Yutao Zeng and Tianyang Zhan and Zheng Zhang and Yun Zhang and Mofan Zhang and Wang Zhang and Ru Zhang and Zhi Zhang and Tianqi Zhang and Xinyi Zhang and Zhexi Zhang and Sijun Zhang and Wenqiang Zhang and Xiangxiang Zhang and Yongtao Zhang and Yuyu Zhang and Ge Zhang and He Zhang and Yue Zhang and Renjie Zheng and Ningxin Zheng and Zhuolin Zheng and Yaowei Zheng and Chen Zheng and Xiaoyun Zhi and Wanjun Zhong and Cheng Zhong and Zheng Zhong and Baoquan Zhong and Xun Zhou and Na Zhou 
and Huan Zhou and Hang Zhu and Defa Zhu and Wenjia Zhu and Lei Zuo},
491+
year={2025},
492+
eprint={2504.13914},
493+
archivePrefix={arXiv},
494+
primaryClass={cs.CL},
495+
url={https://arxiv.org/abs/2504.13914}
493496
}
494497

495498
@article{li2022branch,
@@ -993,7 +996,7 @@ @article{wu2024fine
993996
author={Wu, Zeqiu and Hu, Yushi and Shi, Weijia and Dziri, Nouha and Suhr, Alane and Ammanabrolu, Prithviraj and Smith, Noah A and Ostendorf, Mari and Hajishirzi, Hannaneh},
994997
journal={Advances in Neural Information Processing Systems},
995998
volume={36},
996-
year={2024}
999+
year={2023}
9971000
}
9981001

9991002
@article{chen2024learning,
@@ -1322,7 +1325,7 @@ @inproceedings{kim2023prometheus
13221325
title={Prometheus: Inducing fine-grained evaluation capability in language models},
13231326
author={Kim, Seungone and Shin, Jamin and Cho, Yejin and Jang, Joel and Longpre, Shayne and Lee, Hwaran and Yun, Sangdoo and Shin, Seongjin and Kim, Sungdong and Thorne, James and others},
13241327
booktitle={The Twelfth International Conference on Learning Representations},
1325-
year={2023}
1328+
year={2024}
13261329
}
13271330

13281331
@article{cobbe2021gsm8k,
@@ -1606,7 +1609,7 @@ @inproceedings{li2023remax
16061609
title={Remax: A simple, effective, and efficient reinforcement learning method for aligning large language models},
16071610
author={Li, Ziniu and Xu, Tian and Zhang, Yushun and Lin, Zhihang and Yu, Yang and Sun, Ruoyu and Luo, Zhi-Quan},
16081611
booktitle={Forty-first International Conference on Machine Learning},
1609-
year={2023}
1612+
year={2024}
16101613
}
16111614

16121615
@article{team2025kimi,
@@ -1946,12 +1949,12 @@ @article{franken2024self
19461949

19471950
@inproceedings{yuan2025selfrewardinglanguagemodels,
19481951
title={Self-Rewarding Language Models},
1949-
booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL)},
1952+
booktitle = {International Conference on Machine Learning (ICML)},
19501953
author={Weizhe Yuan and Richard Yuanzhe Pang and Kyunghyun Cho and Xian Li and Sainbayar Sukhbaatar and Jing Xu and Jason Weston},
1951-
year = {2025},
1954+
year = {2024},
19521955
archivePrefix={arXiv},
19531956
primaryClass={cs.CL},
1954-
url={https://arxiv.org/abs/2401.10020},
1957+
url={https://arxiv.org/abs/2401.10020},
19551958
}
19561959

19571960
@article{bercovich2025llamanemotron,
@@ -2552,14 +2555,6 @@ @article{amit2024models
25522555
year = {2024}
25532556
}
25542557

2555-
@inproceedings{xudpoppo,
2556-
author={Shusheng Xu and Wei Fu and Jiaxuan Gao and Wenjie Ye and Weilin Liu and Zhiyu Mei and Guangju Wang and Chao Yu and Yi Wu},
2557-
title={Is DPO Superior to PPO for LLM Alignment? A Comprehensive Study},
2558-
year={2024},
2559-
cdate={1704067200000},
2560-
url={https://openreview.net/forum?id=6XH8R7YrSk},
2561-
booktitle={ICML}
2562-
}
25632558

25642559
@misc{wang2025ragenunderstandingselfevolutionllm,
25652560
title={RAGEN: Understanding Self-Evolution in LLM Agents via Multi-Turn Reinforcement Learning},
@@ -2760,11 +2755,14 @@ @article{shridhar2023distilling
27602755
publisher={Association for Computational Linguistics}
27612756
}
27622757

2763-
@article{hsieh2023distilling,
2764-
title={Distilling step-by-step! outperforming larger language models with less training data and smaller model sizes},
2765-
author={Hsieh, Cheng-Yu and Li, Chun-Liang and Yeh, Chih-Kuan and Nakhost, Hootan and Fujii, Yasuhisa and Ratner, Alexander and Krishna, Ranjay and Lee, Chen-Yu and Pfister, Tomas},
2766-
journal={arXiv preprint arXiv:2305.02301},
2767-
year={2023}
2758+
@inproceedings{hsieh2023distilling,
2759+
title={Distilling Step-by-Step! Outperforming Larger Language Models with Less Training Data and Smaller Model Sizes},
2760+
author={Hsieh, Cheng-Yu and Li, Chun-Liang and Yeh, Chih-kuan and Nakhost, Hootan and Fujii, Yasuhisa and Ratner, Alex and Krishna, Ranjay and Lee, Chen-Yu and Pfister, Tomas},
2761+
booktitle={Findings of the Association for Computational Linguistics: ACL 2023},
2762+
year={2023},
2763+
url={https://aclanthology.org/2023.findings-acl.507/},
2764+
doi={10.18653/v1/2023.findings-acl.507},
2765+
pages={8003--8017}
27682766
}
27692767

27702768
@article{gerstgrasser2024model,
@@ -2826,9 +2824,9 @@ @inproceedings{huang2025math
28262824

28272825
@inproceedings{hendrycks2020measuring,
28282826
title={Measuring massive multitask language understanding},
2829-
booktitle = {Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
2827+
booktitle = {International Conference on Learning Representations (ICLR)},
28302828
author={Hendrycks, Dan and Burns, Collin and Basart, Steven and Zou, Andy and Mazeika, Mantas and Song, Dawn and Steinhardt, Jacob},
2831-
year = {2025}
2829+
year = {2021}
28322830
}
28332831

28342832
@article{mallen2023llm_memorization,
@@ -2999,17 +2997,16 @@ @misc{gao2023evalharness
29992997

30002998
@inproceedings{gu2024olmes,
30012999
author = {Gu, Yuling and Tafjord, Oyvind and Kuehl, Bailey and Haddad, Dany and Dodge, Jesse and Hajishirzi, Hannaneh},
3002-
title = {{OLMES: A Standard for Language Model Evaluations}
3003-
booktitle = {Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},},
3000+
title = {{OLMES: A Standard for Language Model Evaluations}},
3001+
booktitle = {Findings of the North American Chapter of the Association for Computational Linguistics (NAACL)},
30043002
year = {2025}
30053003
}
30063004

3007-
@inproceedings{liang2023helm,
3005+
@article{liang2023helm,
30083006
author = {Liang, Percy and Bommasani, Rishi and Lee, Tony and Tsipras, Dimitris and Soylu, Dilara and Yasunaga, Michihiro and Zhang, Yian and Narayanan, Deepak and Wu, Yuhuai and Kumar, Ananya and Newman, Benjamin and Yuan, Binhang and Yan, Bobby and Zhang, Ce and Cosgrove, Christian and Manning, Christopher D. and R\'e, Christopher and Acosta-Navas, Diana and Hudson, Drew A. and Zelikman, Eric and Durmus, Esin and Ladhak, Faisal and Rong, Frieda and Ren, Hongyu and Yao, Huaxiu and Wang, Jue and Santhanam, Keshav and Orr, Laurel J. and Zheng, Lucia and Y\'uksekg\"on\"ul, Mert and Suzgun, Mirac and Kim, Nathan and Guha, Neel and Chatterji, Niladri S. and Khattab, Omar and Henderson, Peter and Huang, Qian and Chi, Ryan and Xie, Sang Michael and Santurkar, Shibani and Ganguli, Surya and Hashimoto, Tatsunori and Icard, Thomas and Zhang, Tianyi and Chaudhary, Vishrav and Wang, William and Li, Xuechen and Mai, Yifan and Zhang, Yuhui and Koreeda, Yuta},
3009-
title = {{Holistic Evaluation of Language Models}
3010-
booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL)},},
3007+
title = {Holistic Evaluation of Language Models},
30113008
journal = {Transactions on Machine Learning Research},
3012-
year = {2025},
3009+
year = {2023},
30133010
doi = {10.1111/nyas.15007},
30143011
note = {Also available as arXiv:2211.09110}
30153012
}

0 commit comments

Comments
 (0)