publications | Yuekun Yao

2026

Preprint

Loop, Think, & Generalize: Implicit Reasoning in Recurrent-Depth Transformers

Harsh Kohli, Srinivasan Parthasarathy, Huan Sun, and 1 more author

2026

arXiv preprint

arXiv Bib PDF

% The eprint field holds the arXiv identifier (the same one that appears in url);
% archiveprefix and primaryclass only qualify eprint, which was previously missing.
@misc{kohli2026loopthinkgeneralize,
  author        = {Kohli, Harsh and Parthasarathy, Srinivasan and Sun, Huan and Yao, Yuekun},
  title         = {Loop, Think, \& Generalize: Implicit Reasoning in Recurrent-Depth Transformers},
  year          = {2026},
  eprint        = {2604.07822},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  note          = {arXiv preprint},
  url           = {https://arxiv.org/abs/2604.07822},
}

2025

EMNLP

Language models can learn implicit multi-hop reasoning, but only if they have lots of training data

Yuekun Yao, Yupei Du, Dawei Zhu, and 2 more authors

In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, 2025

arXiv Bib PDF

@inproceedings{yao2025implicit,
  author    = {Yao, Yuekun and Du, Yupei and Zhu, Dawei and Hahn, Michael and Koller, Alexander},
  title     = {Language models can learn implicit multi-hop reasoning, but only if they have lots of training data},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  year      = {2025},
}

EMNLP

Reason to rote: Rethinking memorization in reasoning

Yupei Du, Philipp Mondorf, Silvia Casola, and 3 more authors

In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, 2025

arXiv Bib PDF

@inproceedings{du2025reason,
  author    = {Du, Yupei and Mondorf, Philipp and Casola, Silvia and Yao, Yuekun and Litschko, Robert and Plank, Barbara},
  title     = {Reason to rote: Rethinking memorization in reasoning},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  year      = {2025},
}

ACL

Anything Goes? A Crosslinguistic Study of (Im)possible Language Learning in LMs

Xiulin Yang, Tatsuya Aoyama, Yuekun Yao, and 1 more author

In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics, 2025

Bib PDF

% Braces protect the acronym LMs and the capital A after the question mark,
% which sentence-casing bibliography styles would otherwise lowercase.
@inproceedings{yang2025anything,
  author    = {Yang, Xiulin and Aoyama, Tatsuya and Yao, Yuekun and Wilcox, Ethan},
  title     = {Anything Goes? {A} Crosslinguistic Study of (Im)possible Language Learning in {LMs}},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics},
  year      = {2025},
}

2024

EMNLP Findings

Predicting generalization performance with correctness discriminators

Yuekun Yao and Alexander Koller

In Findings of the Association for Computational Linguistics: EMNLP 2024, 2024

arXiv Bib PDF

@inproceedings{yao2024predicting,
  author    = {Yao, Yuekun and Koller, Alexander},
  title     = {Predicting generalization performance with correctness discriminators},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2024},
  year      = {2024},
}

NAACL

Simple and effective data augmentation for compositional generalization

Yuekun Yao and Alexander Koller

In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics, 2024

Bib PDF Code

@inproceedings{yao2024simple,
  author    = {Yao, Yuekun and Koller, Alexander},
  title     = {Simple and effective data augmentation for compositional generalization},
  booktitle = {Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2024},
}

2023

EMNLP

SLOG: A Structural Generalization Benchmark for Semantic Parsing

Bingzhi Li, Lucia Donatelli, Alexander Koller, and 3 more authors

In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, 2023

Bib PDF Code

% The acronym SLOG is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{li2023slog,
  author    = {Li, Bingzhi and Donatelli, Lucia and Koller, Alexander and Linzen, Tal and Yao, Yuekun and Kim, Najoung},
  title     = {{SLOG}: A Structural Generalization Benchmark for Semantic Parsing},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  year      = {2023},
}

2022

EMNLP

Structural generalization is hard for sequence-to-sequence models

Yuekun Yao and Alexander Koller

In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, 2022

Bib PDF Code Website

@inproceedings{yao2022structural,
  author    = {Yao, Yuekun and Koller, Alexander},
  title     = {Structural generalization is hard for sequence-to-sequence models},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
  year      = {2022},
}

2020

AMTA

Dynamic masking for improved stability in online spoken language translation

Yuekun Yao and Barry Haddow

In Proceedings of the 14th Biennial Conference of the Association for Machine Translation in the Americas, 2020

Bib PDF

@inproceedings{yao2020dynamic,
  author    = {Yao, Yuekun and Haddow, Barry},
  title     = {Dynamic masking for improved stability in online spoken language translation},
  booktitle = {Proceedings of the 14th Biennial Conference of the Association for Machine Translation in the Americas},
  year      = {2020},
}

IWSLT

ELITR non-native speech translation at IWSLT 2020

Dominik Macháček, Jonáš Kratochvíl, Sangeet Sagar, and 6 more authors

In Proceedings of the 17th International Conference on Spoken Language Translation, 2020

Bib PDF

% The acronyms ELITR and IWSLT are brace-protected so sentence-casing styles keep their capitals.
@inproceedings{machacek2020elitr,
  author    = {Mach{\'a}{\v c}ek, Dominik and Kratochv{\'\i}l, Jon{\'a}{\v s} and Sagar, Sangeet and {\v Z}ilinec, Mat{\'u}{\v s} and Bojar, Ond{\v r}ej and Nguyen, Thai-Son and Schneider, Felix and Williams, Philip and Yao, Yuekun},
  title     = {{ELITR} non-native speech translation at {IWSLT} 2020},
  booktitle = {Proceedings of the 17th International Conference on Spoken Language Translation},
  year      = {2020},
}