Research
At OUNLP, we explore the frontier of natural language processing and machine learning with the goal of building intelligent, trustworthy, and practical AI systems. Our work spans multi-party, multi-modal dialogue and discourse analysis (in domains like education and mental health), agentic models and domain-specific “world” models for human-AI teaming, efficient structured-prediction and symbolic methods to augment neural networks, and the robust deployment and evaluation of trustworthy AI.
Explore our publications by research theme:
Highlighted
A Multimodal Large Language Model for Materials Science
Nature Machine Intelligence
·
01 Apr 2026
·
doi:10.1038/s42256-026-01214-y
BibTeX
@article{tang2026matterchat,
  author    = {Yingheng Tang and Wenbin Xu and Jie Cao and Weilu Gao and Steve Farrell and Benjamin Erichson and Michael W. Mahoney and Andy Nonaka and Zhi Yao},
  title     = {A Multimodal Large Language Model for Materials Science},
  journal   = {Nature Machine Intelligence},
  publisher = {Nature Publishing Group},
  year      = {2026},
  doi       = {10.1038/s42256-026-01214-y},
  url       = {https://doi.org/10.1038/s42256-026-01214-y}
}
All
2026
HydroAgent: Closing the Gap Between Frontier LLMs and Human Experts in Hydrologic Model Calibration via Simulator-Grounded RL
arXiv
·
01 May 2026
BibTeX
@misc{li2026hydroagent,
  author        = {Zhi Li and Songkun Yan and Jie Cao and Mofan Zhang and Anjiang Wei and Jinwoong Yoo and Yang Hong},
  title         = {{HydroAgent}: Closing the Gap Between Frontier {LLMs} and Human Experts in Hydrologic Model Calibration via Simulator-Grounded {RL}},
  howpublished  = {arXiv preprint},
  year          = {2026},
  eprint        = {2605.17792},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2605.17792}
}
A Multimodal Large Language Model for Materials Science
Nature Machine Intelligence
·
01 Apr 2026
·
doi:10.1038/s42256-026-01214-y
BibTeX
@article{tang2026multimodal,
  author    = {Yingheng Tang and Wenbin Xu and Jie Cao and Weilu Gao and Steve Farrell and Benjamin Erichson and Michael W. Mahoney and Andy Nonaka and Zhi Yao},
  title     = {A Multimodal Large Language Model for Materials Science},
  journal   = {Nature Machine Intelligence},
  publisher = {Nature Publishing Group},
  year      = {2026},
  doi       = {10.1038/s42256-026-01214-y},
  url       = {https://doi.org/10.1038/s42256-026-01214-y}
}
Translation via Annotation: A Computational Study of Translating Classical Chinese into Japanese
EACL 2026
·
01 Mar 2026
BibTeX
@inproceedings{li2026translation,
  author    = {Zilong Li and Jie Cao},
  title     = {Translation via Annotation: A Computational Study of Translating Classical {Chinese} into {Japanese}},
  booktitle = {EACL 2026},
  year      = {2026},
  url       = {https://aclanthology.org/2026.eacl-long.285/}
}
2025
A Mamba-type of deep state space model for reservoir release simulation with a large-scale verification over 441 dams across CONUS
Journal of Hydrology
·
01 Dec 2025
·
doi:10.1016/j.jhydrol.2025.134145
BibTeX
@article{zhang2025mambatype,
  author  = {Jiaorui Zhang and Haowen Yue and Milad Basirifard and Jie Cao and Tiantian Yang},
  title   = {A {Mamba}-type of deep state space model for reservoir release simulation with a large-scale verification over 441 dams across {CONUS}},
  journal = {Journal of Hydrology},
  year    = {2025},
  doi     = {10.1016/j.jhydrol.2025.134145},
  url     = {https://doi.org/g977xq}
}
Do LLMs Encode Frame Semantics? Evidence from Frame Identification
EMNLP 2025
·
01 Nov 2025
BibTeX
@inproceedings{chundru2025do,
  author    = {Jayanth Krishna Chundru and Rudrashis Poddar and Jie Cao and Tianyu Jiang},
  title     = {Do {LLMs} Encode Frame Semantics? Evidence from Frame Identification},
  booktitle = {EMNLP 2025},
  year      = {2025},
  url       = {https://aclanthology.org/2025.emnlp-main.1499/}
}
OUNLP at TSAR 2025 Shared Task: Multi-Round Text Simplifier via Code Generation
TSAR 2025 (EMNLP Workshop)
·
01 Nov 2025
BibTeX
@inproceedings{huynh2025ounlp,
  author    = {Cuong Huynh and Jie Cao},
  title     = {{OUNLP} at {TSAR} 2025 Shared Task: Multi-Round Text Simplifier via Code Generation},
  booktitle = {TSAR 2025 (EMNLP Workshop)},
  year      = {2025},
  url       = {https://aclanthology.org/2025.tsar-1.19/}
}
“Understanding Robustness Lottery”: A Geometric Visual Comparative Analysis of Neural Network Pruning Approaches
IEEE Transactions on Visualization and Computer Graphics
·
01 Sep 2025
·
doi:10.1109/TVCG.2024.3514996
BibTeX
@article{li2025understanding,
  author  = {Zhimin Li and Shusen Liu and Xin Yu and Kailkhura, Bhavya and Jie Cao and James Daniel Diffenderfer and Peer-Timo Bremer and Valerio Pascucci},
  title   = {{``Understanding Robustness Lottery''}: A Geometric Visual Comparative Analysis of Neural Network Pruning Approaches},
  journal = {IEEE Transactions on Visualization and Computer Graphics},
  year    = {2025},
  doi     = {10.1109/TVCG.2024.3514996},
  url     = {https://doi.org/g977zt}
}
Towards Actionable Pedagogical Feedback: A Multi-Perspective Analysis of Mathematics Teaching and Tutoring Dialogue
International Educational Data Mining Society
·
12 Jul 2025
·
doi:10.5281/zenodo.15870176
BibTeX
@inproceedings{naim2025towards,
  author    = {Jannatun Naim and Jie Cao and Fareen Tasneem and Jennifer Jacobs and Brent Milne and James Martin and Tamara Sumner},
  title     = {Towards Actionable Pedagogical Feedback: A Multi-Perspective Analysis of Mathematics Teaching and Tutoring Dialogue},
  booktitle = {International Educational Data Mining Society},
  year      = {2025},
  doi       = {10.5281/zenodo.15870176},
  url       = {https://doi.org/g977zp}
}
AQUAH: Automatic Quantification and Unified Agent in Hydrology
arXiv
·
01 Jan 2025
·
doi:10.48550/arXiv.2508.02936
BibTeX
@misc{yan2025aquah,
  author        = {Songkun Yan and Zhi Li and Siyu Zhu and Yixin Wen and Mofan Zhang and Mengye Chen and Jie Cao and Yang Hong},
  title         = {{AQUAH}: Automatic Quantification and Unified Agent in Hydrology},
  howpublished  = {arXiv preprint},
  year          = {2025},
  eprint        = {2508.02936},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2508.02936},
  url           = {https://doi.org/g977zr}
}
Rethinking On-policy Optimization for Query Augmentation
arXiv
·
01 Jan 2025
·
doi:10.48550/arXiv.2510.17139
BibTeX
@misc{xu2025rethinking,
  author        = {Zhichao Xu and Shengyao Zhuang and Xueguang Ma and Bingsen Chen and Yijun Tian and Fengran Mo and Jie Cao and Vivek Srikumar},
  title         = {Rethinking On-policy Optimization for Query Augmentation},
  howpublished  = {arXiv preprint},
  year          = {2025},
  eprint        = {2510.17139},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2510.17139},
  url           = {https://doi.org/g98bgt}
}
2024
Classifying Tutor Discursive Moves at Scale in Mathematics Classrooms with Large Language Models
Learning @ Scale (L@S) 2024
·
01 Jul 2024
BibTeX
@inproceedings{moreaupernet2024classifying,
  author    = {Baptiste Moreau-Pernet and Yu Tian and Sandra Sawaya and Peter Foltz and Jie Cao and Brent Milne and Thomas Christie},
  title     = {Classifying Tutor Discursive Moves at Scale in Mathematics Classrooms with Large Language Models},
  booktitle = {Learning @ Scale (L@S) 2024},
  year      = {2024},
  doi       = {10.1145/3657604.3664664},
  url       = {https://doi.org/10.1145/3657604.3664664}
}
Enhancing Talk Moves Analysis in Mathematics Tutoring through Classroom Teaching Discourse
arXiv
·
01 Jan 2024
·
doi:10.48550/arXiv.2412.13395
BibTeX
@misc{cao2024enhancing,
  author        = {Jie Cao and Abhijit Suresh and Jennifer Jacobs and Charis Clevenger and Amanda Howard and Chelsea Brown and Brent Milne and Tom Fischaber and Tamara Sumner and James H. Martin},
  title         = {Enhancing Talk Moves Analysis in Mathematics Tutoring through Classroom Teaching Discourse},
  howpublished  = {arXiv preprint},
  year          = {2024},
  eprint        = {2412.13395},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2412.13395},
  url           = {https://doi.org/g977zq}
}
2023
Mind the Gap between the Application Track and the Real World
ACL 2023
·
01 Jul 2023
BibTeX
@inproceedings{ganesh2023mind,
  author    = {Ananya Ganesh and Jie Cao and E. Margaret Perkoff and Rosy Southwell and Martha Palmer and Katharina Kann},
  title     = {Mind the Gap between the Application Track and the Real World},
  booktitle = {ACL 2023},
  year      = {2023},
  url       = {https://aclanthology.org/2023.acl-short.156/}
}
Comparing Neural Question Generation Architectures for Reading Comprehension
BEA 2023 (ACL Workshop)
·
01 Jul 2023
BibTeX
@inproceedings{perkoff2023comparing,
  author    = {E. Margaret Perkoff and Abhidip Bhattacharyya and Jon Cai and Jie Cao},
  title     = {Comparing Neural Question Generation Architectures for Reading Comprehension},
  booktitle = {BEA 2023 (ACL Workshop)},
  year      = {2023},
  url       = {https://aclanthology.org/2023.bea-1.47/}
}
Designing an AI Partner for Jigsaw Classrooms
AIAIC 2023 (Workshop)
·
01 Jun 2023
BibTeX
@inproceedings{cao2023designing,
  author    = {Jie Cao and Rachel Dickler and Marie Grace and Alessandro Roncone and Leanne Hirshfield and Marilyn Walker and Martha Palmer},
  title     = {Designing an {AI} Partner for Jigsaw Classrooms},
  booktitle = {AIAIC 2023 (Workshop)},
  year      = {2023},
  url       = {https://aichildinteraction.github.io/preprint/AIAIC23_paper_7399.pdf}
}
A Comparative Analysis of Automatic Speech Recognition Errors in Small Group Classroom Discourse
UMAP 2023
·
01 Jun 2023
BibTeX
@inproceedings{cao2023comparative,
  author    = {Jie Cao and Ananya Ganesh and Jon Cai and Rosy Southwell and E. Margaret Perkoff and Michael Regan and Katharina Kann and James Martin and Martha Palmer and Sidney D'Mello},
  title     = {A Comparative Analysis of Automatic Speech Recognition Errors in Small Group Classroom Discourse},
  booktitle = {UMAP 2023},
  year      = {2023},
  doi       = {10.1145/3565472.3595606},
  url       = {https://dl.acm.org/doi/10.1145/3565472.3595606}
}
Dependency Dialogue Acts — Annotation Scheme and Case Study
IWSDS 2023
·
01 Feb 2023
BibTeX
@inproceedings{cai2023dependency,
  author        = {Jon Cai and Brendan D. King and E. Margaret Perkoff and Shiran Dudy and Jie Cao and Marie Grace and Natalia Wojarnik and Ananya Ganesh and James Martin and Martha Palmer and Marilyn Walker and Jeffrey Flanigan},
  title         = {Dependency Dialogue Acts --- Annotation Scheme and Case Study},
  booktitle     = {IWSDS 2023},
  year          = {2023},
  eprint        = {2302.12944},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2302.12944}
}
2022
Inductive Biases for Deep Linguistic Structured Prediction with Independent Factorization
PhD Dissertation, University of Utah
·
01 May 2022
BibTeX
@phdthesis{cao2022inductive,
  author = {Jie Cao},
  title  = {Inductive Biases for Deep Linguistic Structured Prediction with Independent Factorization},
  school = {University of Utah},
  year   = {2022},
  url    = {https://www.proquest.com/docview/2777357718}
}
2021
Database Workload Characterization with Query Plan Encoders
PVLDB (VLDB 2022)
·
01 Dec 2021
BibTeX
@article{paul2021database,
  author  = {Debjyoti Paul and Jie Cao and Feifei Li and Vivek Srikumar},
  title   = {Database Workload Characterization with Query Plan Encoders},
  journal = {Proceedings of the VLDB Endowment},
  year    = {2021},
  doi     = {10.14778/3503585.3503600},
  url     = {https://dl.acm.org/doi/10.14778/3503585.3503600},
  note    = {Presented at VLDB 2022}
}
A Comparative Study on Schema-Guided Dialogue State Tracking
NAACL 2021
·
01 Jun 2021
BibTeX
@inproceedings{cao2021comparative,
  author    = {Jie Cao and Yi Zhang},
  title     = {A Comparative Study on Schema-Guided Dialogue State Tracking},
  booktitle = {NAACL 2021},
  year      = {2021},
  url       = {https://aclanthology.org/2021.naacl-main.62/}
}
Earlier
Amazon at MRP 2019: Parsing Meaning Representations with Lexical and Phrasal Anchoring
CoNLL 2019 (MRP Shared Task)
·
01 Nov 2019
BibTeX
@inproceedings{cao2019amazon,
  author    = {Jie Cao and Yi Zhang and Adel Youssef and Vivek Srikumar},
  title     = {{Amazon} at {MRP} 2019: Parsing Meaning Representations with Lexical and Phrasal Anchoring},
  booktitle = {CoNLL 2019 (MRP Shared Task)},
  year      = {2019},
  url       = {https://aclanthology.org/K19-2013/}
}
Rhetorically Controlled Encoder-Decoder for Modern Chinese Poetry Generation
ACL 2019
·
01 Jul 2019
BibTeX
@inproceedings{liu2019rhetorically,
  author    = {Zhiqiang Liu and Zuohui Fu and Jie Cao and Gerard de Melo and Yik-Cheung Tam and Cheng Niu and Jie Zhou},
  title     = {Rhetorically Controlled Encoder-Decoder for Modern {Chinese} Poetry Generation},
  booktitle = {ACL 2019},
  year      = {2019},
  url       = {https://aclanthology.org/P19-1192/}
}
Observing Dialogue in Therapy: Categorizing and Forecasting Behavioral Codes
ACL 2019
·
01 Jul 2019
BibTeX
@inproceedings{cao2019observing,
  author    = {Jie Cao and Michael Tanana and Zac Imel and Eric Poitras and David Atkins and Vivek Srikumar},
  title     = {Observing Dialogue in Therapy: Categorizing and Forecasting Behavioral Codes},
  booktitle = {ACL 2019},
  year      = {2019},
  url       = {https://aclanthology.org/P19-1563/}
}
End-to-end Gated Self-attentive Memory Network for Dialog Response Selection
AAAI DSTC7 Workshop 2019
·
01 Jan 2019
BibTeX
@inproceedings{sun2019endtoend,
  author    = {Shuo Sun and Yik-Cheung Tam and Jie Cao and Canxiang Yan and Zuohui Fu and Cheng Niu and Jie Zhou},
  title     = {End-to-end Gated Self-attentive Memory Network for Dialog Response Selection},
  booktitle = {AAAI DSTC7 Workshop 2019},
  year      = {2019},
  url       = {http://workshop.colips.org/dstc7/papers/13.pdf}
}
A Distributed SVM Method Based on the Iterative MapReduce
IEEE ICSC 2015
·
01 Feb 2015
BibTeX
@inproceedings{ke2015distributed,
  author    = {Xijiang Ke and Hai Jin and Xia Xie and Jie Cao},
  title     = {A Distributed {SVM} Method Based on the Iterative {MapReduce}},
  booktitle = {IEEE ICSC 2015},
  year      = {2015},
}
JRBridge: A Framework of Large-Scale Statistical Computing for R
IEEE APSCC 2012
·
01 Dec 2012
BibTeX
@inproceedings{xie2012jrbridge,
  author    = {Xia Xie and Jie Cao and Hai Jin and Xijiang Ke and Wenzhi Cao},
  title     = {{JRBridge}: A Framework of Large-Scale Statistical Computing for {R}},
  booktitle = {IEEE APSCC 2012},
  year      = {2012},
}