Research

At OUNLP, we explore the frontier of natural language processing and machine learning with the goal of building intelligent, trustworthy, and practical AI systems. Our work spans multi-party, multi-modal dialogue and discourse analysis (in domains like education and mental health), agentic models and domain-specific “world” models for human-AI teaming, efficient structured-prediction and symbolic methods to augment neural networks, and the robust deployment and evaluation of trustworthy AI.

Explore our publications by research theme:

Highlighted

A Multimodal Large Language Model for Materials Science

Yingheng Tang, Wenbin Xu, Jie Cao, Weilu Gao, Steve Farrell, Benjamin Erichson, Michael W. Mahoney, Andy Nonaka, Zhi Yao

Nature Machine Intelligence · Apr 2026 · doi:10.1038/s42256-026-01214-y

ai-for-science llm-agents materials-science multi-modal-llm structure-aware-modeling property-prediction interatomic-potential

BibTeX

@article{tang2026matterchat,
  author    = {Tang, Yingheng and Xu, Wenbin and Cao, Jie and Gao, Weilu and Farrell, Steve and Erichson, Benjamin and Mahoney, Michael W. and Nonaka, Andy and Yao, Zhi},
  title     = {A Multimodal Large Language Model for Materials Science},
  journal   = {Nature Machine Intelligence},
  publisher = {Nature Publishing Group},
  year      = {2026},
  doi       = {10.1038/s42256-026-01214-y},
  url       = {https://doi.org/10.1038/s42256-026-01214-y},
}

All

2026

AI Agent for Hydrologic Modeling: Definition, Development and Application

Songkun Yan, Mengye Chen, Zhi Li, Yixin Wen, Siyu Zhu, …, Jie Cao, Xiaodong Chen, Chengbin Deng, Tiantian Yang, Yang Hong

Geophysical Research Letters · Jun 2026

ai-for-science llm-agents hydrology language-agents hydrologic-modeling flood-forecasting

BibTeX

@article{yan2026grl,
  author  = {Yan, Songkun and Chen, Mengye and Li, Zhi and Wen, Yixin and Zhu, Siyu and Zhang, Mofan and Liu, Di and Cao, Jie and Chen, Xiaodong and Deng, Chengbin and Yang, Tiantian and Hong, Yang},
  title   = {{AI} Agent for Hydrologic Modeling: Definition, Development and Application},
  journal = {Geophysical Research Letters},
  year    = {2026},
  month   = jun,
  url     = {https://essopenarchive.org/doi/full/10.22541/essoar.176894821.13120988/v1},
}

Beyond Supervised Clarification: Input Rewriting with LLMs for Dialogue Discourse Parsing

Yiming Liu, Ziyue Zhang, Zhichao Xu, Xin Yu, Yingheng Tang, Tianyu Jiang, Jie Cao

SIGDIAL 2026 · Jun 2026

dialogue-and-discourse discourse-parsing dialogue-clarification large-language-models

BibTeX

@inproceedings{liu2026beyond,
  author    = {Liu, Yiming and Zhang, Ziyue and Xu, Zhichao and Yu, Xin and Tang, Yingheng and Jiang, Tianyu and Cao, Jie},
  title     = {Beyond Supervised Clarification: Input Rewriting with {LLMs} for Dialogue Discourse Parsing},
  booktitle = {Proceedings of the 27th Annual Meeting of the Special Interest Group on Discourse and Dialogue (SIGDIAL 2026)},
  year      = {2026},
  url       = {https://arxiv.org/abs/2607.01964},
}

Rethinking On-policy Optimization for Query Augmentation

Zhichao Xu, Shengyao Zhuang, Xueguang Ma, Bingsen Chen, Yijun Tian, Fengran Mo, Tao Li, Jie Cao, Vivek Srikumar

Transactions on Machine Learning Research · Jun 2026 · doi:10.48550/arXiv.2510.17139

llm-agents query-augmentation information-retrieval large-language-models reinforcement-learning

BibTeX

@article{xu2025rethinking,
  author  = {Xu, Zhichao and Zhuang, Shengyao and Ma, Xueguang and Chen, Bingsen and Tian, Yijun and Mo, Fengran and Li, Tao and Cao, Jie and Srikumar, Vivek},
  title   = {Rethinking On-policy Optimization for Query Augmentation},
  journal = {Transactions on Machine Learning Research},
  issn    = {2835-8856},
  year    = {2026},
  month   = jun,
  url     = {https://openreview.net/forum?id=mmqbjhz5Br},
}

HydroAgent: Closing the Gap Between Frontier LLMs and Human Experts in Hydrologic Model Calibration via Simulator-Grounded RL

Zhi Li, Songkun Yan, Jie Cao, Mofan Zhang, Anjiang Wei, Jinwoong Yoo, Yang Hong

arXiv · May 2026

ai-for-science llm-agents hydrology language-agents reinforcement-learning model-calibration

BibTeX

@misc{li2026hydroagentclosinggapfrontier,
  author        = {Li, Zhi and Yan, Songkun and Cao, Jie and Zhang, Mofan and Wei, Anjiang and Yoo, Jinwoong and Hong, Yang},
  title         = {{HydroAgent}: Closing the Gap Between Frontier {LLMs} and Human Experts in Hydrologic Model Calibration via Simulator-Grounded {RL}},
  year          = {2026},
  eprint        = {2605.17792},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2605.17792},
}

A Multimodal Large Language Model for Materials Science

Yingheng Tang, Wenbin Xu, Jie Cao, Weilu Gao, Steve Farrell, Benjamin Erichson, Michael W. Mahoney, Andy Nonaka, Zhi Yao

Nature Machine Intelligence · Apr 2026 · doi:10.1038/s42256-026-01214-y

ai-for-science llm-agents materials-science multi-modal-llm structure-aware-modeling property-prediction interatomic-potential

BibTeX

@article{tang2026matterchat,
  author    = {Tang, Yingheng and Xu, Wenbin and Cao, Jie and Gao, Weilu and Farrell, Steve and Erichson, Benjamin and Mahoney, Michael W. and Nonaka, Andy and Yao, Zhi},
  title     = {A Multimodal Large Language Model for Materials Science},
  journal   = {Nature Machine Intelligence},
  publisher = {Nature Publishing Group},
  year      = {2026},
  doi       = {10.1038/s42256-026-01214-y},
  url       = {https://doi.org/10.1038/s42256-026-01214-y},
}

Translation via Annotation: A Computational Study of Translating Classical Chinese into Japanese

Zilong Li, Jie Cao

EACL 2026 · Mar 2026

structured-prediction machine-translation classical-chinese sequence-tagging low-resource-nlp

BibTeX

@inproceedings{li2026translation,
  author    = {Li, Zilong and Cao, Jie},
  title     = {Translation via Annotation: A Computational Study of Translating Classical {Chinese} into {Japanese}},
  booktitle = {Proceedings of the 2026 Conference of the European Chapter of the Association for Computational Linguistics (EACL 2026)},
  year      = {2026},
  url       = {https://aclanthology.org/2026.eacl-long.285/},
}

2025

A Mamba-type of deep state space model for reservoir release simulation with a large-scale verification over 441 dams across CONUS

Jiaorui Zhang, Haowen Yue, Milad Basirifard, Jie Cao, Tiantian Yang

Journal of Hydrology · Dec 2025 · doi:10.1016/j.jhydrol.2025.134145

ai-for-science reservoir-simulation deep-learning water-management interpretability hydrology

BibTeX

@article{ZHANG2025134145,
  author   = {Zhang, Jiaorui and Yue, Haowen and Basirifard, Milad and Cao, Jie and Yang, Tiantian},
  title    = {A {{Mamba-type}} of Deep State Space Model for Reservoir Release Simulation with a Large-Scale Verification over 441 Dams across {{CONUS}}},
  journal  = {Journal of Hydrology},
  year     = {2025},
  pages    = {134145},
  issn     = {0022-1694},
  doi      = {10.1016/j.jhydrol.2025.134145},
  url      = {https://www.sciencedirect.com/science/article/pii/S0022169425014830},
  keywords = {Large scale,Release simulation,SHAP,Structured State Space Model,Water management},
}

Do LLMs Encode Frame Semantics? Evidence from Frame Identification

Jayanth Krishna Chundru, Rudrashis Poddar, Jie Cao, Tianyu Jiang

EMNLP 2025 · Nov 2025

structured-prediction frame-semantics frame-identification large-language-models probing

BibTeX

@inproceedings{jayanth2025emnlp,
  author    = {Chundru, Jayanth Krishna and Poddar, Rudrashis and Cao, Jie and Jiang, Tianyu},
  title     = {Do {LLMs} Encode Frame Semantics? Evidence from Frame Identification},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  year      = {2025},
  month     = nov,
  publisher = {Association for Computational Linguistics},
  venue     = {Suzhou, China},
  url       = {https://aclanthology.org/2025.emnlp-main.1499/},
}

OUNLP at TSAR 2025 Shared Task: Multi-Round Text Simplifier via Code Generation

Cuong Huynh, Jie Cao

TSAR 2025 (EMNLP Workshop) · Nov 2025

ai-for-education llm-agents text-simplification code-generation readability large-language-models

BibTeX

@inproceedings{huynh2025ounlp,
  author    = {Huynh, Cuong and Cao, Jie},
  title     = {{OUNLP} at {TSAR} 2025 Shared Task: Multi-Round Text Simplifier via Code Generation},
  booktitle = {Proceedings of the Workshop on Text Simplification, Accessibility and Readability (TSAR 2025)},
  year      = {2025},
  url       = {https://aclanthology.org/2025.tsar-1.19/},
}

“Understanding Robustness Lottery”: A Geometric Visual Comparative Analysis of Neural Network Pruning Approaches

Zhimin Li, Shusen Liu, Xin Yu, Bhavya Kailkhura, Jie Cao, James Daniel Diffenderfer, Peer-Timo Bremer, Valerio Pascucci

IEEE Transactions on Visualization and Computer Graphics · Sep 2025 · doi:10.1109/TVCG.2024.3514996

trustworthy-ai neural-network-pruning robustness visual-analysis geometric-comparative-analysis

BibTeX

@article{vis-pruning2024,
  author  = {Li, Zhimin and Liu, Shusen and Yu, Xin and Kailkhura, Bhavya and Cao, Jie and Diffenderfer, James Daniel and Bremer, Peer-Timo and Pascucci, Valerio},
  title   = {{``Understanding Robustness Lottery''}: A Geometric Visual Comparative Analysis of Neural Network Pruning Approaches},
  journal = {IEEE Transactions on Visualization and Computer Graphics},
  year    = {2024},
  doi     = {10.1109/TVCG.2024.3514996},
  url     = {https://doi.org/10.1109/tvcg.2024.3514996},
}

Towards Actionable Pedagogical Feedback: A Multi-Perspective Analysis of Mathematics Teaching and Tutoring Dialogue

Jannatun Naim, Jie Cao, Fareen Tasneem, Jennifer Jacobs, Brent Milne, James Martin, Tamara Sumner

International Educational Data Mining Society · Jul 2025 · doi:10.5281/zenodo.15870176

ai-for-education dialogue-and-discourse pedagogical-feedback mathematics-education talk-moves discourse-analysis tutoring-dialogue

BibTeX

@inproceedings{naim2025edm,
  author    = {Naim, Jannatun and Cao, Jie and Tasneem, Fareen and Jacobs, Jennifer and Milne, Brent and Martin, James and Sumner, Tamara},
  title     = {Towards Actionable Pedagogical Feedback: A Multi-Perspective Analysis of Mathematics Teaching and Tutoring Dialogue},
  booktitle = {Proceedings of the 18th International Conference on Educational Data Mining},
  year      = {2025},
  month     = jul,
  pages     = {328--341},
  publisher = {International Educational Data Mining Society},
  doi       = {10.5281/zenodo.15870177},
  venue     = {Palermo, Italy},
  url       = {https://educationaldatamining.org/EDM2025/proceedings/2025.EDM.long-papers.201/index.html},
}

AQUAH: Automatic Quantification and Unified Agent in Hydrology

Songkun Yan, Zhi Li, Siyu Zhu, Yixin Wen, Mofan Zhang, Mengye Chen, Jie Cao, Yang Hong

arXiv · Jan 2025 · doi:10.48550/arXiv.2508.02936

ai-for-science llm-agents hydrologic-modeling language-agents vision-llm automatic-data-retrieval simulation

BibTeX

@inproceedings{songkun2025sea,
  author    = {Yan, Songkun and Li, Zhi and Zhu, Siyu and Wen, Yixin and Zhang, Mofan and Chen, Mengye and Cao, Jie and Hong, Yang},
  title     = {{AQUAH}: Automatic Quantification and Unified Agent in Hydrology},
  booktitle = {1st Workshop on Sustainability with Earth Observation and AI (co-located with ICCV 2025)},
  year      = {2025},
  url       = {https://arxiv.org/abs/2508.02936},
}

2024

Classifying Tutor Discursive Moves at Scale in Mathematics Classrooms with Large Language Models

Baptiste Moreau-Pernet, Yu Tian, Sandra Sawaya, Peter Foltz, Jie Cao, Brent Milne, Thomas Christie

Learning @ Scale (L@S) 2024 · Jul 2024

ai-for-education dialogue-and-discourse discourse-analysis math-tutoring large-language-models

BibTeX

@inproceedings{talkmove-llm-2024,
  author    = {Moreau-Pernet, Baptiste and Tian, Yu and Sawaya, Sandra and Foltz, Peter and Cao, Jie and Milne, Brent and Christie, Thomas},
  title     = {Classifying Tutor Discursive Moves at Scale in Mathematics Classrooms with Large Language Models},
  booktitle = {Proceedings of the Eleventh ACM Conference on Learning @ Scale},
  series    = {L@S '24},
  year      = {2024},
  pages     = {361--365},
  numpages  = {5},
  publisher = {Association for Computing Machinery},
  location  = {Atlanta, GA, USA},
  isbn      = {9798400706332},
  doi       = {10.1145/3657604.3664664},
  url       = {https://doi.org/10.1145/3657604.3664664},
  keywords  = {discourse analysis, llm classification, math tutor training},
}

Enhancing Talk Moves Analysis in Mathematics Tutoring through Classroom Teaching Discourse

Jie Cao, Abhijit Suresh, Jennifer Jacobs, Charis Clevenger, Amanda Howard, Chelsea Brown, Brent Milne, Tom Fischaber, Tamara Sumner, James H. Martin

arXiv · Jan 2024 · doi:10.48550/arxiv.2412.13395

ai-for-education dialogue-and-discourse mathematics-tutoring saga22-dataset pretraining dialogue-context

BibTeX

@inproceedings{talkmove-coling-2024,
  author        = {Cao, Jie and Suresh, Abhijit and Jacobs, Jennifer and Clevenger, Charis and Howard, Amanda and Brown, Chelsea and Milne, Brent and Fischaber, Tom and Sumner, Tamara and Martin, James H.},
  title         = {Enhancing Talk Moves Analysis in Mathematics Tutoring through Classroom Teaching Discourse},
  booktitle     = {Proceedings of the 31st International Conference on Computational Linguistics},
  year          = {2025},
  eprint        = {2412.13395},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2412.13395},
}

Earlier

Mind the Gap between the Application Track and the Real World

Ananya Ganesh, Jie Cao, E. Margaret Perkoff, Rosy Southwell, Martha Palmer, Katharina Kann

ACL 2023 · Jul 2023

trustworthy-ai nlp-applications evaluation generalization

BibTeX

@inproceedings{ananya-acl23,
  author    = {Ganesh, Ananya and Cao, Jie and Perkoff, E. Margaret and Southwell, Rosy and Palmer, Martha and Kann, Katharina},
  title     = {Mind the Gap between the Application Track and the Real World},
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics},
  year      = {2023},
}

Comparing Neural Question Generation Architectures for Reading Comprehension

E. Margaret Perkoff, Abhidip Bhattacharyya, Jon Cai, Jie Cao

BEA 2023 (ACL Workshop) · Jul 2023

ai-for-education question-generation reading-comprehension education

BibTeX

@inproceedings{perkoff2023comparing,
  author    = {Perkoff, E. Margaret and Bhattacharyya, Abhidip and Cai, Jon and Cao, Jie},
  title     = {Comparing Neural Question Generation Architectures for Reading Comprehension},
  booktitle = {Proceedings of the 18th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2023)},
  url       = {https://aclanthology.org/2023.bea-1.47/},
  year      = {2023},
}

Designing an AI Partner for Jigsaw Classrooms

Jie Cao, Rachel Dickler, Marie Grace, Alessandro Roncone, Leanne Hirshfield, Marilyn Walker, Martha Palmer

AIAIC 2023 (Workshop) · Jun 2023

ai-for-education dialogue-and-discourse ai-partner classroom-interaction dialogue education

BibTeX

@inproceedings{cao-jigsaw23,
  author    = {Cao, Jie and Dickler, Rachel and Grace, Marie and Roncone, Alessandro and Hirshfield, Leanne and Walker, Marilyn and Palmer, Martha},
  title     = {Designing an {AI} Partner for Jigsaw Classrooms},
  booktitle = {Workshop on Language-Based AI Character Interaction with Children},
  year      = {2023},
}

A Comparative Analysis of Automatic Speech Recognition Errors in Small Group Classroom Discourse

Jie Cao, Ananya Ganesh, Jon Cai, Rosy Southwell, E. Margaret Perkoff, Michael Regan, Katharina Kann, James Martin, Martha Palmer, Sidney D’Mello

UMAP 2023 · Jun 2023

ai-for-education dialogue-and-discourse speech-recognition classroom-discourse education

BibTeX

@inproceedings{cao-umap23,
  author    = {Cao, Jie and Ganesh, Ananya and Cai, Jon and Southwell, Rosy and Perkoff, E. Margaret and Regan, Michael and Kann, Katharina and Martin, James and Palmer, Martha and D'Mello, Sidney},
  title     = {A Comparative Analysis of Automatic Speech Recognition Errors in Small Group Classroom Discourse},
  booktitle = {Proceedings of the 31st ACM Conference on User Modeling, Adaptation and Personalization},
  year      = {2023},
}

Dependency Dialogue Acts — Annotation Scheme and Case Study

Jon Cai, Brendan D. King, E. Margaret Perkoff, Shiran Dudy, Jie Cao, …, Ananya Ganesh, James Martin, Martha Palmer, Marilyn Walker, Jeffrey Flanigan

IWSDS 2023 · Feb 2023

dialogue-and-discourse dialogue-acts annotation discourse-parsing

BibTeX

@inproceedings{jon-dda2022,
  author    = {Cai, Jon and King, Brendan D. and Perkoff, E. Margaret and Dudy, Shiran and Cao, Jie and Grace, Marie and Wojarnik, Natalia and Ganesh, Ananya and Martin, James and Palmer, Martha and Walker, Marilyn and Flanigan, Jeffrey},
  title     = {Dependency Dialogue Acts --- Annotation Scheme and Case Study},
  booktitle = {The 13th International Workshop on Spoken Dialogue Systems Technology},
  year      = {2022},
}

Inductive Biases for Deep Linguistic Structured Prediction with Independent Factorization

Jie Cao

PhD Dissertation, University of Utah · May 2022

structured-prediction inductive-bias parsing dissertation

BibTeX

@phdthesis{dissertation-proquest,
  author = {Cao, Jie},
  title  = {Inductive Biases for Deep Linguistic Structured Prediction with Independent Factorization},
  school = {University of Utah},
  year   = {2022},
  month  = may,
  note   = {Available from ProQuest Dissertations \& Theses A\&I; ProQuest Dissertations \& Theses Global (2777357718)},
}

Database Workload Characterization with Query Plan Encoders

Debjyoti Paul, Jie Cao, Feifei Li, Vivek Srikumar

PVLDB (VLDB 2022) · Dec 2021

structured-prediction databases query-plan-encoding contrastive-learning

BibTeX

@article{cao2021dbqencoder,
  author    = {Paul, Debjyoti and Cao, Jie and Li, Feifei and Srikumar, Vivek},
  title     = {Database Workload Characterization with Query Plan Encoders},
  journal   = {Proceedings of the VLDB Endowment},
  publisher = {VLDB Endowment},
  year      = {2021},
  volume    = {15},
  number    = {4},
  pages     = {923--935},
}

A Comparative Study on Schema-Guided Dialogue State Tracking

Jie Cao, Yi Zhang

NAACL 2021 · Jun 2021

dialogue-and-discourse dialogue-state-tracking schema-guided-dialogue

BibTeX

@inproceedings{cao2021comparative,
  author    = {Cao, Jie and Zhang, Yi},
  title     = {A Comparative Study on Schema-Guided Dialogue State Tracking},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year      = {2021},
  pages     = {782--796},
}

Amazon at MRP 2019: Parsing Meaning Representations with Lexical and Phrasal Anchoring

Jie Cao, Yi Zhang, Adel Youssef, Vivek Srikumar

CoNLL 2019 (MRP Shared Task) · Nov 2019

structured-prediction meaning-representation-parsing semantic-parsing shared-task

BibTeX

@inproceedings{cao2019amazon,
  author    = {Cao, Jie and Zhang, Yi and Youssef, Adel and Srikumar, Vivek},
  title     = {{Amazon} at {MRP} 2019: Parsing Meaning Representations with Lexical and Phrasal Anchoring},
  booktitle = {Proceedings of the Shared Task on Cross-Framework Meaning Representation Parsing at the Conference on Natural Language Learning},
  year      = {2019},
  pages     = {138--148},
}

Rhetorically Controlled Encoder-Decoder for Modern Chinese Poetry Generation

Zhiqiang Liu, Zuohui Fu, Jie Cao, Gerard de Melo, Yik-Cheung Tam, Cheng Niu, Jie Zhou

ACL 2019 · Jul 2019

structured-prediction poetry-generation controllable-generation

BibTeX

@inproceedings{rhetorical-poetry2019,
  author    = {Liu, Zhiqiang and Fu, Zuohui and Cao, Jie and de Melo, Gerard and Tam, Yik-Cheung and Niu, Cheng and Zhou, Jie},
  title     = {Rhetorically Controlled Encoder-Decoder for Modern {Chinese} Poetry Generation},
  booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
  year      = {2019},
  location  = {Florence, Italy},
}

Observing Dialogue in Therapy: Categorizing and Forecasting Behavioral Codes

Jie Cao, Michael Tanana, Zac Imel, Eric Poitras, David Atkins, Vivek Srikumar

ACL 2019 · Jul 2019

dialogue-and-discourse psychotherapy dialogue behavioral-coding mental-health

BibTeX

@inproceedings{cao2019observing,
  author    = {Cao, Jie and Tanana, Michael and Imel, Zac and Poitras, Eric and Atkins, David and Srikumar, Vivek},
  title     = {Observing Dialogue in Therapy: Categorizing and Forecasting Behavioral Codes},
  booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
  location  = {Florence, Italy},
  year      = {2019},
}

End-to-end Gated Self-attentive Memory Network for Dialog Response Selection

Shuo Sun, Yik-Cheung Tam, Jie Cao, Canxiang Yan, Zuohui Fu, Cheng Niu, Jie Zhou

AAAI DSTC7 Workshop 2019 · Jan 2019

dialogue-and-discourse dialogue response-selection memory-networks

BibTeX

@inproceedings{jie2019dstc,
  author    = {Sun, Shuo and Tam, Yik-Cheung and Cao, Jie and Yan, Canxiang and Fu, Zuohui and Niu, Cheng and Zhou, Jie},
  title     = {End-to-end Gated Self-attentive Memory Network for Dialog Response Selection},
  booktitle = {AAAI DSTC7 Workshop},
  year      = {2019},
  location  = {Honolulu, USA},
  note      = {Equal contribution},
}

A Distributed SVM Method Based on the Iterative MapReduce

Xijiang Ke, Hai Jin, Xia Xie, Jie Cao

IEEE ICSC 2015 · Feb 2015

support-vector-machines mapreduce distributed-learning

BibTeX

@inproceedings{ke2015distributed,
  author       = {Ke, Xijiang and Jin, Hai and Xie, Xia and Cao, Jie},
  title        = {A Distributed {SVM} Method Based on the Iterative {MapReduce}},
  booktitle    = {IEEE International Conference on Semantic Computing (ICSC)},
  year         = {2015},
  pages        = {116--119},
  organization = {IEEE},
}

JRBridge: A Framework of Large-Scale Statistical Computing for R

Xia Xie, Jie Cao, Hai Jin, Xijiang Ke, Wenzhi Cao

IEEE APSCC 2012 · Dec 2012

distributed-computing statistical-computing r

BibTeX

@inproceedings{xie2012jrbridge,
  author       = {Xie, Xia and Cao, Jie and Jin, Hai and Ke, Xijiang and Cao, Wenzhi},
  title        = {{JRBridge}: A Framework of Large-Scale Statistical Computing for {R}},
  booktitle    = {IEEE Asia-Pacific Services Computing Conference (APSCC)},
  year         = {2012},
  pages        = {27--34},
  organization = {IEEE},
}