OUNLP — Natural Language Processing Lab, University of Oklahoma, School of Computer Science

Research

At OUNLP, we explore the frontier of natural language processing and machine learning with the goal of building intelligent, trustworthy, and practical AI systems. Our work spans multi-party, multi-modal dialogue and discourse analysis (in domains like education and mental health), agentic models and domain-specific “world” models for human-AI teaming, efficient structured-prediction and symbolic methods to augment neural networks, and the robust deployment and evaluation of trustworthy AI.

Explore our publications by research theme:

Highlighted

A Multimodal Large Language Model for Materials Science
A Multimodal Large Language Model for Materials Science
Yingheng Tang, Wenbin Xu, Jie Cao, Weilu Gao, Steve Farrell, Benjamin Erichson, Michael W. Mahoney, Andy Nonaka, Zhi Yao
Nature Machine Intelligence  ·  01 Apr 2026  ·  doi:10.1038/s42256-026-01214-y
BibTeX
@article{tang2026matterchat,
  author    = {Tang, Yingheng and Xu, Wenbin and Cao, Jie and Gao, Weilu and Farrell, Steve and Erichson, Benjamin and Mahoney, Michael W. and Nonaka, Andy and Yao, Zhi},
  title     = {A Multimodal Large Language Model for Materials Science},
  journal   = {Nature Machine Intelligence},
  publisher = {Nature Publishing Group},
  year      = {2026},
  doi       = {10.1038/s42256-026-01214-y},
  url       = {https://doi.org/10.1038/s42256-026-01214-y}
}

All

2026

HydroAgent: Closing the Gap Between Frontier LLMs and Human Experts in Hydrologic Model Calibration via Simulator-Grounded RL
HydroAgent: Closing the Gap Between Frontier LLMs and Human Experts in Hydrologic Model Calibration via Simulator-Grounded RL
Zhi Li, Songkun Yan, Jie Cao, Mofan Zhang, Anjiang Wei, Jinwoong Yoo, Yang Hong
arXiv  ·  01 May 2026
BibTeX
@misc{li2026hydroagent,
  title = {{HydroAgent}: Closing the Gap Between Frontier {LLMs} and Human Experts in Hydrologic Model Calibration via Simulator-Grounded {RL}},
  author = {Zhi Li and Songkun Yan and Jie Cao and Mofan Zhang and Anjiang Wei and Jinwoong Yoo and Yang Hong},
  howpublished = {arXiv preprint},
  year = {2026},
  eprint = {2605.17792},
  archiveprefix = {arXiv},
  url = {https://arxiv.org/abs/2605.17792}
}
A Multimodal Large Language Model for Materials Science
A Multimodal Large Language Model for Materials Science
Yingheng Tang, Wenbin Xu, Jie Cao, Weilu Gao, Steve Farrell, Benjamin Erichson, Michael W. Mahoney, Andy Nonaka, Zhi Yao
Nature Machine Intelligence  ·  01 Apr 2026  ·  doi:10.1038/s42256-026-01214-y
BibTeX
@article{tang2026multimodal,
  title = {A Multimodal Large Language Model for Materials Science},
  author = {Yingheng Tang and Wenbin Xu and Jie Cao and Weilu Gao and Steve Farrell and Benjamin Erichson and Michael W. Mahoney and Andy Nonaka and Zhi Yao},
  journal = {Nature Machine Intelligence},
  year = {2026},
  doi = {10.1038/s42256-026-01214-y},
  url = {https://doi.org/10.1038/s42256-026-01214-y},
  publisher = {Nature Publishing Group}
}
Translation via Annotation: A Computational Study of Translating Classical Chinese into Japanese
Translation via Annotation: A Computational Study of Translating Classical Chinese into Japanese
Zilong Li, Jie Cao
EACL 2026  ·  01 Mar 2026
BibTeX
@inproceedings{li2026translation,
  title = {Translation via Annotation: A Computational Study of Translating {Classical Chinese} into {Japanese}},
  author = {Zilong Li and Jie Cao},
  booktitle = {EACL 2026},
  year = {2026},
  url = {https://aclanthology.org/2026.eacl-long.285/}
}

2025

A Mamba-type of deep state space model for reservoir release simulation with a large-scale verification over 441 dams across CONUS
A Mamba-type of deep state space model for reservoir release simulation with a large-scale verification over 441 dams across CONUS
Jiaorui Zhang, Haowen Yue, Milad Basirifard, Jie Cao, Tiantian Yang
Journal of Hydrology  ·  01 Dec 2025  ·  doi:10.1016/j.jhydrol.2025.134145
BibTeX
@article{zhang2025mambatype,
  title = {A {Mamba}-type of deep state space model for reservoir release simulation with a large-scale verification over 441 dams across {CONUS}},
  author = {Jiaorui Zhang and Haowen Yue and Milad Basirifard and Jie Cao and Tiantian Yang},
  journal = {Journal of Hydrology},
  year = {2025},
  doi = {10.1016/j.jhydrol.2025.134145},
  url = {https://doi.org/g977xq}
}
Do LLMs Encode Frame Semantics? Evidence from Frame Identification
Do LLMs Encode Frame Semantics? Evidence from Frame Identification
Jayanth Krishna Chundru, Rudrashis Poddar, Jie Cao, Tianyu Jiang
EMNLP 2025  ·  01 Nov 2025
BibTeX
@inproceedings{chundru2025do,
  title = {Do {LLMs} Encode Frame Semantics? Evidence from Frame Identification},
  author = {Jayanth Krishna Chundru and Rudrashis Poddar and Jie Cao and Tianyu Jiang},
  booktitle = {EMNLP 2025},
  year = {2025},
  url = {https://aclanthology.org/2025.emnlp-main.1499/}
}
OUNLP at TSAR 2025 Shared Task: Multi-Round Text Simplifier via Code Generation
OUNLP at TSAR 2025 Shared Task: Multi-Round Text Simplifier via Code Generation
Cuong Huynh, Jie Cao
TSAR 2025 (EMNLP Workshop)  ·  01 Nov 2025
BibTeX
@inproceedings{huynh2025ounlp,
  title = {{OUNLP} at {TSAR} 2025 Shared Task: Multi-Round Text Simplifier via Code Generation},
  author = {Cuong Huynh and Jie Cao},
  booktitle = {TSAR 2025 (EMNLP Workshop)},
  year = {2025},
  url = {https://aclanthology.org/2025.tsar-1.19/}
}
“Understanding Robustness Lottery”: A Geometric Visual Comparative Analysis of Neural Network Pruning Approaches
“Understanding Robustness Lottery”: A Geometric Visual Comparative Analysis of Neural Network Pruning Approaches
Zhimin Li, Shusen Liu, Xin Yu, Kailkhura Bhavya, Jie Cao, James Daniel Diffenderfer, Peer-Timo Bremer, Valerio Pascucci
IEEE Transactions on Visualization and Computer Graphics  ·  01 Sep 2025  ·  doi:10.1109/TVCG.2024.3514996
BibTeX
@article{li2025understanding,
  title = {{``Understanding Robustness Lottery''}: A Geometric Visual Comparative Analysis of Neural Network Pruning Approaches},
  author = {Zhimin Li and Shusen Liu and Xin Yu and Bhavya Kailkhura and Jie Cao and James Daniel Diffenderfer and Peer-Timo Bremer and Valerio Pascucci},
  journal = {IEEE Transactions on Visualization and Computer Graphics},
  year = {2025},
  doi = {10.1109/TVCG.2024.3514996},
  url = {https://doi.org/g977zt}
}
Towards Actionable Pedagogical Feedback: A Multi-Perspective Analysis of Mathematics Teaching and Tutoring Dialogue
Towards Actionable Pedagogical Feedback: A Multi-Perspective Analysis of Mathematics Teaching and Tutoring Dialogue
Jannatun Naim, Jie Cao, Fareen Tasneem, Jennifer Jacobs, Brent Milne, James Martin, Tamara Sumner
International Educational Data Mining Society  ·  12 Jul 2025  ·  doi:10.5281/zenodo.15870176
BibTeX
@inproceedings{naim2025towards,
  title = {Towards Actionable Pedagogical Feedback: A Multi-Perspective Analysis of Mathematics Teaching and Tutoring Dialogue},
  author = {Jannatun Naim and Jie Cao and Fareen Tasneem and Jennifer Jacobs and Brent Milne and James Martin and Tamara Sumner},
  booktitle = {International Educational Data Mining Society},
  year = {2025},
  doi = {10.5281/zenodo.15870176},
  url = {https://doi.org/g977zp}
}
AQUAH: Automatic Quantification and Unified Agent in Hydrology
AQUAH: Automatic Quantification and Unified Agent in Hydrology
Songkun Yan, Zhi Li, Siyu Zhu, Yixin Wen, Mofan Zhang, Mengye Chen, Jie Cao, Yang Hong
arXiv  ·  01 Jan 2025  ·  doi:10.48550/arXiv.2508.02936
BibTeX
@misc{yan2025aquah,
  title = {{AQUAH}: Automatic Quantification and Unified Agent in Hydrology},
  author = {Songkun Yan and Zhi Li and Siyu Zhu and Yixin Wen and Mofan Zhang and Mengye Chen and Jie Cao and Yang Hong},
  howpublished = {arXiv preprint},
  year = {2025},
  eprint = {2508.02936},
  archiveprefix = {arXiv},
  doi = {10.48550/arXiv.2508.02936},
  url = {https://doi.org/g977zr}
}
Rethinking On-policy Optimization for Query Augmentation
Rethinking On-policy Optimization for Query Augmentation
Zhichao Xu, Shengyao Zhuang, Xueguang Ma, Bingsen Chen, Yijun Tian, Fengran Mo, Jie Cao, Vivek Srikumar
arXiv  ·  01 Jan 2025  ·  doi:10.48550/arXiv.2510.17139
BibTeX
@misc{xu2025rethinking,
  title = {Rethinking On-policy Optimization for Query Augmentation},
  author = {Zhichao Xu and Shengyao Zhuang and Xueguang Ma and Bingsen Chen and Yijun Tian and Fengran Mo and Jie Cao and Vivek Srikumar},
  howpublished = {arXiv preprint},
  year = {2025},
  eprint = {2510.17139},
  archiveprefix = {arXiv},
  doi = {10.48550/arXiv.2510.17139},
  url = {https://doi.org/g98bgt}
}

2024

Classifying Tutor Discursive Moves at Scale in Mathematics Classrooms with Large Language Models
Classifying Tutor Discursive Moves at Scale in Mathematics Classrooms with Large Language Models
Baptiste Moreau-Pernet, Yu Tian, Sandra Sawaya, Peter Foltz, Jie Cao, Brent Milne, Thomas Christie
Learning @ Scale (L@S) 2024  ·  01 Jul 2024
BibTeX
@inproceedings{moreaupernet2024classifying,
  title = {Classifying Tutor Discursive Moves at Scale in Mathematics Classrooms with Large Language Models},
  author = {Baptiste Moreau-Pernet and Yu Tian and Sandra Sawaya and Peter Foltz and Jie Cao and Brent Milne and Thomas Christie},
  booktitle = {Learning @ Scale (L@S) 2024},
  year = {2024},
  doi = {10.1145/3657604.3664664},
  url = {https://doi.org/10.1145/3657604.3664664}
}
Enhancing Talk Moves Analysis in Mathematics Tutoring through Classroom Teaching Discourse
Enhancing Talk Moves Analysis in Mathematics Tutoring through Classroom Teaching Discourse
Jie Cao, Abhijit Suresh, Jennifer Jacobs, Charis Clevenger, Amanda Howard, Chelsea Brown, Brent Milne, Tom Fischaber, Tamara Sumner, James H. Martin
arXiv  ·  01 Jan 2024  ·  doi:10.48550/arxiv.2412.13395
BibTeX
@misc{cao2024enhancing,
  title = {Enhancing Talk Moves Analysis in Mathematics Tutoring through Classroom Teaching Discourse},
  author = {Jie Cao and Abhijit Suresh and Jennifer Jacobs and Charis Clevenger and Amanda Howard and Chelsea Brown and Brent Milne and Tom Fischaber and Tamara Sumner and James H. Martin},
  howpublished = {arXiv preprint},
  year = {2024},
  eprint = {2412.13395},
  archiveprefix = {arXiv},
  doi = {10.48550/arxiv.2412.13395},
  url = {https://doi.org/g977zq}
}

2023

Mind the Gap between the Application Track and the Real World
Mind the Gap between the Application Track and the Real World
Ananya Ganesh, Jie Cao, E. Margaret Perkoff, Rosy Southwell, Martha Palmer, Katharina Kann
ACL 2023  ·  01 Jul 2023
BibTeX
@inproceedings{ganesh2023mind,
  author    = {Ganesh, Ananya and Cao, Jie and Perkoff, E. Margaret and Southwell, Rosy and Palmer, Martha and Kann, Katharina},
  title     = {Mind the Gap between the Application Track and the Real World},
  booktitle = {ACL 2023},
  year      = {2023},
  url       = {https://aclanthology.org/2023.acl-short.156/}
}
Comparing Neural Question Generation Architectures for Reading Comprehension
Comparing Neural Question Generation Architectures for Reading Comprehension
E. Margaret Perkoff, Abhidip Bhattacharyya, Jon Cai, Jie Cao
BEA 2023 (ACL Workshop)  ·  01 Jul 2023
BibTeX
@inproceedings{perkoff2023comparing,
  author    = {Perkoff, E. Margaret and Bhattacharyya, Abhidip and Cai, Jon and Cao, Jie},
  title     = {Comparing Neural Question Generation Architectures for Reading Comprehension},
  booktitle = {BEA 2023 (ACL Workshop)},
  year      = {2023},
  url       = {https://aclanthology.org/2023.bea-1.47/}
}
Designing an AI Partner for Jigsaw Classrooms
Designing an AI Partner for Jigsaw Classrooms
Jie Cao, Rachel Dickler, Marie Grace, Alessandro Roncone, Leanne Hirshfield, Marilyn Walker, Martha Palmer
AIAIC 2023 (Workshop)  ·  01 Jun 2023
BibTeX
@inproceedings{cao2023designing,
  title = {Designing an {AI} Partner for Jigsaw Classrooms},
  author = {Jie Cao and Rachel Dickler and Marie Grace and Alessandro Roncone and Leanne Hirshfield and Marilyn Walker and Martha Palmer},
  booktitle = {AIAIC 2023 (Workshop)},
  year = {2023},
  url = {https://aichildinteraction.github.io/preprint/AIAIC23_paper_7399.pdf}
}
A Comparative Analysis of Automatic Speech Recognition Errors in Small Group Classroom Discourse
A Comparative Analysis of Automatic Speech Recognition Errors in Small Group Classroom Discourse
Jie Cao, Ananya Ganesh, Jon Cai, Rosy Southwell, E. Margaret Perkoff, Michael Regan, Katharina Kann, James Martin, Martha Palmer, Sidney D’Mello
UMAP 2023  ·  01 Jun 2023
BibTeX
@inproceedings{cao2023comparative,
  title = {A Comparative Analysis of Automatic Speech Recognition Errors in Small Group Classroom Discourse},
  author = {Jie Cao and Ananya Ganesh and Jon Cai and Rosy Southwell and E. Margaret Perkoff and Michael Regan and Katharina Kann and James Martin and Martha Palmer and Sidney D'Mello},
  booktitle = {UMAP 2023},
  year = {2023},
  doi = {10.1145/3565472.3595606},
  url = {https://dl.acm.org/doi/10.1145/3565472.3595606}
}
Dependency Dialogue Acts — Annotation Scheme and Case Study
Dependency Dialogue Acts — Annotation Scheme and Case Study
Jon Cai, Brendan D. King, E. Margaret Perkoff, Shiran Dudy, Jie Cao, …, Ananya Ganesh, James Martin, Martha Palmer, Marilyn Walker, Jeffrey Flanigan
IWSDS 2023  ·  01 Feb 2023
BibTeX
@inproceedings{cai2023dependency,
  title = {Dependency Dialogue Acts --- Annotation Scheme and Case Study},
  author = {Jon Cai and Brendan D. King and E. Margaret Perkoff and Shiran Dudy and Jie Cao and Marie Grace and Natalia Wojarnik and Ananya Ganesh and James Martin and Martha Palmer and Marilyn Walker and Jeffrey Flanigan},
  booktitle = {IWSDS 2023},
  year = {2023},
  eprint = {2302.12944},
  archiveprefix = {arXiv},
  url = {https://arxiv.org/abs/2302.12944}
}

2022

Inductive Biases for Deep Linguistic Structured Prediction with Independent Factorization
Inductive Biases for Deep Linguistic Structured Prediction with Independent Factorization
Jie Cao
PhD Dissertation, University of Utah  ·  01 May 2022
BibTeX
@phdthesis{cao2022inductive,
  author = {Cao, Jie},
  title  = {Inductive Biases for Deep Linguistic Structured Prediction with Independent Factorization},
  school = {University of Utah},
  year   = {2022},
  url    = {https://www.proquest.com/docview/2777357718}
}

2021

Database Workload Characterization with Query Plan Encoders
Database Workload Characterization with Query Plan Encoders
Debjyoti Paul, Jie Cao, Feifei Li, Vivek Srikumar
PVLDB (VLDB 2022)  ·  01 Dec 2021
BibTeX
@article{paul2021database,
  title = {Database Workload Characterization with Query Plan Encoders},
  author = {Debjyoti Paul and Jie Cao and Feifei Li and Vivek Srikumar},
  journal = {PVLDB (VLDB 2022)},
  year = {2021},
  doi = {10.14778/3503585.3503600},
  url = {https://dl.acm.org/doi/10.14778/3503585.3503600}
}
A Comparative Study on Schema-Guided Dialogue State Tracking
A Comparative Study on Schema-Guided Dialogue State Tracking
Jie Cao, Yi Zhang
NAACL 2021  ·  01 Jun 2021
BibTeX
@inproceedings{cao2021comparative,
  author    = {Cao, Jie and Zhang, Yi},
  title     = {A Comparative Study on Schema-Guided Dialogue State Tracking},
  booktitle = {NAACL 2021},
  year      = {2021},
  url       = {https://aclanthology.org/2021.naacl-main.62/}
}

Earlier

Amazon at MRP 2019: Parsing Meaning Representations with Lexical and Phrasal Anchoring
Amazon at MRP 2019: Parsing Meaning Representations with Lexical and Phrasal Anchoring
Jie Cao, Yi Zhang, Adel Youssef, Vivek Srikumar
CoNLL 2019 (MRP Shared Task)  ·  01 Nov 2019
BibTeX
@inproceedings{cao2019amazon,
  title = {{Amazon} at {MRP} 2019: Parsing Meaning Representations with Lexical and Phrasal Anchoring},
  author = {Jie Cao and Yi Zhang and Adel Youssef and Vivek Srikumar},
  booktitle = {CoNLL 2019 (MRP Shared Task)},
  year = {2019},
  url = {https://aclanthology.org/K19-2013/}
}
Rhetorically Controlled Encoder-Decoder for Modern Chinese Poetry Generation
Rhetorically Controlled Encoder-Decoder for Modern Chinese Poetry Generation
Zhiqiang Liu, Zuohui Fu, Jie Cao, Gerard de Melo, Yik-Cheung Tam, Cheng Niu, Jie Zhou
ACL 2019  ·  01 Jul 2019
BibTeX
@inproceedings{liu2019rhetorically,
  title = {Rhetorically Controlled Encoder-Decoder for Modern {Chinese} Poetry Generation},
  author = {Zhiqiang Liu and Zuohui Fu and Jie Cao and Gerard de Melo and Yik-Cheung Tam and Cheng Niu and Jie Zhou},
  booktitle = {ACL 2019},
  year = {2019},
  url = {https://aclanthology.org/P19-1192/}
}
Observing Dialogue in Therapy: Categorizing and Forecasting Behavioral Codes
Observing Dialogue in Therapy: Categorizing and Forecasting Behavioral Codes
Jie Cao, Michael Tanana, Zac Imel, Eric Poitras, David Atkins, Vivek Srikumar
ACL 2019  ·  01 Jul 2019
BibTeX
@inproceedings{cao2019observing,
  author    = {Cao, Jie and Tanana, Michael and Imel, Zac and Poitras, Eric and Atkins, David and Srikumar, Vivek},
  title     = {Observing Dialogue in Therapy: Categorizing and Forecasting Behavioral Codes},
  booktitle = {ACL 2019},
  year      = {2019},
  url       = {https://aclanthology.org/P19-1563/}
}
End-to-end Gated Self-attentive Memory Network for Dialog Response Selection
End-to-end Gated Self-attentive Memory Network for Dialog Response Selection
Shuo Sun, Yik-Cheung Tam, Jie Cao, Canxiang Yan, Zuohui Fu, Cheng Niu, Jie Zhou
AAAI DSTC7 Workshop 2019  ·  01 Jan 2019
BibTeX
@inproceedings{sun2019endtoend,
  author    = {Sun, Shuo and Tam, Yik-Cheung and Cao, Jie and Yan, Canxiang and Fu, Zuohui and Niu, Cheng and Zhou, Jie},
  title     = {End-to-end Gated Self-attentive Memory Network for Dialog Response Selection},
  booktitle = {AAAI DSTC7 Workshop 2019},
  year      = {2019},
  url       = {http://workshop.colips.org/dstc7/papers/13.pdf}
}
A Distributed SVM Method Based on the Iterative MapReduce
A Distributed SVM Method Based on the Iterative MapReduce
Xijiang Ke, Hai Jin, Xia Xie, Jie Cao
IEEE ICSC 2015  ·  01 Feb 2015
BibTeX
@inproceedings{ke2015distributed,
  title = {A Distributed {SVM} Method Based on the Iterative {MapReduce}},
  author = {Xijiang Ke and Hai Jin and Xia Xie and Jie Cao},
  booktitle = {IEEE ICSC 2015},
  year = {2015},
}
JRBridge: A Framework of Large-Scale Statistical Computing for R
JRBridge: A Framework of Large-Scale Statistical Computing for R
Xia Xie, Jie Cao, Hai Jin, Xijiang Ke, Wenzhi Cao
IEEE APSCC 2012  ·  01 Dec 2012
BibTeX
@inproceedings{xie2012jrbridge,
  title = {{JRBridge}: A Framework of Large-Scale Statistical Computing for {R}},
  author = {Xia Xie and Jie Cao and Hai Jin and Xijiang Ke and Wenzhi Cao},
  booktitle = {IEEE APSCC 2012},
  year = {2012},
}