Publications

Cherian, A., Lohit, S., Peng, K.-C., "WISE: Weighted Iterative Society-of-Experts for Multimodal Multi-Agent Debate with Probabilistic Consensus", ICML SCALE AI Workshop, June 2026.
BibTeX TR2026-083 PDF
- @inproceedings{Cherian2026jun,
-   author    = {Cherian, Anoop and Lohit, Suhas and Peng, Kuan-Chuan},
-   title     = {{WISE: Weighted Iterative Society-of-Experts for Multimodal Multi-Agent Debate with Probabilistic Consensus}},
-   booktitle = {ICML SCALE AI Workshop},
-   month     = jun,
-   year      = {2026},
-   url       = {https://www.merl.com/publications/TR2026-083},
- }
Zhou, Q., Gan, C., Cherian, A., "LLawCo: Learning Laws of Cooperation for Modeling Embodied Multi-Agent Behavior", International Conference on Machine Learning (ICML), June 2026.
BibTeX TR2026-081 PDF Video
- @inproceedings{Zhou2026jun,
-   author    = {Zhou, Qinhong and Gan, Chuang and Cherian, Anoop},
-   title     = {{LLawCo: Learning Laws of Cooperation for Modeling Embodied Multi-Agent Behavior}},
-   booktitle = {International Conference on Machine Learning (ICML)},
-   month     = jun,
-   year      = {2026},
-   url       = {https://www.merl.com/publications/TR2026-081},
- }
Li, D., Zhang, J., Egger, B., Chatterjee, M., Lohit, S., Marks, T.K., Cherian, A., "AssemblyBench: Physics-Aware Assembly of Complex Industrial Objects", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2026.
BibTeX TR2026-076 PDF Video Data Software
- @inproceedings{Li2026jun,
-   author    = {Li, Danrui and Zhang, Jiahao and Egger, Bernhard and Chatterjee, Moitreya and Lohit, Suhas and Marks, Tim K. and Cherian, Anoop},
-   title     = {{AssemblyBench: Physics-Aware Assembly of Complex Industrial Objects}},
-   booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
-   month     = jun,
-   year      = {2026},
-   url       = {https://www.merl.com/publications/TR2026-076},
- }
Mumcu, F., Jones, M.J., Cherian, A., Yilmaz, Y., "Is Video Anomaly Detection Misframed? Evidence from LLM-Based and Multi-Scene Models", arXiv, May 2026.
BibTeX arXiv
- @article{Mumcu2026may,
- author = {Mumcu, Furkan and Jones, Michael J. and Cherian, Anoop and Yilmaz, Yasin},
- title = {{Is Video Anomaly Detection Misframed? Evidence from LLM-Based and Multi-Scene Models}},
- journal = {arXiv},
- year = 2026,
- month = may,
- eprint = {2605.12725},
- archiveprefix = {arXiv},
- url = {https://arxiv.org/abs/2605.12725}
- }
Cherian, A., Corcodel, R., Jain, S., Romeres, D., "LLMPhy: Parameter-Identifiable Physical Reasoning Combining Large Language Models and Physics Engines", International Conference on Artificial Intelligence and Statistics (AISTATS), May 2026.
BibTeX TR2026-052 PDF Data Software
- @inproceedings{Cherian2026may,
-   author    = {Cherian, Anoop and Corcodel, Radu and Jain, Siddarth and Romeres, Diego},
-   title     = {{LLMPhy: Parameter-Identifiable Physical Reasoning Combining Large Language Models and Physics Engines}},
-   booktitle = {International Conference on Artificial Intelligence and Statistics (AISTATS)},
-   month     = may,
-   year      = {2026},
-   url       = {https://www.merl.com/publications/TR2026-052},
- }
Zhang, Y., Corcodel, R., Hori, C., Cherian, A., Zhao, D., "SpinBench: 3D Rotation as a Lens on Spatial Reasoning in VLMs", International Conference on Learning Representations (ICLR) 2026, April 2026.
BibTeX TR2026-045 PDF
- @inproceedings{Zhang2026apr2,
-   author    = {Zhang, Yuyou and Corcodel, Radu and Hori, Chiori and Cherian, Anoop and Zhao, Ding},
-   title     = {{SpinBench: 3D Rotation as a Lens on Spatial Reasoning in VLMs}},
-   booktitle = {International Conference on Learning Representations (ICLR) 2026},
-   month     = apr,
-   year      = {2026},
-   url       = {https://www.merl.com/publications/TR2026-045},
- }
Kogashi, K., Cherian, A., Kuo, M.-Y.J., "MMHOI: Modeling Complex 3D Multi-Human Multi-Object Interactions", IEEE Winter Conference on Applications of Computer Vision (WACV), March 2026, pp. 1512-1521.
BibTeX TR2026-029 PDF Video Data
- @inproceedings{Kogashi2026mar,
-   author    = {Kogashi, Kaen and Cherian, Anoop and Kuo, Meng-Yu Jennifer},
-   title     = {{MMHOI: Modeling Complex 3D Multi-Human Multi-Object Interactions}},
-   booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
-   pages     = {1512--1521},
-   month     = mar,
-   year      = {2026},
-   url       = {https://www.merl.com/publications/TR2026-029},
- }
Mumcu, F., Jones, M.J., Yilmaz, Y., Cherian, A., "Leveraging Multimodal LLM Descriptions of Activity for Explainable Semi-Supervised Video Anomaly Detection", Transactions on Machine Learning Research, February 2026.
BibTeX TR2026-027 PDF
- @article{Mumcu2026feb2,
-   author  = {Mumcu, Furkan and Jones, Michael J. and Yilmaz, Yasin and Cherian, Anoop},
-   title   = {{Leveraging Multimodal LLM Descriptions of Activity for Explainable Semi-Supervised Video Anomaly Detection}},
-   journal = {Transactions on Machine Learning Research},
-   month   = feb,
-   year    = {2026},
-   url     = {https://www.merl.com/publications/TR2026-027},
- }
Mumcu, F., Bekit, L., Jones, M.J., Cherian, A., Yilmaz, Y., "Agentic AI-Empowered Dynamic Survey Framework", arXiv, February 2026.
BibTeX arXiv
- @article{Mumcu2026feb,
- author = {Mumcu, Furkan and Bekit, Lokman and Jones, Michael J. and Cherian, Anoop and Yilmaz, Yasin},
- title = {{Agentic AI-Empowered Dynamic Survey Framework}},
- journal = {arXiv},
- year = 2026,
- month = feb,
- eprint = {2602.04071},
- archiveprefix = {arXiv},
- url = {https://arxiv.org/abs/2602.04071}
- }
Zhang, Y., Corcodel, R., Hori, C., Cherian, A., Zhao, D., "AxisBench: What Can Go Wrong in VLMs’ Spatial Reasoning?", Advances in Neural Information Processing Systems (NeurIPS) workshop, December 2025.
BibTeX TR2025-168 PDF
- @inproceedings{Zhang2025dec2,
- author = {Zhang, Yuyou and Corcodel, Radu and Hori, Chiori and Cherian, Anoop and Zhao, Ding},
- title = {{AxisBench: What Can Go Wrong in VLMs' Spatial Reasoning?}},
- booktitle = {Advances in Neural Information Processing Systems (NeurIPS) workshop},
- year = 2025,
- month = dec,
- url = {https://www.merl.com/publications/TR2025-168}
- }
Zhang, J., Cherian, A., Rodriguez, C., Deng, W., Gould, S., "Manual-PA: Learning 3D Part Assembly from Instruction Diagrams", IEEE International Conference on Computer Vision (ICCV), September 2025, pp. 6304-6314.
BibTeX TR2025-139 PDF
- @inproceedings{Zhang2025sep,
-   author    = {Zhang, Jiahao and Cherian, Anoop and Rodriguez, Cristian and Deng, Weijian and Gould, Stephen},
-   title     = {{Manual-PA: Learning 3D Part Assembly from Instruction Diagrams}},
-   booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
-   pages     = {6304--6314},
-   month     = sep,
-   year      = {2025},
-   url       = {https://www.merl.com/publications/TR2025-139},
- }
Ni, Y., Wen, S., Koniusz, P., Cherian, A., "Noise Consistency Regularization for Improved Subject-Driven Image Synthesis", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPR), June 2025, pp. 3116-3126.
BibTeX TR2025-073 PDF
- @inproceedings{Ni2025jun,
-   author    = {Ni, Yao and Wen, Song and Koniusz, Piotr and Cherian, Anoop},
-   title     = {{Noise Consistency Regularization for Improved Subject-Driven Image Synthesis}},
-   booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPR)},
-   pages     = {3116--3126},
-   publisher = {CVF},
-   month     = jun,
-   year      = {2025},
-   url       = {https://www.merl.com/publications/TR2025-073},
- }
Singh, A., Jones, M.J., Peng, K.-C., Chatterjee, M., Cherian, A., Learned-Miller, E., "Improving Open-World Object Localization by Discovering Background", CVPR Workshop on Domain Generalization: Evolution, Breakthroughs and Future Horizon, May 2025, pp. 6449-6458.
BibTeX TR2025-058 PDF
- @inproceedings{Singh2025may,
-   author    = {Singh, Ashish and Jones, Michael J. and Peng, Kuan-Chuan and Chatterjee, Moitreya and Cherian, Anoop and Learned-Miller, Erik},
-   title     = {{Improving Open-World Object Localization by Discovering Background}},
-   booktitle = {CVPR Workshop on Domain Generalization: Evolution, Breakthroughs and Future Horizon},
-   pages     = {6449--6458},
-   month     = may,
-   year      = {2025},
-   url       = {https://www.merl.com/publications/TR2025-058},
- }
Mumcu, F., Jones, M.J., Yilmaz, Y., Cherian, A., "ComplexVAD: Detecting Interaction Anomalies in Video", IEEE Winter Conference on Applications of Computer Vision (WACV) Workshop, DOI: 10.1109/WACVW65960.2025.00122, February 2025.
BibTeX TR2025-016 PDF Data
- @inproceedings{Mumcu2025feb,
-   author    = {Mumcu, Furkan and Jones, Michael J. and Yilmaz, Yasin and Cherian, Anoop},
-   title     = {{ComplexVAD: Detecting Interaction Anomalies in Video}},
-   booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV) Workshop},
-   month     = feb,
-   year      = {2025},
-   doi       = {10.1109/WACVW65960.2025.00122},
-   url       = {https://www.merl.com/publications/TR2025-016},
- }
He, Y., Shin, S., Cherian, A., Trigoni, N., Markham, A., "SoundLoc3D: Invisible 3D Sound Source Localization and Classification Using a Multimodal RGB-D Acoustic Camera", IEEE Winter Conference on Applications of Computer Vision (WACV), December 2024, pp. 5408-5418.
BibTeX TR2025-003 PDF
- @inproceedings{He2024dec2,
-   author    = {He, Yuhang and Shin, Sangyun and Cherian, Anoop and Trigoni, Niki and Markham, Andrew},
-   title     = {{SoundLoc3D: Invisible 3D Sound Source Localization and Classification Using a Multimodal RGB-D Acoustic Camera}},
-   booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
-   pages     = {5408--5418},
-   publisher = {CVF},
-   month     = dec,
-   year      = {2024},
-   url       = {https://www.merl.com/publications/TR2025-003},
- }
Zhang, J., Zhang, F., Rodriguez, C., Ben-Shabat, I., Cherian, A., Gould, S., "Temporally Grounding Instructional Diagrams in Unconstrained Videos", IEEE Winter Conference on Applications of Computer Vision (WACV), December 2024, pp. 8090-8100.
BibTeX TR2025-002 PDF
- @inproceedings{Zhang2024dec,
-   author    = {Zhang, Jiahao and Zhang, Frederic and Rodriguez, Cristian and Ben-Shabat, Itzik and Cherian, Anoop and Gould, Stephen},
-   title     = {{Temporally Grounding Instructional Diagrams in Unconstrained Videos}},
-   booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
-   pages     = {8090--8100},
-   publisher = {CVF},
-   month     = dec,
-   year      = {2024},
-   url       = {https://www.merl.com/publications/TR2025-002},
- }
Zhang, J., Cherian, A., Rodriguez, C., Deng, W., Gould, S., "Manual-PA: Learning 3D Part Assembly from Instruction Diagrams", arXiv, November 2024.
BibTeX arXiv
- @article{Zhang2024nov,
- author = {Zhang, Jiahao and Cherian, Anoop and Rodriguez, Cristian and Deng, Weijian and Gould, Stephen},
- title = {{Manual-PA: Learning 3D Part Assembly from Instruction Diagrams}},
- journal = {arXiv},
- year = 2024,
- month = nov,
- eprint = {2411.18011},
- archiveprefix = {arXiv},
- url = {https://arxiv.org/abs/2411.18011}
- }
Cherian, A., Corcodel, R., Jain, S., Romeres, D., "LLMPhy: Complex Physical Reasoning Using Large Language Models and World Models", arXiv, November 2024.
BibTeX arXiv
- @article{Cherian2024oct,
- author = {Cherian, Anoop and Corcodel, Radu and Jain, Siddarth and Romeres, Diego},
- title = {{LLMPhy: Complex Physical Reasoning Using Large Language Models and World Models}},
- journal = {arXiv},
- year = 2024,
- month = nov,
- eprint = {2411.08027},
- archiveprefix = {arXiv},
- url = {https://arxiv.org/abs/2411.08027}
- }
Cherian, A., Peng, K.-C., Lohit, S., Matthiesen, J., Smith, K., Tenenbaum, J.B., "Evaluating Large Vision-and-Language Models on Children’s Mathematical Olympiads", Advances in Neural Information Processing Systems (NeurIPS), November 2024, pp. 15779-15800.
BibTeX TR2024-160 PDF Video Presentation
- @inproceedings{Cherian2024nov,
- author = {Cherian, Anoop and Peng, Kuan-Chuan and Lohit, Suhas and Matthiesen, Joanna and Smith, Kevin and Tenenbaum, Joshua B.},
- title = {{Evaluating Large Vision-and-Language Models on Children's Mathematical Olympiads}},
- booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
- year = 2024,
- pages = {15779--15800},
- month = nov,
- publisher = {NeurIPS Proceedings},
- url = {https://www.merl.com/publications/TR2024-160}
- }
Cherian, A., Jain, S., Marks, T.K., "Few-shot Transparent Instance Segmentation for Bin Picking", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), September 2024, pp. 5009-5016.
BibTeX TR2024-127 PDF Video
- @inproceedings{Cherian2024sep,
-   author    = {Cherian, Anoop and Jain, Siddarth and Marks, Tim K.},
-   title     = {{Few-shot Transparent Instance Segmentation for Bin Picking}},
-   booktitle = {2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
-   pages     = {5009--5016},
-   publisher = {IEEE},
-   month     = sep,
-   year      = {2024},
-   url       = {https://www.merl.com/publications/TR2024-127},
- }
Yin, J., Luo, A., Du, Y., Cherian, A., Marks, T.K., Le Roux, J., Gan, C., "Disentangled Acoustic Fields For Multimodal Physical Scene Understanding", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), September 2024, pp. 557-564.
BibTeX TR2024-125 PDF
- @inproceedings{Yin2024sep,
-   author    = {Yin, Jie and Luo, Andrew and Du, Yilun and Cherian, Anoop and Marks, Tim K. and {Le Roux}, Jonathan and Gan, Chuang},
-   title     = {{Disentangled Acoustic Fields For Multimodal Physical Scene Understanding}},
-   booktitle = {2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
-   pages     = {557--564},
-   publisher = {IEEE},
-   month     = sep,
-   year      = {2024},
-   url       = {https://www.merl.com/publications/TR2024-125},
- }
Zhang, J., Zhang, F., Rodriguez, C., Ben-Shabat, I., Cherian, A., Gould, S., "Temporally Grounding Instructional Diagrams in Unconstrained Videos", arXiv, July 2024.
BibTeX arXiv
- @article{Zhang2024jul4,
- author = {Zhang, Jiahao and Zhang, Frederic and Rodriguez, Cristian and Ben-Shabat, Itzik and Cherian, Anoop and Gould, Stephen},
- title = {{Temporally Grounding Instructional Diagrams in Unconstrained Videos}},
- journal = {arXiv},
- year = 2024,
- month = jul,
- eprint = {2407.12066},
- archiveprefix = {arXiv},
- url = {https://arxiv.org/abs/2407.12066}
- }
Ni, H., Egger, B., Lohit, S., Cherian, A., Wang, Y., Koike-Akino, T., Huang, S.X., Marks, T.K., "TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2024, pp. 9015-9025.
BibTeX TR2024-059 PDF Video Software Presentation
- @inproceedings{Ni2024jun,
-   author    = {Ni, Haomiao and Egger, Bernhard and Lohit, Suhas and Cherian, Anoop and Wang, Ye and Koike-Akino, Toshiaki and Huang, Sharon X. and Marks, Tim K.},
-   title     = {{TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models}},
-   booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
-   pages     = {9015--9025},
-   month     = jun,
-   year      = {2024},
-   url       = {https://www.merl.com/publications/TR2024-059},
- }
He, Y., Cherian, A., Wichern, G., Markham, A., "Deep Neural Room Acoustics Primitive", International Conference on Machine Learning (ICML), June 2024, pp. 17842-17857.
BibTeX TR2024-072 PDF
- @inproceedings{He2024jun,
-   author    = {He, Yuhang and Cherian, Anoop and Wichern, Gordon and Markham, Andrew},
-   title     = {{Deep Neural Room Acoustics Primitive}},
-   booktitle = {International Conference on Machine Learning (ICML)},
-   pages     = {17842--17857},
-   month     = jun,
-   year      = {2024},
-   url       = {https://www.merl.com/publications/TR2024-072},
- }
Yang, Z., Liu, J., Chen, P., Cherian, A., Marks, T.K., Le Roux, J., Gan, C., "RILA: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), April 2024, pp. 16251-16261.
BibTeX TR2024-043 PDF
- @inproceedings{Yang2024apr,
-   author    = {Yang, Zeyuan and Liu, Jiageng and Chen, Peihao and Cherian, Anoop and Marks, Tim K. and {Le Roux}, Jonathan and Gan, Chuang},
-   title     = {{RILA: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation}},
-   booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
-   pages     = {16251--16261},
-   publisher = {CVF},
-   month     = apr,
-   year      = {2024},
-   url       = {https://www.merl.com/publications/TR2024-043},
- }