Publications

Singh, A., Jones, M.J., Peng, K.-C., Chatterjee, M., Cherian, A., Learned-Miller, E., "Improving Open-World Object Localization by Discovering Background", CVPR Workshop on Domain Generalization: Evolution, Breakthroughs and Future Horizon, May 2025, pp. 6449-6458.
BibTeX TR2025-058 PDF
- @inproceedings{Singh2025may,
- author    = {Singh, Ashish and Jones, Michael J. and Peng, Kuan-Chuan and Chatterjee, Moitreya and Cherian, Anoop and Learned-Miller, Erik},
- title     = {{Improving Open-World Object Localization by Discovering Background}},
- booktitle = {CVPR Workshop on Domain Generalization: Evolution, Breakthroughs and Future Horizon},
- year      = {2025},
- pages     = {6449--6458},
- month     = may,
- url       = {https://www.merl.com/publications/TR2025-058}
- }
Basu, S., Lohit, S., Brand, M., "G-RepsNet: A Lightweight Construction of Equivariant Networks for Arbitrary Matrix Groups", Transactions on Machine Learning Research (TMLR), May 2025.
BibTeX TR2025-056 PDF Software
- @article{Basu2025may,
- author = {Basu, Sourya and Lohit, Suhas and Brand, Matthew},
- title = {{G-RepsNet: A Lightweight Construction of Equivariant Networks for Arbitrary Matrix Groups}},
- journal = {Transactions on Machine Learning Research (TMLR)},
- year = 2025,
- month = may,
- issn = {2835-8856},
- url = {https://www.merl.com/publications/TR2025-056}
- }
Tang, H., Ellis, K., Lohit, S., Jones, M.J., Chatterjee, M., "Programmatic Video Prediction Using Large Language Models", International Conference on Learning Representations Workshops (ICLRW), April 2025.
BibTeX TR2025-049 PDF
- @inproceedings{Tang2025apr,
- author    = {Tang, Hao and Ellis, Kevin and Lohit, Suhas and Jones, Michael J. and Chatterjee, Moitreya},
- title     = {{Programmatic Video Prediction Using Large Language Models}},
- booktitle = {International Conference on Learning Representations Workshops (ICLRW)},
- year      = {2025},
- month     = apr,
- url       = {https://www.merl.com/publications/TR2025-049}
- }
Hori, C., Kambara, M., Sugiura, K., Ota, K., Khurana, S., Jain, S., Corcodel, R., Jha, D.K., Romeres, D., Le Roux, J., "Interactive Robot Action Replanning using Multimodal LLM Trained from Human Demonstration Videos", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10887717, April 2025.
BibTeX TR2025-034 PDF
- @inproceedings{Hori2025mar,
- author = {Hori, Chiori and Kambara, Motonari and Sugiura, Komei and Ota, Kei and Khurana, Sameer and Jain, Siddarth and Corcodel, Radu and Jha, Devesh K. and Romeres, Diego and {Le Roux}, Jonathan},
- title = {{Interactive Robot Action Replanning using Multimodal {LLM} Trained from Human Demonstration Videos}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10887717},
- url = {https://www.merl.com/publications/TR2025-034}
- }
Ranade, S., Pais, G., Whitaker, R., Nascimento, J., Miraldo, P., Ramalingam, S., "SurfR: Surface Reconstruction with Multi-scale Attention", International Conference on 3D Vision (3DV), DOI: 10.1109/3DV66043.2025.00056, March 2025, pp. 556-566.
BibTeX TR2025-039 PDF Presentation
- @inproceedings{Ranade2025mar,
- author = {Ranade, Siddhant and Pais, Goncalo and Whitaker, Ross and Nascimento, Jacinto and Miraldo, Pedro and Ramalingam, Srikumar},
- title = {{SurfR: Surface Reconstruction with Multi-scale Attention}},
- booktitle = {International Conference on 3D Vision (3DV)},
- year = 2025,
- pages = {556--566},
- month = mar,
- doi = {10.1109/3DV66043.2025.00056},
- url = {https://www.merl.com/publications/TR2025-039}
- }
Wang, Y., Peng, K.-C., Fu, R., "Towards Zero-shot 3D Anomaly Localization", IEEE Winter Conference on Applications of Computer Vision (WACV), Biswas, S. and Averbuch-Elor, H. and Štruc, V. and Yang, Y., Eds., DOI: 10.1109/WACV61041.2025.00148, February 2025, pp. 1447-1456.
BibTeX TR2025-020 PDF Video Presentation
- @inproceedings{Wang2025feb2,
- author = {Wang, Yizhou and Peng, Kuan-Chuan and Fu, Raymond},
- title = {{Towards Zero-shot 3D Anomaly Localization}},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- year = 2025,
- editor = {Biswas, S. and Averbuch-Elor, H. and {\v{S}}truc, V. and Yang, Y.},
- pages = {1447--1456},
- month = feb,
- publisher = {IEEE},
- doi = {10.1109/WACV61041.2025.00148},
- issn = {2642-9381},
- isbn = {979-8-3315-1083-1},
- url = {https://www.merl.com/publications/TR2025-020}
- }
Mumcu, F., Jones, M.J., Yilmaz, Y., Cherian, A., "ComplexVAD: Detecting Interaction Anomalies in Video", IEEE Winter Conference on Applications of Computer Vision (WACV) Workshop, DOI: 10.1109/WACVW65960.2025.00122, February 2025.
BibTeX TR2025-016 PDF Data
- @inproceedings{Mumcu2025feb,
- author    = {Mumcu, Furkan and Jones, Michael J. and Yilmaz, Yasin and Cherian, Anoop},
- title     = {{ComplexVAD: Detecting Interaction Anomalies in Video}},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV) Workshop},
- year      = {2025},
- month     = feb,
- doi       = {10.1109/WACVW65960.2025.00122},
- url       = {https://www.merl.com/publications/TR2025-016}
- }
Lohit, S., Marks, T.K., "Rotation-Equivariant Neural Networks for Cloud Removal from Satellite Images", Asilomar Conference on Signals, Systems, and Computers (ACSSC), DOI: 10.1109/IEEECONF60004.2024.10942613, January 2025, pp. 1360-1365.
BibTeX TR2025-009 PDF
- @inproceedings{Lohit2025jan,
- author    = {Lohit, Suhas and Marks, Tim K.},
- title     = {{Rotation-Equivariant Neural Networks for Cloud Removal from Satellite Images}},
- booktitle = {2024 58th Asilomar Conference on Signals, Systems, and Computers (ACSSC)},
- year      = {2025},
- pages     = {1360--1365},
- month     = jan,
- publisher = {IEEE},
- doi       = {10.1109/IEEECONF60004.2024.10942613},
- issn      = {2576-2303},
- isbn      = {979-8-3503-5405-8},
- url       = {https://www.merl.com/publications/TR2025-009}
- }
He, Y., Shin, S., Cherian, A., Trigoni, N., Markham, A., "SoundLoc3D: Invisible 3D Sound Source Localization and Classification Using a Multimodal RGB-D Acoustic Camera", IEEE Winter Conference on Applications of Computer Vision (WACV), December 2024, pp. 5408-5418.
BibTeX TR2025-003 PDF
- @inproceedings{He2024dec2,
- author    = {He, Yuhang and Shin, Sangyun and Cherian, Anoop and Trigoni, Niki and Markham, Andrew},
- title     = {{SoundLoc3D: Invisible 3D Sound Source Localization and Classification Using a Multimodal RGB-D Acoustic Camera}},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- year      = {2024},
- pages     = {5408--5418},
- month     = dec,
- publisher = {CVF},
- url       = {https://www.merl.com/publications/TR2025-003}
- }
Zhang, J., Zhang, F., Rodriguez, C., Ben-Shabat, I., Cherian, A., Gould, S., "Temporally Grounding Instructional Diagrams in Unconstrained Videos", IEEE Winter Conference on Applications of Computer Vision (WACV), December 2024, pp. 8090-8100.
BibTeX TR2025-002 PDF
- @inproceedings{Zhang2024dec,
- author    = {Zhang, Jiahao and Zhang, Frederic and Rodriguez, Cristian and Ben-Shabat, Itzik and Cherian, Anoop and Gould, Stephen},
- title     = {{Temporally Grounding Instructional Diagrams in Unconstrained Videos}},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- year      = {2024},
- pages     = {8090--8100},
- month     = dec,
- publisher = {CVF},
- url       = {https://www.merl.com/publications/TR2025-002}
- }
Cherian, A., Peng, K.-C., Lohit, S., Matthiesen, J., Smith, K., Tenenbaum, J.B., "Evaluating Large Vision-and-Language Models on Children’s Mathematical Olympiads", Advances in Neural Information Processing Systems (NeurIPS), November 2024, pp. 15779-15800.
BibTeX TR2024-160 PDF Video Presentation
- @inproceedings{Cherian2024nov,
- author = {Cherian, Anoop and Peng, Kuan-Chuan and Lohit, Suhas and Matthiesen, Joanna and Smith, Kevin and Tenenbaum, Joshua B.},
- title = {{Evaluating Large Vision-and-Language Models on Children's Mathematical Olympiads}},
- booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
- year = 2024,
- pages = {15779--15800},
- month = nov,
- publisher = {NeurIPS Proceedings},
- url = {https://www.merl.com/publications/TR2024-160}
- }
Chang, H., Boularias, A., Jain, S., "Insert-One: One-Shot Robust Visual-Force Servoing for Novel Object Insertion with 6-DoF Tracking", 2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2024), DOI: 10.1109/IROS58592.2024.10801884, October 2024, pp. 2935-2942.
BibTeX TR2024-137 PDF
- @inproceedings{Chang2024oct,
- author    = {Chang, Haonan and Boularias, Abdeslam and Jain, Siddarth},
- title     = {{Insert-One: One-Shot Robust Visual-Force Servoing for Novel Object Insertion with 6-DoF Tracking}},
- booktitle = {2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2024)},
- year      = {2024},
- pages     = {2935--2942},
- month     = oct,
- publisher = {IEEE},
- doi       = {10.1109/IROS58592.2024.10801884},
- issn      = {2153-0866},
- isbn      = {979-8-3503-7770-5},
- url       = {https://www.merl.com/publications/TR2024-137}
- }
Shimane, Y., Ho, K., Weiss, A., "Autonomous Horizon-Based Optical Navigation on Near-Planar Cislunar Libration Point Orbits", 4th Space Imaging Workshop, October 2024, pp. SIW24-27.
BibTeX TR2024-139 PDF
- @inproceedings{Shimane2024oct,
- author = {Shimane, Yuri and Ho, Koki and Weiss, Avishai},
- title = {{Autonomous Horizon-Based Optical Navigation on Near-Planar Cislunar Libration Point Orbits}},
- booktitle = {4th Space Imaging Workshop},
- year = 2024,
- pages = {SIW24-27},
- month = oct,
- url = {https://www.merl.com/publications/TR2024-139}
- }
Ota, K., Jha, D.K., Jain, S., Yerazunis, W.S., Corcodel, R., Shukla, Y., Bronars, A., Romeres, D., "Autonomous Robotic Assembly: From Part Singulation to Precise Assembly", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), DOI: 10.1109/IROS58592.2024.10802423, October 2024, pp. 13525-13532.
BibTeX TR2024-133 PDF
- @inproceedings{Ota2024oct,
- author = {Ota, Kei and Jha, Devesh K. and Jain, Siddarth and Yerazunis, William S. and Corcodel, Radu and Shukla, Yash and Bronars, Antonia and Romeres, Diego},
- title = {{Autonomous Robotic Assembly: From Part Singulation to Precise Assembly}},
- booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
- year = 2024,
- pages = {13525--13532},
- month = oct,
- publisher = {IEEE},
- doi = {10.1109/IROS58592.2024.10802423},
- issn = {2153-0866},
- url = {https://www.merl.com/publications/TR2024-133}
- }
Hegde, D., Lohit, S., Peng, K.-C., Jones, M.J., Patel, V.M., "Equivariant Spatio-Temporal Self-Supervision for LiDAR Object Detection", European Conference on Computer Vision (ECCV), Leonardis, A. and Ricci, E. and Roth, S. and Russakovsky, O. and Sattler, T. and Varol, G., Eds., DOI: 10.1007/978-3-031-73347-5_27, September 2024, pp. 475-491.
BibTeX TR2024-130 PDF Video Presentation
- @inproceedings{Hegde2024sep,
- author    = {Hegde, Deepti and Lohit, Suhas and Peng, Kuan-Chuan and Jones, Michael J. and Patel, Vishal M.},
- title     = {{Equivariant Spatio-Temporal Self-Supervision for LiDAR Object Detection}},
- booktitle = {European Conference on Computer Vision (ECCV)},
- year      = {2024},
- editor    = {Leonardis, A. and Ricci, E. and Roth, S. and Russakovsky, O. and Sattler, T. and Varol, G.},
- pages     = {475--491},
- month     = sep,
- publisher = {Springer},
- doi       = {10.1007/978-3-031-73347-5_27},
- issn      = {0302-9743},
- isbn      = {978-3-031-73346-8},
- url       = {https://www.merl.com/publications/TR2024-130}
- }
Pais, G., Piedade, V., Chatterjee, M., Greiff, M., Miraldo, P., "A Probability-guided Sampler for Neural Implicit Surface Rendering", European Conference on Computer Vision (ECCV), Leonardis, A. and Ricci, E. and Roth, S. and Russakovsky, O. and Sattler, T. and Varol, G., Eds., DOI: 10.1007/978-3-031-72913-3_10, September 2024, pp. 164-182.
BibTeX TR2024-129 PDF Video
- @inproceedings{Pais2024sep,
- author = {Pais, Goncalo and Piedade, Valter and Chatterjee, Moitreya and Greiff, Marcus and Miraldo, Pedro},
- title = {{A Probability-guided Sampler for Neural Implicit Surface Rendering}},
- booktitle = {European Conference on Computer Vision (ECCV)},
- year = 2024,
- editor = {Leonardis, A. and Ricci, E. and Roth, S. and Russakovsky, O. and Sattler, T. and Varol, G.},
- pages = {164--182},
- month = sep,
- publisher = {Springer, Cham},
- doi = {10.1007/978-3-031-72913-3_10},
- isbn = {978-3-031-72913-3},
- url = {https://www.merl.com/publications/TR2024-129}
- }
Cherian, A., Jain, S., Marks, T.K., "Few-shot Transparent Instance Segmentation for Bin Picking", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), September 2024, pp. 5009-5016.
BibTeX TR2024-127 PDF Video
- @inproceedings{Cherian2024sep,
- author    = {Cherian, Anoop and Jain, Siddarth and Marks, Tim K.},
- title     = {{Few-shot Transparent Instance Segmentation for Bin Picking}},
- booktitle = {2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
- year      = {2024},
- pages     = {5009--5016},
- month     = sep,
- publisher = {IEEE},
- url       = {https://www.merl.com/publications/TR2024-127}
- }
Yin, J., Luo, A., Du, Y., Cherian, A., Marks, T.K., Le Roux, J., Gan, C., "Disentangled Acoustic Fields For Multimodal Physical Scene Understanding", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), September 2024, pp. 557-564.
BibTeX TR2024-125 PDF
- @inproceedings{Yin2024sep,
- author    = {Yin, Jie and Luo, Andrew and Du, Yilun and Cherian, Anoop and Marks, Tim K. and {Le Roux}, Jonathan and Gan, Chuang},
- title     = {{Disentangled Acoustic Fields For Multimodal Physical Scene Understanding}},
- booktitle = {2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
- year      = {2024},
- pages     = {557--564},
- month     = sep,
- publisher = {IEEE},
- url       = {https://www.merl.com/publications/TR2024-125}
- }
Roque, P., Miraldo, P., Dimarogonas, D., "Multi-Agent Formation Control using Epipolar Constraints", IEEE Robotics and Automation Letters, DOI: 10.1109/LRA.2024.3444690, Vol. 9, No. 12, pp. 11002-11009, September 2024.
BibTeX TR2024-147 PDF
- @article{Roque2024sep,
- author  = {Roque, Pedro and Miraldo, Pedro and Dimarogonas, Dimos},
- title   = {{Multi-Agent Formation Control using Epipolar Constraints}},
- journal = {IEEE Robotics and Automation Letters},
- year    = {2024},
- volume  = {9},
- number  = {12},
- pages   = {11002--11009},
- month   = sep,
- doi     = {10.1109/LRA.2024.3444690},
- issn    = {2377-3766},
- url     = {https://www.merl.com/publications/TR2024-147}
- }
Ho, C.-H., Peng, K.-C., Vasconcelos, N., "Long-Tailed Anomaly Detection with Learnable Class Names", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Farhadi, A. and Crandall, D. and Sato, I. and Wu, J. and Pless, R. and Akata, Z., Eds., DOI: 10.1109/CVPR52733.2024.01182, June 2024, pp. 12435-12446.
BibTeX TR2024-040 PDF Video Data Presentation
- @inproceedings{Ho2024jun,
- author    = {Ho, Chih-Hui and Peng, Kuan-Chuan and Vasconcelos, Nuno},
- title     = {{Long-Tailed Anomaly Detection with Learnable Class Names}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year      = {2024},
- editor    = {Farhadi, A. and Crandall, D. and Sato, I. and Wu, J. and Pless, R. and Akata, Z.},
- pages     = {12435--12446},
- month     = jun,
- publisher = {IEEE},
- doi       = {10.1109/CVPR52733.2024.01182},
- issn      = {2575-7075},
- isbn      = {979-8-3503-5300-6},
- url       = {https://www.merl.com/publications/TR2024-040}
- }
Ni, H., Egger, B., Lohit, S., Cherian, A., Wang, Y., Koike-Akino, T., Huang, S.X., Marks, T.K., "TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2024, pp. 9015-9025.
BibTeX TR2024-059 PDF Video Software Presentation
- @inproceedings{Ni2024jun,
- author    = {Ni, Haomiao and Egger, Bernhard and Lohit, Suhas and Cherian, Anoop and Wang, Ye and Koike-Akino, Toshiaki and Huang, Sharon X. and Marks, Tim K.},
- title     = {{TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year      = {2024},
- pages     = {9015--9025},
- month     = jun,
- url       = {https://www.merl.com/publications/TR2024-059}
- }
Dey, R., Egger, B., Boddeti, V., Wang, Y., Marks, T.K., "CoLa-SDF: Controllable Latent StyleSDF for Disentangled 3D Face Generation", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), DOI: 10.1109/CVPRW63382.2024.00291, June 2024, pp. 2852-2861.
BibTeX TR2024-045 PDF
- @inproceedings{Dey2024apr,
- author = {Dey, Rahul and Egger, Bernhard and Boddeti, Vishnu and Wang, Ye and Marks, Tim K.},
- title = {{CoLa-SDF: Controllable Latent StyleSDF for Disentangled 3D Face Generation}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
- year = 2024,
- pages = {2852--2861},
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/CVPRW63382.2024.00291},
- isbn = {979-8-3503-6547-4},
- url = {https://www.merl.com/publications/TR2024-045}
- }
He, Y., Cherian, A., Wichern, G., Markham, A., "Deep Neural Room Acoustics Primitive", International Conference on Machine Learning (ICML), June 2024, pp. 17842-17857.
BibTeX TR2024-072 PDF
- @inproceedings{He2024jun,
- author    = {He, Yuhang and Cherian, Anoop and Wichern, Gordon and Markham, Andrew},
- title     = {{Deep Neural Room Acoustics Primitive}},
- booktitle = {International Conference on Machine Learning (ICML)},
- year      = {2024},
- pages     = {17842--17857},
- month     = jun,
- url       = {https://www.merl.com/publications/TR2024-072}
- }
Kambara, M., Hori, C., Sugiura, K., Ota, K., Jha, D.K., Khurana, S., Jain, S., Corcodel, R., Romeres, D., Le Roux, J., "Human Action Understanding-based Robot Planning using Multimodal LLM", IEEE International Conference on Robotics and Automation (ICRA) Workshop, June 2024.
BibTeX TR2024-066 PDF
- @inproceedings{Kambara2024jun,
- author    = {Kambara, Motonari and Hori, Chiori and Sugiura, Komei and Ota, Kei and Jha, Devesh K. and Khurana, Sameer and Jain, Siddarth and Corcodel, Radu and Romeres, Diego and {Le Roux}, Jonathan},
- title     = {{Human Action Understanding-based Robot Planning using Multimodal LLM}},
- booktitle = {IEEE International Conference on Robotics and Automation (ICRA) Workshop},
- year      = {2024},
- month     = jun,
- url       = {https://www.merl.com/publications/TR2024-066}
- }
Liu, X., Tai, Y.-W., Tang, C.-K., Miraldo, P., Lohit, S., Chatterjee, M., "Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2024, pp. 19667-19679.
BibTeX TR2024-042 PDF Videos Software
- @inproceedings{Liu2024may,
- author = {Liu, Xinhang and Tai, Yu-Wing and Tang, Chi-Keung and Miraldo, Pedro and Lohit, Suhas and Chatterjee, Moitreya},
- title = {{Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2024,
- pages = {19667--19679},
- month = may,
- publisher = {IEEE},
- url = {https://www.merl.com/publications/TR2024-042}
- }