Publications

Wang, J.-W., Nikovski, D.N., "State Representation Learning for Visual Servo Control", European Control Conference (ECC), June 2025.
BibTeX TR2025-094 PDF
- @inproceedings{Wang2025jun,
- author = {Wang, Jen-Wei and Nikovski, Daniel N.},
- title = {{State Representation Learning for Visual Servo Control}},
- booktitle = {European Control Conference (ECC)},
- year = 2025,
- month = jun,
- url = {https://www.merl.com/publications/TR2025-094}
- }
Jung, Y.G., Park, J., Yoon, J., Peng, K.-C., Kim, W., Teoh, A.B.J., Camps, O., "TailedCore: Few-Shot Sampling for Unsupervised Long-Tail Noisy Anomaly Detection", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2025.
BibTeX TR2025-077 PDF Video Presentation
- @inproceedings{Jung2025jun,
- author = {Jung, Yoon G. and Park, Jaewoo and Yoon, Jaeho and Peng, Kuan-Chuan and Kim, Wonchul and Teoh, Andrew B. J. and Camps, Octavia},
- title = {{TailedCore: Few-Shot Sampling for Unsupervised Long-Tail Noisy Anomaly Detection}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2025,
- month = jun,
- url = {https://www.merl.com/publications/TR2025-077}
- }
Lai, Y.-H., Ebbers, J., Wang, Y.-C.F., Germain, F.G., Jones, M.J., Chatterjee, M., "UWAV: Uncertainty-weighted Weakly-supervised Audio-Visual Video Parsing", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2025.
BibTeX TR2025-072 PDF
- @inproceedings{Lai2025jun,
- author = {Lai, Yung-Hsuan and Ebbers, Janek and Wang, Yu-Chiang Frank and Germain, François G. and Jones, Michael J. and Chatterjee, Moitreya},
- title = {{UWAV: Uncertainty-weighted Weakly-supervised Audio-Visual Video Parsing}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2025,
- month = jun,
- url = {https://www.merl.com/publications/TR2025-072}
- }
Hegde, D., Lohit, S., Peng, K.-C., Jones, M.J., Patel, V.M., "Multimodal 3D Object Detection on Unseen Domains", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop, June 2025.
BibTeX TR2025-078 PDF
- @inproceedings{Hegde2025jun,
- author = {Hegde, Deepti and Lohit, Suhas and Peng, Kuan-Chuan and Jones, Michael J. and Patel, Vishal M.},
- title = {{Multimodal 3D Object Detection on Unseen Domains}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop},
- year = 2025,
- month = jun,
- url = {https://www.merl.com/publications/TR2025-078}
- }
Li, K., Zhang, T., Peng, K.-C., Wang, G., "PF3Det: A Prompted Foundation Feature Assisted Visual LiDAR 3D Detector", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop, June 2025.
BibTeX TR2025-076 PDF Presentation
- @inproceedings{Li2025jun,
- author = {Li, Kaidong and Zhang, Tianxiao and Peng, Kuan-Chuan and Wang, Guanghui},
- title = {{PF3Det: A Prompted Foundation Feature Assisted Visual LiDAR 3D Detector}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop},
- year = 2025,
- month = jun,
- url = {https://www.merl.com/publications/TR2025-076}
- }
Sawada, N., Miraldo, P., Lohit, S., Marks, T.K., Chatterjee, M., "FreBIS: Frequency-Based Stratification for Neural Implicit Surface Representations", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop, June 2025.
BibTeX TR2025-074 PDF
- @inproceedings{Sawada2025jun,
- author = {Sawada, Naoko and Miraldo, Pedro and Lohit, Suhas and Marks, Tim K. and Chatterjee, Moitreya},
- title = {{FreBIS: Frequency-Based Stratification for Neural Implicit Surface Representations}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop},
- year = 2025,
- month = jun,
- url = {https://www.merl.com/publications/TR2025-074}
- }
Ni, Y., Wen, S., Koniusz, P., Cherian, A., "Noise Consistency Regularization for Improved Subject-Driven Image Synthesis", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPR), June 2025, pp. 3116-3126.
BibTeX TR2025-073 PDF
- @inproceedings{Ni2025jun,
- author = {Ni, Yao and Wen, Song and Koniusz, Piotr and Cherian, Anoop},
- title = {{Noise Consistency Regularization for Improved Subject-Driven Image Synthesis}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPR)},
- year = 2025,
- pages = {3116--3126},
- month = jun,
- publisher = {CVF},
- url = {https://www.merl.com/publications/TR2025-073}
- }
Lu, K., Ma, C., Hori, C., Romeres, D., "KitchenVLA: Iterative Vision-Language Corrections for Robotic Execution of Human Tasks", IEEE International Conference on Robotics and Automation Workshop on Safely Leveraging Vision-Language Foundation Models in Robotics (SafeLVMs@ICRA), May 2025.
BibTeX TR2025-068 PDF
- @inproceedings{Lu2025may,
- author = {Lu, Kai and Ma, Chenyang and Hori, Chiori and Romeres, Diego},
- title = {{KitchenVLA: Iterative Vision-Language Corrections for Robotic Execution of Human Tasks}},
- booktitle = {IEEE International Conference on Robotics and Automation Workshop on Safely Leveraging Vision-Language Foundation Models in Robotics (SafeLVMs@ICRA)},
- year = 2025,
- month = may,
- url = {https://www.merl.com/publications/TR2025-068}
- }
Singh, A., Jones, M.J., Peng, K.-C., Chatterjee, M., Cherian, A., Learned-Miller, E., "Improving Open-World Object Localization by Discovering Background", CVPR Workshop on Domain Generalization: Evolution, Breakthroughs and Future Horizon, May 2025.
BibTeX TR2025-058 PDF
- @inproceedings{Singh2025may,
- author = {Singh, Ashish and Jones, Michael J. and Peng, Kuan-Chuan and Chatterjee, Moitreya and Cherian, Anoop and Learned-Miller, Erik},
- title = {{Improving Open-World Object Localization by Discovering Background}},
- booktitle = {CVPR Workshop on Domain Generalization: Evolution, Breakthroughs and Future Horizon},
- year = 2025,
- month = may,
- url = {https://www.merl.com/publications/TR2025-058}
- }
Basu, S., Lohit, S., Brand, M., "G-RepsNet: A Lightweight Construction of Equivariant Networks for Arbitrary Matrix Groups", Transactions on Machine Learning Research (TMLR), May 2025.
BibTeX TR2025-056 PDF Software
- @article{Basu2025may,
- author = {Basu, Sourya and Lohit, Suhas and Brand, Matthew},
- title = {{G-RepsNet: A Lightweight Construction of Equivariant Networks for Arbitrary Matrix Groups}},
- journal = {Transactions on Machine Learning Research (TMLR)},
- year = 2025,
- month = may,
- url = {https://www.merl.com/publications/TR2025-056}
- }
Tang, H., Ellis, K., Lohit, S., Jones, M.J., Chatterjee, M., "Programmatic Video Prediction Using Large Language Models", International Conference on Learning Representations Workshops (ICLRW), April 2025.
BibTeX TR2025-049 PDF
- @inproceedings{Tang2025apr,
- author = {Tang, Hao and Ellis, Kevin and Lohit, Suhas and Jones, Michael J. and Chatterjee, Moitreya},
- title = {{Programmatic Video Prediction Using Large Language Models}},
- booktitle = {International Conference on Learning Representations Workshops (ICLRW)},
- year = 2025,
- month = apr,
- url = {https://www.merl.com/publications/TR2025-049}
- }
Hori, C., Kambara, M., Sugiura, K., Ota, K., Khurana, S., Jain, S., Corcodel, R., Jha, D.K., Romeres, D., Le Roux, J., "Interactive Robot Action Replanning using Multimodal LLM Trained from Human Demonstration Videos", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10887717, April 2025.
BibTeX TR2025-034 PDF
- @inproceedings{Hori2025mar,
- author = {Hori, Chiori and Kambara, Motonari and Sugiura, Komei and Ota, Kei and Khurana, Sameer and Jain, Siddarth and Corcodel, Radu and Jha, Devesh K. and Romeres, Diego and {Le Roux}, Jonathan},
- title = {{Interactive Robot Action Replanning using Multimodal {LLM} Trained from Human Demonstration Videos}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10887717},
- url = {https://www.merl.com/publications/TR2025-034}
- }
Ranade, S., Pais, G., Whitaker, R., Nascimento, J., Miraldo, P., Ramalingam, S., "SurfR: Surface Reconstruction with Multi-scale Attention", International Conference on 3D Vision (3DV), March 2025.
BibTeX TR2025-039 PDF Presentation
- @inproceedings{Ranade2025mar,
- author = {Ranade, Siddhant and Pais, Goncalo and Whitaker, Ross and Nascimento, Jacinto and Miraldo, Pedro and Ramalingam, Srikumar},
- title = {{SurfR: Surface Reconstruction with Multi-scale Attention}},
- booktitle = {International Conference on 3D Vision (3DV)},
- year = 2025,
- month = mar,
- url = {https://www.merl.com/publications/TR2025-039}
- }
Wang, Y., Peng, K.-C., Fu, R., "Towards Zero-shot 3D Anomaly Localization", IEEE Winter Conference on Applications of Computer Vision (WACV), Biswas, S. and Averbuch-Elor, H. and Štruc, V. and Yang, Y., Eds., DOI: 10.1109/WACV61041.2025.00148, February 2025, pp. 1447-1456.
BibTeX TR2025-020 PDF Video Presentation
- @inproceedings{Wang2025feb2,
- author = {Wang, Yizhou and Peng, Kuan-Chuan and Fu, Raymond},
- title = {{Towards Zero-shot 3D Anomaly Localization}},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- year = 2025,
- editor = {Biswas, S. and Averbuch-Elor, H. and Štruc, V. and Yang, Y.},
- pages = {1447--1456},
- month = feb,
- publisher = {IEEE},
- doi = {10.1109/WACV61041.2025.00148},
- issn = {2642-9381},
- isbn = {979-8-3315-1083-1},
- url = {https://www.merl.com/publications/TR2025-020}
- }
Mumcu, F., Jones, M.J., Yilmaz, Y., Cherian, A., "ComplexVAD: Detecting Interaction Anomalies in Video", IEEE Winter Conference on Applications of Computer Vision (WACV) Workshop, February 2025.
BibTeX TR2025-016 PDF Data
- @inproceedings{Mumcu2025feb,
- author = {Mumcu, Furkan and Jones, Michael J. and Yilmaz, Yasin and Cherian, Anoop},
- title = {{ComplexVAD: Detecting Interaction Anomalies in Video}},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV) Workshop},
- year = 2025,
- month = feb,
- url = {https://www.merl.com/publications/TR2025-016}
- }
Lohit, S., Marks, T.K., "Rotation-Equivariant Neural Networks for Cloud Removal from Satellite Images", Asilomar Conference on Signals, Systems, and Computers (ACSSC), DOI: 10.1109/IEEECONF60004.2024.10942613, January 2025, pp. 1360-1365.
BibTeX TR2025-009 PDF
- @inproceedings{Lohit2025jan,
- author = {Lohit, Suhas and Marks, Tim K.},
- title = {{Rotation-Equivariant Neural Networks for Cloud Removal from Satellite Images}},
- booktitle = {2024 58th Asilomar Conference on Signals, Systems, and Computers (ACSSC)},
- year = 2025,
- pages = {1360--1365},
- month = jan,
- publisher = {IEEE},
- doi = {10.1109/IEEECONF60004.2024.10942613},
- issn = {2576-2303},
- isbn = {979-8-3503-5405-8},
- url = {https://www.merl.com/publications/TR2025-009}
- }
He, Y., Shin, S., Cherian, A., Trigoni, N., Markham, A., "SoundLoc3D: Invisible 3D Sound Source Localization and Classification Using a Multimodal RGB-D Acoustic Camera", IEEE Winter Conference on Applications of Computer Vision (WACV), December 2024, pp. 5408-5418.
BibTeX TR2025-003 PDF
- @inproceedings{He2024dec2,
- author = {He, Yuhang and Shin, Sangyun and Cherian, Anoop and Trigoni, Niki and Markham, Andrew},
- title = {{SoundLoc3D: Invisible 3D Sound Source Localization and Classification Using a Multimodal RGB-D Acoustic Camera}},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- year = 2024,
- pages = {5408--5418},
- month = dec,
- publisher = {CVF},
- url = {https://www.merl.com/publications/TR2025-003}
- }
Zhang, J., Zhang, F., Rodriguez, C., Ben-Shabat, I., Cherian, A., Gould, S., "Temporally Grounding Instructional Diagrams in Unconstrained Videos", IEEE Winter Conference on Applications of Computer Vision (WACV), December 2024, pp. 8090-8100.
BibTeX TR2025-002 PDF
- @inproceedings{Zhang2024dec,
- author = {Zhang, Jiahao and Zhang, Frederic and Rodriguez, Cristian and Ben-Shabat, Itzik and Cherian, Anoop and Gould, Stephen},
- title = {{Temporally Grounding Instructional Diagrams in Unconstrained Videos}},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- year = 2024,
- pages = {8090--8100},
- month = dec,
- publisher = {CVF},
- url = {https://www.merl.com/publications/TR2025-002}
- }
Cherian, A., Peng, K.-C., Lohit, S., Matthiesen, J., Smith, K., Tenenbaum, J.B., "Evaluating Large Vision-and-Language Models on Children’s Mathematical Olympiads", Advances in Neural Information Processing Systems (NeurIPS), November 2024, pp. 15779-15800.
BibTeX TR2024-160 PDF Video Presentation
- @inproceedings{Cherian2024nov,
- author = {Cherian, Anoop and Peng, Kuan-Chuan and Lohit, Suhas and Matthiesen, Joanna and Smith, Kevin and Tenenbaum, Joshua B.},
- title = {{Evaluating Large Vision-and-Language Models on Children’s Mathematical Olympiads}},
- booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
- year = 2024,
- pages = {15779--15800},
- month = nov,
- publisher = {NeurIPS Proceedings},
- url = {https://www.merl.com/publications/TR2024-160}
- }
Chang, H., Boularias, A., Jain, S., "Insert-One: One-Shot Robust Visual-Force Servoing for Novel Object Insertion with 6-DoF Tracking", 2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2024), DOI: 10.1109/IROS58592.2024.10801884, October 2024, pp. 2935-2942.
BibTeX TR2024-137 PDF
- @inproceedings{Chang2024oct,
- author = {Chang, Haonan and Boularias, Abdeslam and Jain, Siddarth},
- title = {{Insert-One: One-Shot Robust Visual-Force Servoing for Novel Object Insertion with 6-DoF Tracking}},
- booktitle = {2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2024)},
- year = 2024,
- pages = {2935--2942},
- month = oct,
- publisher = {IEEE},
- doi = {10.1109/IROS58592.2024.10801884},
- issn = {2153-0866},
- isbn = {979-8-3503-7770-5},
- url = {https://www.merl.com/publications/TR2024-137}
- }
Shimane, Y., Ho, K., Weiss, A., "Autonomous Horizon-Based Optical Navigation on Near-Planar Cislunar Libration Point Orbits", 4th Space Imaging Workshop, October 2024, pp. SIW24-27.
BibTeX TR2024-139 PDF
- @inproceedings{Shimane2024oct,
- author = {Shimane, Yuri and Ho, Koki and Weiss, Avishai},
- title = {{Autonomous Horizon-Based Optical Navigation on Near-Planar Cislunar Libration Point Orbits}},
- booktitle = {4th Space Imaging Workshop},
- year = 2024,
- pages = {SIW24-27},
- month = oct,
- url = {https://www.merl.com/publications/TR2024-139}
- }
Ota, K., Jha, D.K., Jain, S., Yerazunis, W.S., Corcodel, R., Shukla, Y., Bronars, A., Romeres, D., "Autonomous Robotic Assembly: From Part Singulation to Precise Assembly", IEEE/RSJ International Conference on Intelligent Robots and Systems, DOI: 10.1109/IROS58592.2024.10802423, October 2024, pp. 13525-13532.
BibTeX TR2024-133 PDF
- @inproceedings{Ota2024oct,
- author = {Ota, Kei and Jha, Devesh K. and Jain, Siddarth and Yerazunis, William S. and Corcodel, Radu and Shukla, Yash and Bronars, Antonia and Romeres, Diego},
- title = {{Autonomous Robotic Assembly: From Part Singulation to Precise Assembly}},
- booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems},
- year = 2024,
- pages = {13525--13532},
- month = oct,
- publisher = {IEEE},
- doi = {10.1109/IROS58592.2024.10802423},
- issn = {2153-0866},
- url = {https://www.merl.com/publications/TR2024-133}
- }
Hegde, D., Lohit, S., Peng, K.-C., Jones, M.J., Patel, V.M., "Equivariant Spatio-Temporal Self-Supervision for LiDAR Object Detection", European Conference on Computer Vision (ECCV), Leonardis, A. and Ricci, E. and Roth, S. and Russakovsky, O. and Sattler, T. and Varol, G., Eds., DOI: 10.1007/978-3-031-73347-5_27, September 2024, pp. 475-491.
BibTeX TR2024-130 PDF Video Presentation
- @inproceedings{Hegde2024sep,
- author = {Hegde, Deepti and Lohit, Suhas and Peng, Kuan-Chuan and Jones, Michael J. and Patel, Vishal M.},
- title = {{Equivariant Spatio-Temporal Self-Supervision for LiDAR Object Detection}},
- booktitle = {European Conference on Computer Vision (ECCV)},
- year = 2024,
- editor = {Leonardis, A. and Ricci, E. and Roth, S. and Russakovsky, O. and Sattler, T. and Varol, G.},
- pages = {475--491},
- month = sep,
- publisher = {Springer},
- doi = {10.1007/978-3-031-73347-5_27},
- issn = {0302-9743},
- isbn = {978-3-031-73346-8},
- url = {https://www.merl.com/publications/TR2024-130}
- }
Pais, G., Piedade, V., Chatterjee, M., Greiff, M., Miraldo, P., "A Probability-guided Sampler for Neural Implicit Surface Rendering", European Conference on Computer Vision (ECCV), Leonardis, A. and Ricci, E. and Roth, S. and Russakovsky, O. and Sattler, T. and Varol, G., Eds., DOI: 10.1007/978-3-031-72913-3_10, September 2024, pp. 164-182.
BibTeX TR2024-129 PDF Video
- @inproceedings{Pais2024sep,
- author = {Pais, Goncalo and Piedade, Valter and Chatterjee, Moitreya and Greiff, Marcus and Miraldo, Pedro},
- title = {{A Probability-guided Sampler for Neural Implicit Surface Rendering}},
- booktitle = {European Conference on Computer Vision (ECCV)},
- year = 2024,
- editor = {Leonardis, A. and Ricci, E. and Roth, S. and Russakovsky, O. and Sattler, T. and Varol, G.},
- pages = {164--182},
- month = sep,
- publisher = {Springer},
- address = {Cham},
- doi = {10.1007/978-3-031-72913-3_10},
- isbn = {978-3-031-72913-3},
- url = {https://www.merl.com/publications/TR2024-129}
- }
Cherian, A., Jain, S., Marks, T.K., "Few-shot Transparent Instance Segmentation for Bin Picking", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), September 2024, pp. 5009-5016.
BibTeX TR2024-127 PDF Video
- @inproceedings{Cherian2024sep,
- author = {Cherian, Anoop and Jain, Siddarth and Marks, Tim K.},
- title = {{Few-shot Transparent Instance Segmentation for Bin Picking}},
- booktitle = {2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
- year = 2024,
- pages = {5009--5016},
- month = sep,
- publisher = {IEEE},
- url = {https://www.merl.com/publications/TR2024-127}
- }