Publications

606 / 3,737 publications found.


  •  Cherian, A., Peng, K.-C., Lohit, S., Matthiesen, J., Smith, K., Tenenbaum, J.B., "Evaluating Large Vision-and-Language Models on Children’s Mathematical Olympiads", Advances in Neural Information Processing Systems (NeurIPS), November 2024.
    BibTeX TR2024-160 PDF Presentation
    • @inproceedings{Cherian2024nov,
    • author = {Cherian, Anoop and Peng, Kuan-Chuan and Lohit, Suhas and Matthiesen, Joanna and Smith, Kevin and Tenenbaum, Joshua B.},
    • title = {Evaluating Large Vision-and-Language Models on Children's Mathematical Olympiads},
    • booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
    • year = 2024,
    • month = nov,
    • url = {https://www.merl.com/publications/TR2024-160}
    • }
  •  Chang, H., Boularias, A., Jain, S., "Insert-One: One-Shot Robust Visual-Force Servoing for Novel Object Insertion with 6-DoF Tracking", 2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2024), October 2024.
    BibTeX TR2024-137 PDF
    • @inproceedings{Chang2024oct,
    • author = {Chang, Haonan and Boularias, Abdeslam and Jain, Siddarth},
    • title = {{Insert-One}: One-Shot Robust Visual-Force Servoing for Novel Object Insertion with {6-DoF} Tracking},
    • booktitle = {2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2024)},
    • year = 2024,
    • month = oct,
    • url = {https://www.merl.com/publications/TR2024-137}
    • }
  •  Shimane, Y., Ho, K., Weiss, A., "Autonomous Horizon-Based Optical Navigation on Near-Planar Cislunar Libration Point Orbits", 4th Space Imaging Workshop, October 2024.
    BibTeX TR2024-139 PDF
    • @inproceedings{Shimane2024oct,
    • author = {Shimane, Yuri and Ho, Koki and Weiss, Avishai},
    • title = {Autonomous Horizon-Based Optical Navigation on Near-Planar Cislunar Libration Point Orbits},
    • booktitle = {4th Space Imaging Workshop},
    • year = 2024,
    • month = oct,
    • url = {https://www.merl.com/publications/TR2024-139}
    • }
  •  Ota, K., Jha, D.K., Jain, S., Yerazunis, W.S., Corcodel, R., Shukla, Y., Bronars, A., Romeres, D., "Autonomous Robotic Assembly: From Part Singulation to Precise Assembly", IEEE/RSJ International Conference on Intelligent Robots and Systems, October 2024.
    BibTeX TR2024-133 PDF
    • @inproceedings{Ota2024oct,
    • author = {Ota, Kei and Jha, Devesh K. and Jain, Siddarth and Yerazunis, William S. and Corcodel, Radu and Shukla, Yash and Bronars, Antonia and Romeres, Diego},
    • title = {Autonomous Robotic Assembly: From Part Singulation to Precise Assembly},
    • booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems},
    • year = 2024,
    • month = oct,
    • url = {https://www.merl.com/publications/TR2024-133}
    • }
  •  Hegde, D., Lohit, S., Peng, K.-C., Jones, M.J., Patel, V.M., "Equivariant Spatio-Temporal Self-Supervision for LiDAR Object Detection", European Conference on Computer Vision (ECCV), Leonardis, A. and Ricci, E. and Roth, S. and Russakovsky, O. and Sattler, T. and Varol, G., Eds., DOI: 10.1007/​978-3-031-73347-5_27, September 2024, pp. 475-491.
    BibTeX TR2024-130 PDF Video Presentation
    • @inproceedings{Hegde2024sep,
    • author = {Hegde, Deepti and Lohit, Suhas and Peng, Kuan-Chuan and Jones, Michael J. and Patel, Vishal M.},
    • title = {Equivariant Spatio-Temporal Self-Supervision for {LiDAR} Object Detection},
    • booktitle = {European Conference on Computer Vision (ECCV)},
    • year = 2024,
    • editor = {Leonardis, A. and Ricci, E. and Roth, S. and Russakovsky, O. and Sattler, T. and Varol, G.},
    • pages = {475--491},
    • month = sep,
    • publisher = {Springer},
    • doi = {10.1007/978-3-031-73347-5_27},
    • issn = {0302-9743},
    • isbn = {978-3-031-73346-8},
    • url = {https://www.merl.com/publications/TR2024-130}
    • }
  •  Pais, G., Piedade, V., Chatterjee, M., Greiff, M., Miraldo, P., "A Probability-guided Sampler for Neural Implicit Surface Rendering", European Conference on Computer Vision (ECCV), September 2024.
    BibTeX TR2024-129 PDF
    • @inproceedings{Pais2024sep,
    • author = {Pais, Goncalo and Piedade, Valter and Chatterjee, Moitreya and Greiff, Marcus and Miraldo, Pedro},
    • title = {A Probability-guided Sampler for Neural Implicit Surface Rendering},
    • booktitle = {European Conference on Computer Vision (ECCV)},
    • year = 2024,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2024-129}
    • }
  •  Cherian, A., Jain, S., Marks, T.K., "Few-shot Transparent Instance Segmentation for Bin Picking", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), September 2024.
    BibTeX TR2024-127 PDF
    • @inproceedings{Cherian2024sep,
    • author = {Cherian, Anoop and Jain, Siddarth and Marks, Tim K.},
    • title = {Few-shot Transparent Instance Segmentation for Bin Picking},
    • booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
    • year = 2024,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2024-127}
    • }
  •  Yin, J., Luo, A., Du, Y., Cherian, A., Marks, T.K., Le Roux, J., Gan, C., "Disentangled Acoustic Fields For Multimodal Physical Scene Understanding", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), September 2024.
    BibTeX TR2024-125 PDF
    • @inproceedings{Yin2024sep,
    • author = {Yin, Jie and Luo, Andrew and Du, Yilun and Cherian, Anoop and Marks, Tim K. and Le Roux, Jonathan and Gan, Chuang},
    • title = {Disentangled Acoustic Fields For Multimodal Physical Scene Understanding},
    • booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
    • year = 2024,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2024-125}
    • }
  •  Roque, P., Miraldo, P., Dimarogonas, D., "Multi-Agent Formation Control using Epipolar Constraints", IEEE Robotics and Automation Letters, DOI: 10.1109/​LRA.2024.3444690, Vol. 9, No. 12, pp. 11002-11009, September 2024.
    BibTeX TR2024-147 PDF
    • @article{Roque2024sep,
    • author = {Roque, Pedro and Miraldo, Pedro and Dimarogonas, Dimos},
    • title = {Multi-Agent Formation Control using Epipolar Constraints},
    • journal = {IEEE Robotics and Automation Letters},
    • year = 2024,
    • volume = 9,
    • number = 12,
    • pages = {11002--11009},
    • month = sep,
    • doi = {10.1109/LRA.2024.3444690},
    • issn = {2377-3766},
    • url = {https://www.merl.com/publications/TR2024-147}
    • }
  •  Ho, C.-H., Peng, K.-C., Vasconcelos, N., "Long-Tailed Anomaly Detection with Learnable Class Names", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Farhadi, A. and Crandall, D. and Sato, I. and Wu, J. and Pless, R. and Akata, Z., Eds., DOI: 10.1109/​CVPR52733.2024.01182, June 2024, pp. 12435-12446.
    BibTeX TR2024-040 PDF Video Data Presentation
    • @inproceedings{Ho2024jun,
    • author = {Ho, Chih-Hui and Peng, Kuan-Chuan and Vasconcelos, Nuno},
    • title = {Long-Tailed Anomaly Detection with Learnable Class Names},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • editor = {Farhadi, A. and Crandall, D. and Sato, I. and Wu, J. and Pless, R. and Akata, Z.},
    • publisher = {IEEE},
    • year = {2024},
    • month = jun,
    • pages = {12435--12446},
    • doi = {10.1109/CVPR52733.2024.01182},
    • isbn = {979-8-3503-5300-6},
    • issn = {2575-7075},
    • url = {https://www.merl.com/publications/TR2024-040}
    • }
  •  Ni, H., Egger, B., Lohit, S., Cherian, A., Wang, Y., Koike-Akino, T., Huang, S.X., Marks, T.K., "TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2024, pp. 9015-9025.
    BibTeX TR2024-059 PDF Video Software Presentation
    • @inproceedings{Ni2024jun,
    • author = {Ni, Haomiao and Egger, Bernhard and Lohit, Suhas and Cherian, Anoop and Wang, Ye and Koike-Akino, Toshiaki and Huang, Sharon X. and Marks, Tim K.},
    • title = {{TI2V-Zero}: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2024,
    • pages = {9015--9025},
    • month = jun,
    • url = {https://www.merl.com/publications/TR2024-059}
    • }
  •  He, Y., Cherian, A., Wichern, G., Markham, A., "Deep Neural Room Acoustics Primitive", International Conference on Machine Learning (ICML), June 2024, pp. 17842-17857.
    BibTeX TR2024-072 PDF
    • @inproceedings{He2024jun,
    • author = {He, Yuhang and Cherian, Anoop and Wichern, Gordon and Markham, Andrew},
    • title = {Deep Neural Room Acoustics Primitive},
    • booktitle = {International Conference on Machine Learning (ICML)},
    • year = 2024,
    • pages = {17842--17857},
    • month = jun,
    • url = {https://www.merl.com/publications/TR2024-072}
    • }
  •  Kambara, M., Hori, C., Sugiura, K., Ota, K., Jha, D.K., Khurana, S., Jain, S., Corcodel, R., Romeres, D., Le Roux, J., "Human Action Understanding-based Robot Planning using Multimodal LLM", IEEE International Conference on Robotics and Automation (ICRA) Workshop, June 2024.
    BibTeX TR2024-066 PDF
    • @inproceedings{Kambara2024jun,
    • author = {Kambara, Motonari and Hori, Chiori and Sugiura, Komei and Ota, Kei and Jha, Devesh K. and Khurana, Sameer and Jain, Siddarth and Corcodel, Radu and Romeres, Diego and Le Roux, Jonathan},
    • title = {Human Action Understanding-based Robot Planning using Multimodal {LLM}},
    • booktitle = {IEEE International Conference on Robotics and Automation (ICRA) Workshop},
    • year = 2024,
    • month = jun,
    • url = {https://www.merl.com/publications/TR2024-066}
    • }
  •  Liu, X., Tai, Y.-W., Tang, C.-K., Miraldo, P., Lohit, S., Chatterjee, M., "Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2024, pp. 19667-19679.
    BibTeX TR2024-042 PDF Videos Software
    • @inproceedings{Liu2024may,
    • author = {Liu, Xinhang and Tai, Yu-Wing and Tang, Chi-Keung and Miraldo, Pedro and Lohit, Suhas and Chatterjee, Moitreya},
    • title = {{Gear-NeRF}: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2024,
    • pages = {19667--19679},
    • month = may,
    • publisher = {IEEE},
    • url = {https://www.merl.com/publications/TR2024-042}
    • }
  •  Singh, A., Jones, M.J., Learned-Miller, E., "Tracklet-based Explainable Video Anomaly Localization", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, May 2024, pp. 3992-4001.
    BibTeX TR2024-057 PDF
    • @inproceedings{Singh2024may,
    • author = {Singh, Ashish and Jones, Michael J. and Learned-Miller, Erik},
    • title = {Tracklet-based Explainable Video Anomaly Localization},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    • year = 2024,
    • pages = {3992--4001},
    • month = may,
    • url = {https://www.merl.com/publications/TR2024-057}
    • }
  •  Dey, R., Egger, B., Boddeti, V., Wang, Y., Marks, T.K., "CoLa-SDF: Controllable Latent StyleSDF for Disentangled 3D Face Generation", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), April 2024.
    BibTeX TR2024-045 PDF
    • @inproceedings{Dey2024apr,
    • author = {Dey, Rahul and Egger, Bernhard and Boddeti, Vishnu and Wang, Ye and Marks, Tim K.},
    • title = {{CoLa-SDF}: Controllable Latent {StyleSDF} for Disentangled {3D} Face Generation},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
    • year = 2024,
    • month = apr,
    • url = {https://www.merl.com/publications/TR2024-045}
    • }
  •  Pan, Z., Wichern, G., Germain, F.G., Subramanian, A., Le Roux, J., "Late Audio-Visual Fusion for In-The-Wild Speaker Diarization", Hands-free Speech Communication and Microphone Arrays (HSCMA), DOI: 10.1109/​ICASSPW62465.2024.10626914, April 2024, pp. 174-178.
    BibTeX TR2024-029 PDF
    • @inproceedings{Pan2024apr,
    • author = {Pan, Zexu and Wichern, Gordon and Germain, Fran{\c{c}}ois G. and Subramanian, Aswin and Le Roux, Jonathan},
    • title = {Late Audio-Visual Fusion for In-The-Wild Speaker Diarization},
    • booktitle = {Hands-free Speech Communication and Microphone Arrays (HSCMA)},
    • year = 2024,
    • pages = {174--178},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSPW62465.2024.10626914},
    • isbn = {979-8-3503-7451-3},
    • url = {https://www.merl.com/publications/TR2024-029}
    • }
  •  Yang, Z., Liu, J., Chen, P., Cherian, A., Marks, T.K., Le Roux, J., Gan, C., "RILA: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), April 2024, pp. 16251-16261.
    BibTeX TR2024-043 PDF
    • @inproceedings{Yang2024apr,
    • author = {Yang, Zeyuan and Liu, Jiageng and Chen, Peihao and Cherian, Anoop and Marks, Tim K. and Le Roux, Jonathan and Gan, Chuang},
    • title = {{RILA}: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2024,
    • pages = {16251--16261},
    • month = apr,
    • publisher = {CVF},
    • url = {https://www.merl.com/publications/TR2024-043}
    • }
  •  Gaur, A., Pais, G., Miraldo, P., "Oriented-grid Encoder for 3D Implicit Representations", International Conference on 3D Vision (3DV), DOI: 10.1109/​3DV62453.2024.00101, March 2024, pp. 1208-1218.
    BibTeX TR2024-031 PDF
    • @inproceedings{Gaur2024mar,
    • author = {Gaur, Arihant and Pais, Goncalo and Miraldo, Pedro},
    • title = {Oriented-grid Encoder for {3D} Implicit Representations},
    • booktitle = {International Conference on 3D Vision (3DV)},
    • year = 2024,
    • pages = {1208--1218},
    • month = mar,
    • publisher = {IEEE},
    • doi = {10.1109/3DV62453.2024.00101},
    • issn = {2475-7888},
    • isbn = {979-8-3503-6245-9},
    • url = {https://www.merl.com/publications/TR2024-031}
    • }
  •  Sholokhov, A., Rapp, J., Nabi, S., Brunton, S., Kutz, N., Mansour, H., "Single-pixel imaging of dynamic flows using Neural ODE regularization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP48485.2024.10447584, March 2024, pp. 2530-2534.
    BibTeX TR2024-024 PDF
    • @inproceedings{Sholokhov2024mar,
    • author = {Sholokhov, Aleksei and Rapp, Joshua and Nabi, Saleh and Brunton, Steven and Kutz, Nathan and Mansour, Hassan},
    • title = {Single-pixel imaging of dynamic flows using Neural {ODE} regularization},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {2530--2534},
    • month = mar,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP48485.2024.10447584},
    • url = {https://www.merl.com/publications/TR2024-024}
    • }
  •  Hori, C., Wang, P., Rahman, M., Vaca-Rubio, C., Khurana, S., Cherian, A., Le Roux, J., "Wi-Fi based Indoor Monitoring Enhanced by Multimodal Fusion", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP48485.2024.10447600, March 2024, pp. 13296-13300.
    BibTeX TR2024-012 PDF
    • @inproceedings{Hori2024mar,
    • author = {Hori, Chiori and Wang, Pu and Rahman, Mahbub and Vaca-Rubio, Cristian and Khurana, Sameer and Cherian, Anoop and Le Roux, Jonathan},
    • title = {{Wi-Fi} based Indoor Monitoring Enhanced by Multimodal Fusion},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {13296--13300},
    • month = mar,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP48485.2024.10447600},
    • issn = {2379-190X},
    • isbn = {979-8-3503-4485-1},
    • url = {https://www.merl.com/publications/TR2024-012}
    • }
  •  Srinivas, N., Vinod, A.P., Di Cairano, S., Weiss, A., "Lunar Landing with Feasible Divert using Controllable Sets", AIAA SciTech, DOI: 10.2514/​6.2024-0324, January 2024, pp. AIAA 2024-0324.
    BibTeX TR2024-004 PDF
    • @inproceedings{Srinivas2024jan,
    • author = {Srinivas, Neeraj and Vinod, Abraham P. and Di Cairano, Stefano and Weiss, Avishai},
    • title = {Lunar Landing with Feasible Divert using Controllable Sets},
    • booktitle = {AIAA SCITECH 2024 Forum},
    • year = 2024,
    • pages = {AIAA 2024-0324},
    • month = jan,
    • doi = {10.2514/6.2024-0324},
    • url = {https://www.merl.com/publications/TR2024-004}
    • }
  •  Carmichael, Z., Lohit, S., Cherian, A., Jones, M.J., Scheirer, W., "Pixel-Grounded Prototypical Part Networks", IEEE Winter Conference on Applications of Computer Vision (WACV), DOI: 10.1109/WACV57701.2024.00470, January 2024.
    BibTeX TR2024-002 PDF Video Software Presentation
    • @inproceedings{Carmichael2024jan,
    • author = {Carmichael, Zachariah and Lohit, Suhas and Cherian, Anoop and Jones, Michael J. and Scheirer, Walter},
    • title = {Pixel-Grounded Prototypical Part Networks},
    • booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
    • year = 2024,
    • month = jan,
    • doi = {10.1109/WACV57701.2024.00470},
    • url = {https://www.merl.com/publications/TR2024-002}
    • }
  •  Liu, X., Paul, S., Chatterjee, M., Cherian, A., "CAVEN: An Embodied Conversational Agent for Efficient Audio-Visual Navigation in Noisy Environments", AAAI Conference on Artificial Intelligence, DOI: 10.1609/​aaai.v38i4.28167, December 2023, pp. 3765-3773.
    BibTeX TR2023-154 PDF
    • @inproceedings{Liu2023dec2,
    • author = {Liu, Xiulong and Paul, Sudipta and Chatterjee, Moitreya and Cherian, Anoop},
    • title = {{CAVEN}: An Embodied Conversational Agent for Efficient Audio-Visual Navigation in Noisy Environments},
    • booktitle = {Proceedings of the 38th AAAI Conference on Artificial Intelligence},
    • year = 2023,
    • pages = {3765--3773},
    • month = dec,
    • doi = {10.1609/aaai.v38i4.28167},
    • url = {https://www.merl.com/publications/TR2023-154}
    • }
  •  Pan, Z., Wichern, G., Masuyama, Y., Germain, F.G., Khurana, S., Hori, C., Le Roux, J., "Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/​ASRU57964.2023.10389618, December 2023.
    BibTeX TR2023-152 PDF Video
    • @inproceedings{Pan2023dec2,
    • author = {Pan, Zexu and Wichern, Gordon and Masuyama, Yoshiki and Germain, Fran{\c{c}}ois G. and Khurana, Sameer and Hori, Chiori and Le Roux, Jonathan},
    • title = {Scenario-Aware Audio-Visual {TF-GridNet} for Target Speech Extraction},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2023,
    • month = dec,
    • doi = {10.1109/ASRU57964.2023.10389618},
    • isbn = {979-8-3503-0689-7},
    • url = {https://www.merl.com/publications/TR2023-152}
    • }