Publications

602 / 3,704 publications found.


  •  Hegde, D., Lohit, S., Peng, K.-C., Jones, M.J., Patel, V.M., "Equivariant Spatio-Temporal Self-Supervision for LiDAR Object Detection", European Conference on Computer Vision (ECCV), September 2024.
    BibTeX TR2024-130 PDF
    • @inproceedings{Hegde2024sep,
    • author = {Hegde, Deepti and Lohit, Suhas and Peng, Kuan-Chuan and Jones, Michael J. and Patel, Vishal M.},
    • title = {Equivariant Spatio-Temporal Self-Supervision for LiDAR Object Detection},
    • booktitle = {European Conference on Computer Vision (ECCV)},
    • year = 2024,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2024-130}
    • }
  •  Pais, G., Piedade, V., Chatterjee, M., Greiff, M., Miraldo, P., "A Probability-guided Sampler for Neural Implicit Surface Rendering", European Conference on Computer Vision (ECCV), September 2024.
    BibTeX TR2024-129 PDF
    • @inproceedings{Pais2024sep,
    • author = {Pais, Goncalo and Piedade, Valter and Chatterjee, Moitreya and Greiff, Marcus and Miraldo, Pedro},
    • title = {A Probability-guided Sampler for Neural Implicit Surface Rendering},
    • booktitle = {European Conference on Computer Vision (ECCV)},
    • year = 2024,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2024-129}
    • }
  •  Cherian, A., Jain, S., Marks, T.K., "Few-shot Transparent Instance Segmentation for Bin Picking", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), September 2024.
    BibTeX TR2024-127 PDF
    • @inproceedings{Cherian2024sep,
    • author = {Cherian, Anoop and Jain, Siddarth and Marks, Tim K.},
    • title = {Few-shot Transparent Instance Segmentation for Bin Picking},
    • booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
    • year = 2024,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2024-127}
    • }
  •  Yin, J., Luo, A., Du, Y., Cherian, A., Marks, T.K., Le Roux, J., Gan, C., "Disentangled Acoustic Fields For Multimodal Physical Scene Understanding", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), September 2024.
    BibTeX TR2024-125 PDF
    • @inproceedings{Yin2024sep,
    • author = {Yin, Jie and Luo, Andrew and Du, Yilun and Cherian, Anoop and Marks, Tim K. and Le Roux, Jonathan and Gan, Chuang},
    • title = {Disentangled Acoustic Fields For Multimodal Physical Scene Understanding},
    • booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
    • year = 2024,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2024-125}
    • }
  •  Ho, C.-H., Peng, K.-C., Vasconcelos, N., "Long-Tailed Anomaly Detection with Learnable Class Names", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2024.
    BibTeX TR2024-040 PDF Video Data Presentation
    • @inproceedings{Ho2024jun,
    • author = {Ho, Chih-Hui and Peng, Kuan-Chuan and Vasconcelos, Nuno},
    • title = {Long-Tailed Anomaly Detection with Learnable Class Names},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2024,
    • month = jun,
    • url = {https://www.merl.com/publications/TR2024-040}
    • }
  •  Ni, H., Egger, B., Lohit, S., Cherian, A., Wang, Y., Koike-Akino, T., Huang, S.X., Marks, T.K., "TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2024.
    BibTeX TR2024-059 PDF Video Software Presentation
    • @inproceedings{Ni2024jun,
    • author = {Ni, Haomiao and Egger, Bernhard and Lohit, Suhas and Cherian, Anoop and Wang, Ye and Koike-Akino, Toshiaki and Huang, Sharon X. and Marks, Tim K.},
    • title = {TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2024,
    • month = jun,
    • url = {https://www.merl.com/publications/TR2024-059}
    • }
  •  He, Y., Cherian, A., Wichern, G., Markham, A., "Deep Neural Room Acoustics Primitive", International Conference on Machine Learning (ICML), June 2024.
    BibTeX TR2024-072 PDF
    • @inproceedings{He2024jun,
    • author = {He, Yuhang and Cherian, Anoop and Wichern, Gordon and Markham, Andrew},
    • title = {Deep Neural Room Acoustics Primitive},
    • booktitle = {International Conference on Machine Learning (ICML)},
    • year = 2024,
    • month = jun,
    • url = {https://www.merl.com/publications/TR2024-072}
    • }
  •  Kambara, M., Hori, C., Sugiura, K., Ota, K., Jha, D.K., Khurana, S., Jain, S., Corcodel, R., Romeres, D., Le Roux, J., "Human Action Understanding-based Robot Planning using Multimodal LLM", IEEE International Conference on Robotics and Automation (ICRA), June 2024.
    BibTeX TR2024-066 PDF
    • @inproceedings{Kambara2024jun,
    • author = {Kambara, Motonari and Hori, Chiori and Sugiura, Komei and Ota, Kei and Jha, Devesh K. and Khurana, Sameer and Jain, Siddarth and Corcodel, Radu and Romeres, Diego and Le Roux, Jonathan},
    • title = {Human Action Understanding-based Robot Planning using Multimodal LLM},
    • booktitle = {IEEE International Conference on Robotics and Automation (ICRA) Workshop},
    • year = 2024,
    • month = jun,
    • url = {https://www.merl.com/publications/TR2024-066}
    • }
  •  Liu, X., Tai, Y.-W., Tang, C.-K., Miraldo, P., Lohit, S., Chatterjee, M., "Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2024, pp. 19667-19679.
    BibTeX TR2024-042 PDF Videos Software
    • @inproceedings{Liu2024may,
    • author = {Liu, Xinhang and Tai, Yu-wing and Tang, Chi-Keung and Miraldo, Pedro and Lohit, Suhas and Chatterjee, Moitreya},
    • title = {Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2024,
    • pages = {19667--19679},
    • month = may,
    • publisher = {IEEE},
    • url = {https://www.merl.com/publications/TR2024-042}
    • }
  •  Singh, A., Jones, M.J., Learned-Miller, E., "Tracklet-based Explainable Video Anomaly Localization", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, May 2024.
    BibTeX TR2024-057 PDF
    • @inproceedings{Singh2024may,
    • author = {Singh, Ashish and Jones, Michael J. and Learned-Miller, Erik},
    • title = {Tracklet-based Explainable Video Anomaly Localization},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    • year = 2024,
    • month = may,
    • url = {https://www.merl.com/publications/TR2024-057}
    • }
  •  Dey, R., Egger, B., Boddeti, V., Wang, Y., Marks, T.K., "CoLa-SDF: Controllable Latent StyleSDF for Disentangled 3D Face Generation", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), April 2024.
    BibTeX TR2024-045 PDF
    • @inproceedings{Dey2024apr,
    • author = {Dey, Rahul and Egger, Bernhard and Boddeti, Vishnu and Wang, Ye and Marks, Tim K.},
    • title = {CoLa-SDF: Controllable Latent StyleSDF for Disentangled 3D Face Generation},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
    • year = 2024,
    • month = apr,
    • url = {https://www.merl.com/publications/TR2024-045}
    • }
  •  Pan, Z., Wichern, G., Germain, F.G., Subramanian, A., Le Roux, J., "Late Audio-Visual Fusion for In-The-Wild Speaker Diarization", Hands-free Speech Communication and Microphone Arrays (HSCMA), April 2024.
    BibTeX TR2024-029 PDF
    • @inproceedings{Pan2024apr,
    • author = {Pan, Zexu and Wichern, Gordon and Germain, François G. and Subramanian, Aswin and Le Roux, Jonathan},
    • title = {Late Audio-Visual Fusion for In-The-Wild Speaker Diarization},
    • booktitle = {Hands-free Speech Communication and Microphone Arrays (HSCMA)},
    • year = 2024,
    • month = apr,
    • url = {https://www.merl.com/publications/TR2024-029}
    • }
  •  Yang, Z., Liu, J., Chen, P., Cherian, A., Marks, T.K., Le Roux, J., Gan, C., "RILA: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), April 2024, pp. 16251-16261.
    BibTeX TR2024-043 PDF
    • @inproceedings{Yang2024apr,
    • author = {Yang, Zeyuan and Liu, Jiageng and Chen, Peihao and Cherian, Anoop and Marks, Tim K. and Le Roux, Jonathan and Gan, Chuang},
    • title = {RILA: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2024,
    • pages = {16251--16261},
    • month = apr,
    • publisher = {CVF},
    • url = {https://www.merl.com/publications/TR2024-043}
    • }
  •  Gaur, A., Pais, G., Miraldo, P., "Oriented-grid Encoder for 3D Implicit Representations", International Conference on 3D Vision (3DV), DOI: 10.1109/3DV62453.2024.00101, March 2024, pp. 1208-1218.
    BibTeX TR2024-031 PDF
    • @inproceedings{Gaur2024mar,
    • author = {Gaur, Arihant and Pais, Goncalo and Miraldo, Pedro},
    • title = {Oriented-grid Encoder for 3D Implicit Representations},
    • booktitle = {International Conference on 3D Vision (3DV)},
    • year = 2024,
    • pages = {1208--1218},
    • month = mar,
    • publisher = {IEEE},
    • doi = {10.1109/3DV62453.2024.00101},
    • issn = {2475-7888},
    • isbn = {979-8-3503-6245-9},
    • url = {https://www.merl.com/publications/TR2024-031}
    • }
  •  Sholokhov, A., Rapp, J., Nabi, S., Brunton, S., Kutz, N., Mansour, H., "Single-pixel imaging of dynamic flows using Neural ODE regularization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10447584, March 2024, pp. 2530-2534.
    BibTeX TR2024-024 PDF
    • @inproceedings{Sholokhov2024mar,
    • author = {Sholokhov, Aleksei and Rapp, Joshua and Nabi, Saleh and Brunton, Steven and Kutz, Nathan and Mansour, Hassan},
    • title = {Single-pixel imaging of dynamic flows using Neural ODE regularization},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {2530--2534},
    • month = mar,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP48485.2024.10447584},
    • url = {https://www.merl.com/publications/TR2024-024}
    • }
  •  Hori, C., Wang, P., Rahman, M., Vaca-Rubio, C., Khurana, S., Cherian, A., Le Roux, J., "Wi-Fi based Indoor Monitoring Enhanced by Multimodal Fusion", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10447600, March 2024, pp. 13296-13300.
    BibTeX TR2024-012 PDF
    • @inproceedings{Hori2024mar,
    • author = {Hori, Chiori and Wang, Pu and Rahman, Mahbub and Vaca-Rubio, Cristian and Khurana, Sameer and Cherian, Anoop and Le Roux, Jonathan},
    • title = {Wi-Fi based Indoor Monitoring Enhanced by Multimodal Fusion},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {13296--13300},
    • month = mar,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP48485.2024.10447600},
    • issn = {2379-190X},
    • isbn = {979-8-3503-4485-1},
    • url = {https://www.merl.com/publications/TR2024-012}
    • }
  •  Srinivas, N., Vinod, A.P., Di Cairano, S., Weiss, A., "Lunar Landing with Feasible Divert using Controllable Sets", AIAA SciTech, DOI: 10.2514/6.2024-0324, January 2024, pp. AIAA 2024-0324.
    BibTeX TR2024-004 PDF
    • @inproceedings{Srinivas2024jan,
    • author = {Srinivas, Neeraj and Vinod, Abraham P. and Di Cairano, Stefano and Weiss, Avishai},
    • title = {Lunar Landing with Feasible Divert using Controllable Sets},
    • booktitle = {AIAA SCITECH 2024 Forum},
    • year = 2024,
    • pages = {AIAA 2024-0324},
    • month = jan,
    • doi = {10.2514/6.2024-0324},
    • url = {https://www.merl.com/publications/TR2024-004}
    • }
  •  Carmichael, Z., Lohit, S., Cherian, A., Jones, M.J., Scheirer, W., "Pixel-Grounded Prototypical Part Networks", IEEE Winter Conference on Applications of Computer Vision (WACV), DOI: 10.1109/WACV57701.2024.00470, January 2024.
    BibTeX TR2024-002 PDF Video Software Presentation
    • @inproceedings{Carmichael2024jan,
    • author = {Carmichael, Zachariah and Lohit, Suhas and Cherian, Anoop and Jones, Michael J. and Scheirer, Walter},
    • title = {Pixel-Grounded Prototypical Part Networks},
    • booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
    • year = 2024,
    • month = jan,
    • doi = {10.1109/WACV57701.2024.00470},
    • url = {https://www.merl.com/publications/TR2024-002}
    • }
  •  Liu, X., Paul, S., Chatterjee, M., Cherian, A., "CAVEN: An Embodied Conversational Agent for Efficient Audio-Visual Navigation in Noisy Environments", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v38i4.28167, December 2023, pp. 3765-3773.
    BibTeX TR2023-154 PDF
    • @inproceedings{Liu2023dec2,
    • author = {Liu, Xiulong and Paul, Sudipta and Chatterjee, Moitreya and Cherian, Anoop},
    • title = {CAVEN: An Embodied Conversational Agent for Efficient Audio-Visual Navigation in Noisy Environments},
    • booktitle = {Proceedings of the 38th AAAI Conference on Artificial Intelligence},
    • year = 2023,
    • pages = {3765--3773},
    • month = dec,
    • doi = {10.1609/aaai.v38i4.28167},
    • url = {https://www.merl.com/publications/TR2023-154}
    • }
  •  Pan, Z., Wichern, G., Masuyama, Y., Germain, F.G., Khurana, S., Hori, C., Le Roux, J., "Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU57964.2023.10389618, December 2023.
    BibTeX TR2023-152 PDF Video
    • @inproceedings{Pan2023dec2,
    • author = {Pan, Zexu and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G. and Khurana, Sameer and Hori, Chiori and Le Roux, Jonathan},
    • title = {Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2023,
    • month = dec,
    • doi = {10.1109/ASRU57964.2023.10389618},
    • isbn = {979-8-3503-0689-7},
    • url = {https://www.merl.com/publications/TR2023-152}
    • }
  •  He, Y., Shin, S., Cherian, A., Markham, A., Trigon, N., "Sound3DVDet: 3D Sound Source Detection using Multiview Microphone Array and RGB Images", IEEE Winter Conference on Applications of Computer Vision (WACV), December 2023, pp. 5496-5507.
    BibTeX TR2023-144 PDF
    • @inproceedings{He2023dec,
    • author = {He, Yuhang and Shin, Sangyun and Cherian, Anoop and Markham, Andrew and Trigon, Niki},
    • title = {Sound3DVDet: 3D Sound Source Detection using Multiview Microphone Array and RGB Images},
    • booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
    • year = 2023,
    • pages = {5496--5507},
    • month = dec,
    • url = {https://www.merl.com/publications/TR2023-144}
    • }
  •  Delattre, F., Dirnfeld, D., Nguyen, P., Scarano, S., Jones, M.J., Miraldo, P., Learned-Miller, E., "Robust Frame-to-Frame Camera Rotation Estimation in Crowded Scenes", IEEE International Conference on Computer Vision (ICCV), DOI: 10.1109/ICCV51070.2023.00894, October 2023, pp. 3715-3724.
    BibTeX TR2023-123 PDF Video Software
    • @inproceedings{Delattre2023oct,
    • author = {Delattre, Fabien and Dirnfeld, David and Nguyen, Phat and Scarano, Stephen and Jones, Michael J. and Miraldo, Pedro and Learned-Miller, Erik},
    • title = {Robust Frame-to-Frame Camera Rotation Estimation in Crowded Scenes},
    • booktitle = {IEEE International Conference on Computer Vision (ICCV)},
    • year = 2023,
    • pages = {3715--3724},
    • month = oct,
    • publisher = {IEEE/CVF},
    • doi = {10.1109/ICCV51070.2023.00894},
    • issn = {2380-7504},
    • isbn = {979-8-3503-0718-4},
    • url = {https://www.merl.com/publications/TR2023-123}
    • }
  •  Miraldo, P., Piedade, V., "BANSAC: A dynamic BAyesian Network for adaptive SAmple Consensus", IEEE International Conference on Computer Vision (ICCV), DOI: 10.1109/ICCV51070.2023.00346, October 2023, pp. 3715-3724.
    BibTeX TR2023-124 PDF Video Software
    • @inproceedings{Miraldo2023oct,
    • author = {Miraldo, Pedro and Piedade, Valter},
    • title = {BANSAC: A dynamic BAyesian Network for adaptive SAmple Consensus},
    • booktitle = {IEEE International Conference on Computer Vision (ICCV)},
    • year = 2023,
    • pages = {3715--3724},
    • month = oct,
    • publisher = {IEEE/CVF},
    • doi = {10.1109/ICCV51070.2023.00346},
    • issn = {2380-7504},
    • isbn = {979-8-3503-0718-4},
    • url = {https://www.merl.com/publications/TR2023-124}
    • }
  •  Nair, N.G., Cherian, A., Lohit, S., Wang, Y., Koike-Akino, T., Patel, V.M., Marks, T.K., "Steered Diffusion: A Generalized Framework for Plug-and-Play Conditional Image Synthesis", IEEE International Conference on Computer Vision (ICCV), October 2023, pp. 20850-20860.
    BibTeX TR2023-126 PDF Software Presentation
    • @inproceedings{Nair2023sep,
    • author = {Nair, Nithin Gopalakrishnan and Cherian, Anoop and Lohit, Suhas and Wang, Ye and Koike-Akino, Toshiaki and Patel, Vishal M. and Marks, Tim K.},
    • title = {Steered Diffusion: A Generalized Framework for Plug-and-Play Conditional Image Synthesis},
    • booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
    • year = 2023,
    • pages = {20850--20860},
    • month = oct,
    • publisher = {IEEE/CVF},
    • url = {https://www.merl.com/publications/TR2023-126}
    • }
  •  Sharma, M., Chatterjee, M., Peng, K.-C., Lohit, S., Jones, M.J., "Tensor Factorization for Leveraging Cross-Modal Knowledge in Data-Constrained Infrared Object Detection", IEEE International Conference on Computer Vision Workshops (ICCV), October 2023, pp. 924-932.
    BibTeX TR2023-125 PDF Presentation
    • @inproceedings{Sharma2023oct,
    • author = {Sharma, Manish and Chatterjee, Moitreya and Peng, Kuan-Chuan and Lohit, Suhas and Jones, Michael J.},
    • title = {Tensor Factorization for Leveraging Cross-Modal Knowledge in Data-Constrained Infrared Object Detection},
    • booktitle = {IEEE International Conference on Computer Vision Workshops (ICCV)},
    • year = 2023,
    • pages = {924--932},
    • month = oct,
    • url = {https://www.merl.com/publications/TR2023-125}
    • }