Publications

Li, D., Zhang, J., Egger, B., Chatterjee, M., Lohit, S., Marks, T.K., Cherian, A., "AssemblyBench: Physics-Aware Assembly of Complex Industrial Objects", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2026.
BibTeX TR2026-076 PDF Video Data Software
- @inproceedings{Li2026jun,
- author = {Li, Danrui and Zhang, Jiahao and Egger, Bernhard and Chatterjee, Moitreya and Lohit, Suhas and Marks, Tim K. and Cherian, Anoop},
- title = {{AssemblyBench: Physics-Aware Assembly of Complex Industrial Objects}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2026,
- month = jun,
- url = {https://www.merl.com/publications/TR2026-076}
- }
Shenoy, V., Lohit, S., Mansour, H., Chellappa, R., Marks, T.K., "Recovering Pulse Waves from Video Using Deep Unrolling and Deep Equilibrium Models", IEEE Transactions on Image Processing, DOI: 10.1109/TIP.2026.3671653, Vol. 35, pp. 2755-2770, March 2026.
BibTeX TR2026-031 PDF
- @article{Shenoy2026mar,
- author = {Shenoy, Vineet and Lohit, Suhas and Mansour, Hassan and Chellappa, Rama and Marks, Tim K.},
- title = {{Recovering Pulse Waves from Video Using Deep Unrolling and Deep Equilibrium Models}},
- journal = {IEEE Transactions on Image Processing},
- year = 2026,
- volume = 35,
- pages = {2755--2770},
- month = mar,
- doi = {10.1109/TIP.2026.3671653},
- issn = {1941-0042},
- url = {https://www.merl.com/publications/TR2026-031}
- }
Shenoy, V., Wu, S., Comas, A., Lohit, S., Mansour, H., Marks, T.K., "Time-Series U-Net with Recurrence for Noise-Robust Imaging Photoplethysmography", IEEE Access, DOI: 10.1109/ACCESS.2025.3617284, Vol. 13, pp. 173923-173938, October 2025.
BibTeX TR2025-145 PDF
- @article{Shenoy2025oct,
- author = {Shenoy, Vineet and Wu, Shaoju and Comas, Armand and Lohit, Suhas and Mansour, Hassan and Marks, Tim K.},
- title = {{Time-Series U-Net with Recurrence for Noise-Robust Imaging Photoplethysmography}},
- journal = {IEEE Access},
- year = 2025,
- volume = 13,
- pages = {173923--173938},
- month = oct,
- doi = {10.1109/ACCESS.2025.3617284},
- url = {https://www.merl.com/publications/TR2025-145}
- }
Hu, Y., Lohit, S., Kamilov, U., Marks, T.K., "Multimodal Diffusion Bridge with Attention-Based SAR Fusion for Satellite Image Cloud Removal", IEEE Transactions on Geoscience and Remote Sensing, DOI: 10.1109/TGRS.2025.3604654, Vol. 63, September 2025.
BibTeX TR2025-138 PDF
- @article{Hu2025sep2,
- author = {Hu, Yuyang and Lohit, Suhas and Kamilov, Ulugbek and Marks, Tim K.},
- title = {{Multimodal Diffusion Bridge with Attention-Based SAR Fusion for Satellite Image Cloud Removal}},
- journal = {IEEE Transactions on Geoscience and Remote Sensing},
- year = 2025,
- volume = 63,
- month = sep,
- doi = {10.1109/TGRS.2025.3604654},
- issn = {1558-0644},
- url = {https://www.merl.com/publications/TR2025-138}
- }
Sawada, N., Miraldo, P., Lohit, S., Marks, T.K., Chatterjee, M., "FreBIS: Frequency-Based Stratification for Neural Implicit Surface Representations", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), DOI: 10.1109/CVPRW67362.2025.00041, June 2025, pp. 369-379.
BibTeX TR2025-074 PDF
- @inproceedings{Sawada2025jun,
- author = {Sawada, Naoko and Miraldo, Pedro and Lohit, Suhas and Marks, Tim K. and Chatterjee, Moitreya},
- title = {{FreBIS: Frequency-Based Stratification for Neural Implicit Surface Representations}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
- year = 2025,
- pages = {369--379},
- month = jun,
- doi = {10.1109/CVPRW67362.2025.00041},
- url = {https://www.merl.com/publications/TR2025-074}
- }
Shenoy, V., Wu, S., Comas, A., Marks, T.K., Lohit, S., Mansour, H., "Time-Series U-Net with Recurrence for Noise-Robust Imaging Photoplethysmography", arXiv, March 2025.
BibTeX arXiv
- @article{Shenoy2025mar2,
- author = {Shenoy, Vineet and Wu, Shaoju and Comas, Armand and Marks, Tim K. and Lohit, Suhas and Mansour, Hassan},
- title = {{Time-Series U-Net with Recurrence for Noise-Robust Imaging Photoplethysmography}},
- journal = {arXiv},
- eprint = {2503.17351},
- archivePrefix = {arXiv},
- year = 2025,
- month = mar,
- url = {https://arxiv.org/abs/2503.17351}
- }
Lohit, S., Marks, T.K., "Rotation-Equivariant Neural Networks for Cloud Removal from Satellite Images", Asilomar Conference on Signals, Systems, and Computers (ACSSC), DOI: 10.1109/IEEECONF60004.2024.10942613, January 2025, pp. 1360-1365.
BibTeX TR2025-009 PDF
- @inproceedings{Lohit2025jan,
- author = {Lohit, Suhas and Marks, Tim K.},
- title = {{Rotation-Equivariant Neural Networks for Cloud Removal from Satellite Images}},
- booktitle = {2024 58th Asilomar Conference on Signals, Systems, and Computers (ACSSC)},
- year = 2025,
- pages = {1360--1365},
- month = jan,
- publisher = {IEEE},
- doi = {10.1109/IEEECONF60004.2024.10942613},
- issn = {2576-2303},
- isbn = {979-8-3503-5405-8},
- url = {https://www.merl.com/publications/TR2025-009}
- }
Cherian, A., Jain, S., Marks, T.K., "Few-shot Transparent Instance Segmentation for Bin Picking", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), September 2024, pp. 5009-5016.
BibTeX TR2024-127 PDF Video
- @inproceedings{Cherian2024sep,
- author = {Cherian, Anoop and Jain, Siddarth and Marks, Tim K.},
- title = {{Few-shot Transparent Instance Segmentation for Bin Picking}},
- booktitle = {2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
- year = 2024,
- pages = {5009--5016},
- month = sep,
- publisher = {IEEE},
- url = {https://www.merl.com/publications/TR2024-127}
- }
Yin, J., Luo, A., Du, Y., Cherian, A., Marks, T.K., Le Roux, J., Gan, C., "Disentangled Acoustic Fields For Multimodal Physical Scene Understanding", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), September 2024, pp. 557-564.
BibTeX TR2024-125 PDF
- @inproceedings{Yin2024sep,
- author = {Yin, Jie and Luo, Andrew and Du, Yilun and Cherian, Anoop and Marks, Tim K. and {Le Roux}, Jonathan and Gan, Chuang},
- title = {{Disentangled Acoustic Fields For Multimodal Physical Scene Understanding}},
- booktitle = {2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
- year = 2024,
- pages = {557--564},
- month = sep,
- publisher = {IEEE},
- url = {https://www.merl.com/publications/TR2024-125}
- }
Ni, H., Egger, B., Lohit, S., Cherian, A., Wang, Y., Koike-Akino, T., Huang, S.X., Marks, T.K., "TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2024, pp. 9015-9025.
BibTeX TR2024-059 PDF Video Software Presentation
- @inproceedings{Ni2024jun,
- author = {Ni, Haomiao and Egger, Bernhard and Lohit, Suhas and Cherian, Anoop and Wang, Ye and Koike-Akino, Toshiaki and Huang, Sharon X. and Marks, Tim K.},
- title = {{TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2024,
- pages = {9015--9025},
- month = jun,
- url = {https://www.merl.com/publications/TR2024-059}
- }
Dey, R., Egger, B., Boddeti, V., Wang, Y., Marks, T.K., "CoLa-SDF: Controllable Latent StyleSDF for Disentangled 3D Face Generation", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), DOI: 10.1109/CVPRW63382.2024.00291, June 2024, pp. 2852-2861.
BibTeX TR2024-045 PDF
- @inproceedings{Dey2024apr,
- author = {Dey, Rahul and Egger, Bernhard and Boddeti, Vishnu and Wang, Ye and Marks, Tim K.},
- title = {{CoLa-SDF: Controllable Latent StyleSDF for Disentangled 3D Face Generation}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
- year = 2024,
- pages = {2852--2861},
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/CVPRW63382.2024.00291},
- isbn = {979-8-3503-6547-4},
- url = {https://www.merl.com/publications/TR2024-045}
- }
Yang, Z., Liu, J., Chen, P., Cherian, A., Marks, T.K., Le Roux, J., Gan, C., "RILA: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2024, pp. 16251-16261.
BibTeX TR2024-043 PDF
- @inproceedings{Yang2024apr,
- author = {Yang, Zeyuan and Liu, Jiageng and Chen, Peihao and Cherian, Anoop and Marks, Tim K. and {Le Roux}, Jonathan and Gan, Chuang},
- title = {{RILA: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2024,
- pages = {16251--16261},
- month = jun,
- publisher = {CVF},
- url = {https://www.merl.com/publications/TR2024-043}
- }
Nair, N.G., Cherian, A., Lohit, S., Wang, Y., Koike-Akino, T., Patel, V.M., Marks, T.K., "Steered Diffusion: A Generalized Framework for Plug-and-Play Conditional Image Synthesis", IEEE International Conference on Computer Vision (ICCV), October 2023, pp. 20850-20860.
BibTeX TR2023-126 PDF Software Presentation
- @inproceedings{Nair2023sep,
- author = {Nair, Nithin Gopalakrishnan and Cherian, Anoop and Lohit, Suhas and Wang, Ye and Koike-Akino, Toshiaki and Patel, Vishal M. and Marks, Tim K.},
- title = {{Steered Diffusion: A Generalized Framework for Plug-and-Play Conditional Image Synthesis}},
- booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
- year = 2023,
- pages = {20850--20860},
- month = oct,
- publisher = {IEEE/CVF},
- url = {https://www.merl.com/publications/TR2023-126}
- }
Shenoy, V., Marks, T.K., Mansour, H., Lohit, S., "Unrolled IPPG: Video Heart Rate Estimation via Unrolling Proximal Gradient Descent", IEEE International Conference on Image Processing (ICIP), DOI: 10.1109/ICIP49359.2023.10222169, September 2023, pp. 2715-2719.
BibTeX TR2023-116 PDF Video
- @inproceedings{Shenoy2023sep,
- author = {Shenoy, Vineet and Marks, Tim K. and Mansour, Hassan and Lohit, Suhas},
- title = {{Unrolled IPPG: Video Heart Rate Estimation via Unrolling Proximal Gradient Descent}},
- booktitle = {IEEE International Conference on Image Processing (ICIP)},
- year = 2023,
- pages = {2715--2719},
- month = sep,
- publisher = {IEEE},
- doi = {10.1109/ICIP49359.2023.10222169},
- isbn = {978-1-7281-9835-4},
- url = {https://www.merl.com/publications/TR2023-116}
- }
Cherian, A., Jain, S., Marks, T.K., Sullivan, A., "Discriminative 3D Shape Modeling for Few-Shot Instance Segmentation", IEEE International Conference on Robotics and Automation (ICRA), DOI: 10.1109/ICRA48891.2023.10160644, May 2023, pp. 9296-9302.
BibTeX TR2023-010 PDF Presentation
- @inproceedings{Cherian2023may,
- author = {Cherian, Anoop and Jain, Siddarth and Marks, Tim K. and Sullivan, Alan},
- title = {{Discriminative 3D Shape Modeling for Few-Shot Instance Segmentation}},
- booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
- year = 2023,
- pages = {9296--9302},
- month = may,
- publisher = {IEEE},
- doi = {10.1109/ICRA48891.2023.10160644},
- url = {https://www.merl.com/publications/TR2023-010}
- }
Ota, K., Tung, H.-Y., Smith, K., Cherian, A., Marks, T.K., Sullivan, A., Kanezaki, A., Tenenbaum, J.B., "H-SAUR: Hypothesize, Simulate, Act, Update, and Repeat for Understanding Object Articulations from Interactions", IEEE International Conference on Robotics and Automation (ICRA), DOI: 10.1109/ICRA48891.2023.10160575, May 2023, pp. 7272-7278.
BibTeX TR2023-009 PDF
- @inproceedings{Ota2023may,
- author = {Ota, Kei and Tung, Hsiao-Yu and Smith, Kevin and Cherian, Anoop and Marks, Tim K. and Sullivan, Alan and Kanezaki, Asako and Tenenbaum, Joshua B.},
- title = {{H-SAUR: Hypothesize, Simulate, Act, Update, and Repeat for Understanding Object Articulations from Interactions}},
- booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
- year = 2023,
- pages = {7272--7278},
- month = may,
- publisher = {IEEE},
- doi = {10.1109/ICRA48891.2023.10160575},
- url = {https://www.merl.com/publications/TR2023-009}
- }
Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Marks, T.K., Le Roux, J., Hori, C., "Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2022, pp. 7732-7736.
BibTeX TR2022-019 PDF
- @inproceedings{Shah2022apr,
- author = {Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and Marks, Tim K. and {Le Roux}, Jonathan and Hori, Chiori},
- title = {{Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2022,
- pages = {7732--7736},
- month = apr,
- publisher = {IEEE},
- issn = {1520-6149},
- isbn = {978-1-6654-0540-9},
- url = {https://www.merl.com/publications/TR2022-019}
- }
Hori, C., Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Le Roux, J., Marks, T.K., "Overview of Audio Visual Scene-Aware Dialog with Reasoning Track for Natural Language Generation in DSTC10", The 10th Dialog System Technology Challenge Workshop at AAAI, February 2022.
BibTeX TR2022-016 PDF
- @inproceedings{Hori2022feb,
- author = {Hori, Chiori and Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and {Le Roux}, Jonathan and Marks, Tim K.},
- title = {{Overview of Audio Visual Scene-Aware Dialog with Reasoning Track for Natural Language Generation in DSTC10}},
- booktitle = {The 10th Dialog System Technology Challenge Workshop at AAAI},
- year = 2022,
- month = feb,
- url = {https://www.merl.com/publications/TR2022-016}
- }
Cherian, A., Hori, C., Marks, T.K., Le Roux, J., "(2.5+1)D Spatio-Temporal Scene Graphs for Video Question Answering", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i1.19922, February 2022, pp. 444-453.
BibTeX TR2022-014 PDF Video Presentation
- @inproceedings{Cherian2022feb,
- author = {Cherian, Anoop and Hori, Chiori and Marks, Tim K. and {Le Roux}, Jonathan},
- title = {{(2.5+1)D Spatio-Temporal Scene Graphs for Video Question Answering}},
- booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
- year = 2022,
- pages = {444--453},
- month = feb,
- doi = {10.1609/aaai.v36i1.19922},
- url = {https://www.merl.com/publications/TR2022-014}
- }
Medin, S.C., Egger, B., Cherian, A., Wang, Y., Tenenbaum, J.B., Liu, X., Marks, T.K., "MOST-GAN: 3D Morphable StyleGAN for Disentangled Face Image Manipulation", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i2.20091, February 2022, pp. 1962-1971.
BibTeX TR2022-011 PDF Video Data Presentation
- @inproceedings{Medin2022feb,
- author = {Medin, Safa C. and Egger, Bernhard and Cherian, Anoop and Wang, Ye and Tenenbaum, Joshua B. and Liu, Xiaoming and Marks, Tim K.},
- title = {{MOST-GAN: 3D Morphable StyleGAN for Disentangled Face Image Manipulation}},
- booktitle = {AAAI Conference on Artificial Intelligence},
- year = 2022,
- pages = {1962--1971},
- month = feb,
- doi = {10.1609/aaai.v36i2.20091},
- url = {https://www.merl.com/publications/TR2022-011}
- }
Cherian, A., Pais, G., Jain, S., Marks, T.K., Sullivan, A., "InSeGAN: A Generative Approach to Segmenting Identical Instances in Depth Images", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 10023-10032.
BibTeX TR2021-097 PDF Video Data Software Presentation
- @inproceedings{Cherian2021oct,
- author = {Cherian, Anoop and Pais, Goncalo and Jain, Siddarth and Marks, Tim K. and Sullivan, Alan},
- title = {{InSeGAN: A Generative Approach to Segmenting Identical Instances in Depth Images}},
- booktitle = {IEEE International Conference on Computer Vision (ICCV)},
- year = 2021,
- pages = {10023--10032},
- month = oct,
- publisher = {CVF},
- url = {https://www.merl.com/publications/TR2021-097}
- }
Comas, A., Marks, T.K., Mansour, H., Lohit, S., Ma, Y., Liu, X., "TURNIP: Time-series U-NET with Recurrence for NIR Imaging PPG", IEEE International Conference on Image Processing (ICIP), DOI: 10.1109/ICIP42928.2021.9506663, September 2021, pp. 309-313.
BibTeX TR2021-099 PDF
- @inproceedings{Comas2021sep,
- author = {Comas, Armand and Marks, Tim K. and Mansour, Hassan and Lohit, Suhas and Ma, Yechi and Liu, Xiaoming},
- title = {{TURNIP: Time-series U-NET with Recurrence for NIR Imaging PPG}},
- booktitle = {IEEE International Conference on Image Processing (ICIP)},
- year = 2021,
- pages = {309--313},
- month = sep,
- publisher = {IEEE},
- doi = {10.1109/ICIP42928.2021.9506663},
- url = {https://www.merl.com/publications/TR2021-099}
- }
Kim, S., Galley, M., Gunasekara, C., Lee, S., Atkinson, A., Peng, B., Schulz, H., Gao, J., Li, J., Adada, M., Huang, M., Lastras, L., Kummerfeld, J.K., Lasecki, W.S., Hori, C., Cherian, A., Marks, T.K., Rastogi, A., Zang, X., Sunkara, S., Gupta, R., "Overview of the Eighth Dialog System Technology Challenge: DSTC8", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2021.3078368, May 2021.
BibTeX TR2021-064 PDF
- @article{Kim2021may,
- author = {Kim, Seokhwan and Galley, Michel and Gunasekara, Chulaka and Lee, Sungjin and Atkinson, Adam and Peng, Baolin and Schulz, Hannes and Gao, Jianfeng and Li, Jinchao and Adada, Mahmoud and Huang, Minlie and Lastras, Luis and Kummerfeld, Jonathan K. and Lasecki, Walter S. and Hori, Chiori and Cherian, Anoop and Marks, Tim K. and Rastogi, Abhinav and Zang, Xiaoxue and Sunkara, Srinivas and Gupta, Raghav},
- title = {{Overview of the Eighth Dialog System Technology Challenge: DSTC8}},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year = 2021,
- month = may,
- doi = {10.1109/TASLP.2021.3078368},
- issn = {2329-9290},
- url = {https://www.merl.com/publications/TR2021-064}
- }
Hori, C., Tsuchiya, M., Chen, S., Cherian, A., Hori, T., Harsham, B.A., Marks, T.K., Le Roux, J., Sullivan, A., Vetro, A., "マルチモーダルセンシング情報に基づくScene-aware Interaction 技術", Society of Automotive Engineers of Japan, Vol. 75, No. 5, pp. 66-71, May 2021.
BibTeX TR2021-042 PDF Video
- @article{Hori2021may,
- author = {Hori, Chiori and Tsuchiya, Masato and Chen, Siheng and Cherian, Anoop and Hori, Takaaki and Harsham, Bret A. and Marks, Tim K. and {Le Roux}, Jonathan and Sullivan, Alan and Vetro, Anthony},
- title = {{マルチモーダルセンシング情報に基づくScene-aware Interaction 技術}},
- journal = {Society of Automotive Engineers of Japan},
- year = 2021,
- volume = 75,
- number = 5,
- pages = {66--71},
- month = may,
- url = {https://www.merl.com/publications/TR2021-042}
- }
Nowara, E., Marks, T.K., Mansour, H., Veeraraghavan, A., "Near-Infrared Imaging Photoplethysmography During Driving", IEEE Transactions on Intelligent Transportation Systems, DOI: 10.1109/TITS.2020.3038317, pp. 1-12, December 2020.
BibTeX TR2020-161 PDF
- @article{Nowara2020dec,
- author = {Nowara, Ewa and Marks, Tim K. and Mansour, Hassan and Veeraraghavan, Ashok},
- title = {{Near-Infrared Imaging Photoplethysmography During Driving}},
- journal = {IEEE Transactions on Intelligent Transportation Systems},
- year = 2020,
- pages = {1--12},
- month = dec,
- doi = {10.1109/TITS.2020.3038317},
- url = {https://www.merl.com/publications/TR2020-161}
- }