Publications

Hsieh, J.-W., Wu, Y.-H., Hsieh, Y.-K., Li, X., Peng, K.-C., Chang, M.-C., "SoREL: Soft-Label Refurbishment with Ensemble Learning for Noisy Long-Tailed Classification", CVPR Findings, June 2026.
BibTeX TR2026-075 PDF
- @inproceedings{Hsieh2026jun2,
- author    = {Hsieh, Jun-Wei and Wu, Ying-Hsuan and Hsieh, Yi-Kuan and Li, Xin and Peng, Kuan-Chuan and Chang, Ming-Ching},
- title     = {{SoREL: Soft-Label Refurbishment with Ensemble Learning for Noisy Long-Tailed Classification}},
- booktitle = {CVPR Findings},
- month     = jun,
- year      = {2026},
- url       = {https://www.merl.com/publications/TR2026-075},
- }
Hsieh, J.-W., Wu, Y.-H., Hsieh, Y.-K., Li, X., Peng, K.-C., Chang, M.-C., "SoREL: Soft-Label Refurbishment with Ensemble Learning for Noisy Long-Tailed Classification Supplementary Material", CVPR Findings, June 2026.
BibTeX TR2026-074 PDF
- @inproceedings{Hsieh2026jun,
- author    = {Hsieh, Jun-Wei and Wu, Ying-Hsuan and Hsieh, Yi-Kuan and Li, Xin and Peng, Kuan-Chuan and Chang, Ming-Ching},
- title     = {{SoREL: Soft-Label Refurbishment with Ensemble Learning for Noisy Long-Tailed Classification Supplementary Material}},
- booktitle = {CVPR Findings},
- month     = jun,
- year      = {2026},
- url       = {https://www.merl.com/publications/TR2026-074},
- }
Li, D., Zhang, J., Egger, B., Chatterjee, M., Lohit, S., Marks, T.K., Cherian, A., "AssemblyBench: Physics-Aware Assembly of Complex Industrial Objects", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2026.
BibTeX TR2026-076 PDF Video Data Software
- @inproceedings{Li2026jun,
- author    = {Li, Danrui and Zhang, Jiahao and Egger, Bernhard and Chatterjee, Moitreya and Lohit, Suhas and Marks, Tim K. and Cherian, Anoop},
- title     = {{AssemblyBench: Physics-Aware Assembly of Complex Industrial Objects}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- month     = jun,
- year      = {2026},
- url       = {https://www.merl.com/publications/TR2026-076},
- }
Liu, X., Miraldo, P., Lohit, S., Jiang, H., Sawada, N., Tai, Y.-W., Tang, C.-K., Chatterjee, M., "Point4Cast: Streaming Dynamic Scene Reconstruction and Forecasting", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2026.
BibTeX TR2026-077 PDF
- @inproceedings{Liu2026jun,
- author    = {Liu, Xinhang and Miraldo, Pedro and Lohit, Suhas and Jiang, Huaizu and Sawada, Naoko and Tai, Yu-Wing and Tang, Chi-Keung and Chatterjee, Moitreya},
- title     = {{Point4Cast: Streaming Dynamic Scene Reconstruction and Forecasting}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- month     = jun,
- year      = {2026},
- url       = {https://www.merl.com/publications/TR2026-077},
- }
Ding, T., Xie, Y., Liang, Y., Chatterjee, M., Miraldo, P., Jiang, H., "LASER: Layer-wise Scale Alignment for Training-Free Streaming 4D Reconstruction", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2026.
BibTeX TR2026-055 PDF
- @inproceedings{Ding2026may,
- author    = {Ding, Tianye and Xie, Yiming and Liang, Yiqing and Chatterjee, Moitreya and Miraldo, Pedro and Jiang, Huaizu},
- title     = {{LASER: Layer-wise Scale Alignment for Training-Free Streaming 4D Reconstruction}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- month     = may,
- year      = {2026},
- url       = {https://www.merl.com/publications/TR2026-055},
- }
Manam, L., Govindu, V., "Parallel Rigidity Matters for Bundle Adjustment", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2026.
BibTeX TR2026-053 PDF Video Presentation
- @inproceedings{Lalit2026may,
- author = {Manam, Lalit and Govindu, Venu},
- title = {{Parallel Rigidity Matters for Bundle Adjustment}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2026,
- month = may,
- url = {https://www.merl.com/publications/TR2026-053}
- }
Piedade, V., Manam, L., Yamazaki, M., Miraldo, P., "Revisiting Monocular SLAM with Spatio-Temporal Scene Modeling", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2026.
BibTeX TR2026-056 PDF Video Software Presentation
- @inproceedings{Piedade2026may,
- author = {Piedade, Valter and Manam, Lalit and Yamazaki, Masashi and Miraldo, Pedro},
- title = {{Revisiting Monocular SLAM with Spatio-Temporal Scene Modeling}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2026,
- month = may,
- url = {https://www.merl.com/publications/TR2026-056}
- }
Cherian, A., Corcodel, R., Jain, S., Romeres, D., "LLMPhy: Parameter-Identifiable Physical Reasoning Combining Large Language Models and Physics Engines", International Conference on Artificial Intelligence and Statistics (AISTATS), May 2026.
BibTeX TR2026-052 PDF Data Software
- @inproceedings{Cherian2026may,
- author    = {Cherian, Anoop and Corcodel, Radu and Jain, Siddarth and Romeres, Diego},
- title     = {{LLMPhy: Parameter-Identifiable Physical Reasoning Combining Large Language Models and Physics Engines}},
- booktitle = {International Conference on Artificial Intelligence and Statistics (AISTATS)},
- month     = may,
- year      = {2026},
- url       = {https://www.merl.com/publications/TR2026-052},
- }
Aihara, R., Masuyama, Y., Paissan, F., Germain, F.G., Wichern, G., Le Roux, J., "SUNAC: Source-aware Unified Neural Audio Codec", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
BibTeX TR2026-032 PDF
- @inproceedings{Aihara2026may,
- author = {Aihara, Ryo and Masuyama, Yoshiki and Paissan, Francesco and Germain, Fran{\c{c}}ois G. and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{SUNAC: Source-aware Unified Neural Audio Codec}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2026,
- month = may,
- url = {https://www.merl.com/publications/TR2026-032}
- }
Han, J., Wang, R., Masuyama, Y., Delcroix, M., Rohdin, J., Du, J., Burget, L., "Spatially Aware Self-Supervised Models for Multi-Channel Neural Speaker Diarization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
BibTeX TR2026-047 PDF
- @inproceedings{Han2026may,
- author = {Han, Jiangyu and Wang, Ruoyu and Masuyama, Yoshiki and Delcroix, Marc and Rohdin, Johan and Du, Jun and Burget, Luk{\'a}{\v{s}}},
- title = {{Spatially Aware Self-Supervised Models for Multi-Channel Neural Speaker Diarization}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2026,
- month = may,
- url = {https://www.merl.com/publications/TR2026-047}
- }
Kato, S., Wang, P., Fujihashi, T., Markham, A., "Heatmap-to-SMPL Multi-View Radar Transformer for Multi-Person 3D Pose Estimation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
BibTeX TR2026-040 PDF
- @inproceedings{Kato2026may,
- author    = {Kato, Sorachi and Wang, Pu and Fujihashi, Takuya and Markham, Andrew},
- title     = {{Heatmap-to-SMPL Multi-View Radar Transformer for Multi-Person 3D Pose Estimation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2026},
- url       = {https://www.merl.com/publications/TR2026-040},
- }
Masuyama, Y., Germain, F.G., Wichern, G., Hori, C., Le Roux, J., "Velocity Potential Neural Field for Efficient Ambisonics Impulse Response Modeling", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
BibTeX TR2026-033 PDF
- @inproceedings{Masuyama2026may,
- author = {Masuyama, Yoshiki and Germain, Fran{\c{c}}ois G. and Wichern, Gordon and Hori, Chiori and {Le Roux}, Jonathan},
- title = {{Velocity Potential Neural Field for Efficient Ambisonics Impulse Response Modeling}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2026,
- month = may,
- url = {https://www.merl.com/publications/TR2026-033}
- }
Masuyama, Y., Saijo, K., Paissan, F., Han, J., Delcroix, M., Aihara, R., Germain, F.G., Wichern, G., Le Roux, J., "FlexIO: Flexible Single- and Multi-Channel Speech Separation and Enhancement", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
BibTeX TR2026-034 PDF
- @inproceedings{Masuyama2026may2,
- author = {Masuyama, Yoshiki and Saijo, Kohei and Paissan, Francesco and Han, Jiangyu and Delcroix, Marc and Aihara, Ryo and Germain, Fran{\c{c}}ois G. and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{FlexIO: Flexible Single- and Multi-Channel Speech Separation and Enhancement}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2026,
- month = may,
- url = {https://www.merl.com/publications/TR2026-034}
- }
Aihara, R., Masuyama, Y., Germain, F.G., Wichern, G., Le Roux, J., "Exploring Disentangled Neural Speech Codecs from Self-Supervised Representations", IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW), May 2026.
BibTeX TR2026-035 PDF
- @inproceedings{Aihara2026may2,
- author = {Aihara, Ryo and Masuyama, Yoshiki and Germain, Fran{\c{c}}ois G. and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Exploring Disentangled Neural Speech Codecs from Self-Supervised Representations}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)},
- year = 2026,
- month = may,
- url = {https://www.merl.com/publications/TR2026-035}
- }
Bimbraw, K., Wang, Y., Liu, J., Koike-Akino, T., "GPT Sonography: Hand Gesture Decoding from Forearm Ultrasound Images via a Large Vision-Language Model", IEEE Access, DOI: 10.1109/ACCESS.2026.3687477, Vol. 14, pp. 70724-70736, April 2026.
BibTeX TR2026-054 PDF
- @article{Bimbraw2026may,
- author  = {Bimbraw, Keshav and Wang, Ye and Liu, Jing and Koike-Akino, Toshiaki},
- title   = {{GPT Sonography: Hand Gesture Decoding from Forearm Ultrasound Images via a Large Vision-Language Model}},
- journal = {IEEE Access},
- volume  = {14},
- pages   = {70724--70736},
- month   = may,
- year    = {2026},
- doi     = {10.1109/ACCESS.2026.3687477},
- issn    = {2169-3536},
- url     = {https://www.merl.com/publications/TR2026-054},
- }
Suzuki, K., Liu, J., Wang, Y., Hori, C., Brand, M., Romeres, D., Koike-Akino, T., "Embedding Morphology into Transformers for Cross-Robot Policy Learning", International Conference on Learning Representations (ICLR) Workshop, April 2026.
BibTeX TR2026-048 PDF
- @inproceedings{Suzuki2026apr,
- author    = {Suzuki, Kei and Liu, Jing and Wang, Ye and Hori, Chiori and Brand, Matthew and Romeres, Diego and Koike-Akino, Toshiaki},
- title     = {{Embedding Morphology into Transformers for Cross-Robot Policy Learning}},
- booktitle = {International Conference on Learning Representations (ICLR) Workshop on Efficient Spatial Reasoning},
- month     = apr,
- year      = {2026},
- url       = {https://www.merl.com/publications/TR2026-048},
- }
Koike-Akino, T., Liu, J., Wang, Y., "TTQ: Activation-Aware Test-Time Quantization to Accelerate LLM Inference on the Fly", International Conference on Learning Representations (ICLR) Workshop, April 2026.
BibTeX TR2026-044 PDF Presentation
- @inproceedings{Koike-Akino2026apr,
- author = {Koike-Akino, Toshiaki and Liu, Jing and Wang, Ye},
- title = {{TTQ: Activation-Aware Test-Time Quantization to Accelerate LLM Inference on the Fly}},
- booktitle = {International Conference on Learning Representations (ICLR) Workshop on Test-Time Updates (TTU)},
- year = 2026,
- month = apr,
- url = {https://www.merl.com/publications/TR2026-044}
- }
Wang, Z., Hu, H., Deng, X., Mowlavi, S., Nakahira, Y., "OpInf-LLM: Parametric PDE Solving with LLMs via Operator Inference", International Conference on Learning Representations (ICLR) Workshop on AI and Partial Differential Equations (AI&PDE), April 2026.
BibTeX TR2026-043 PDF
- @inproceedings{Wang2026apr2,
- author    = {Wang, Zhuoyuan and Hu, Hanjiang and Deng, Xiyu and Mowlavi, Saviz and Nakahira, Yorie},
- title     = {{OpInf-LLM: Parametric PDE Solving with LLMs via Operator Inference}},
- booktitle = {International Conference on Learning Representations (ICLR) Workshop on AI and Partial Differential Equations (AI\&PDE)},
- month     = apr,
- year      = {2026},
- url       = {https://www.merl.com/publications/TR2026-043},
- }
Ryo, H., Wang, Y., Koike-Akino, T., Liu, J., Parsons, K., Hato, J., "Evaluating Security Policy Compliance in Infrastructure as Code Generated by Large Language Models", International Symposium on Digital Forensics and Security, DOI: 10.1109/ISDFS69419.2026.11458930, March 2026.
BibTeX TR2026-036 PDF
- @inproceedings{Ryo2026mar,
- author    = {Ryo, Hase and Wang, Ye and Koike-Akino, Toshiaki and Liu, Jing and Parsons, Kieran and Hato, Jumpei},
- title     = {{Evaluating Security Policy Compliance in Infrastructure as Code Generated by Large Language Models}},
- booktitle = {International Symposium on Digital Forensics and Security},
- month     = mar,
- year      = {2026},
- doi       = {10.1109/ISDFS69419.2026.11458930},
- issn      = {2768-1831},
- isbn      = {979-8-3315-7310-2},
- url       = {https://www.merl.com/publications/TR2026-036},
- }
Koike-Akino, T., Chen, X., Liu, J., Wang, Y., Wang, P., Brand, M., "LatentLLM: Activation-Aware Transform to Multi-Head Latent Attention", AAAI Conference on Artificial Intelligence, Sven Koenig, Chad Jenkins, Matthew E. Taylor, Eds., DOI: 10.1609/aaai.v40i27.39425, March 2026, vol. 40, pp. 22644-22652.
BibTeX TR2026-018 PDF Video Presentation
- @inproceedings{Koike-Akino2026jan,
- author = {Koike-Akino, Toshiaki and Chen, Xiangyu and Liu, Jing and Wang, Ye and Wang, Pu and Brand, Matthew},
- title = {{LatentLLM: Activation-Aware Transform to Multi-Head Latent Attention}},
- booktitle = {AAAI Conference on Artificial Intelligence},
- year = 2026,
- editor = {Koenig, Sven and Jenkins, Chad and Taylor, Matthew E.},
- volume = 40,
- number = 27,
- pages = {22644--22652},
- month = jan,
- publisher = {AAAI Press},
- doi = {10.1609/aaai.v40i27.39425},
- issn = {2374-3468},
- isbn = {978-1-57735-906-7},
- url = {https://www.merl.com/publications/TR2026-018}
- }
Shenoy, V., Lohit, S., Mansour, H., Chellappa, R., Marks, T.K., "Recovering Pulse Waves from Video Using Deep Unrolling and Deep Equilibrium Models", IEEE Transactions on Image Processing, DOI: 10.1109/TIP.2026.3671653, Vol. 35, pp. 2755-2770, March 2026.
BibTeX TR2026-031 PDF
- @article{Shenoy2026mar,
- author  = {Shenoy, Vineet and Lohit, Suhas and Mansour, Hassan and Chellappa, Rama and Marks, Tim K.},
- title   = {{Recovering Pulse Waves from Video Using Deep Unrolling and Deep Equilibrium Models}},
- journal = {IEEE Transactions on Image Processing},
- volume  = {35},
- pages   = {2755--2770},
- month   = mar,
- year    = {2026},
- doi     = {10.1109/TIP.2026.3671653},
- issn    = {1941-0042},
- url     = {https://www.merl.com/publications/TR2026-031},
- }
Kogashi, K., Cherian, A., Kuo, M.-Y.J., "MMHOI: Modeling Complex 3D Multi-Human Multi-Object Interactions", IEEE Winter Conference on Applications of Computer Vision (WACV), March 2026, pp. 1512-1521.
BibTeX TR2026-029 PDF Video Data
- @inproceedings{Kogashi2026mar,
- author    = {Kogashi, Kaen and Cherian, Anoop and Kuo, Meng-Yu Jennifer},
- title     = {{MMHOI: Modeling Complex 3D Multi-Human Multi-Object Interactions}},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- pages     = {1512--1521},
- month     = mar,
- year      = {2026},
- url       = {https://www.merl.com/publications/TR2026-029},
- }
Hu, H., Liu, C., Li, N., Wang, Y., "Training Task Reasoning LLM Agents for Multi-turn Task Planning via Single-turn Reinforcement Learning", IEEE Control Systems Letters, DOI: 10.1109/LCSYS.2025.3642767, Vol. 9, pp. 2879-2884, February 2026.
BibTeX TR2026-026 PDF
- @article{Hu2026feb,
- author  = {Hu, Hanjiang and Liu, Changliu and Li, Na and Wang, Yebin},
- title   = {{Training Task Reasoning LLM Agents for Multi-turn Task Planning via Single-turn Reinforcement Learning}},
- journal = {IEEE Control Systems Letters},
- volume  = {9},
- pages   = {2879--2884},
- month   = feb,
- year    = {2026},
- doi     = {10.1109/LCSYS.2025.3642767},
- url     = {https://www.merl.com/publications/TR2026-026},
- }
Mumcu, F., Jones, M.J., Yilmaz, Y., Cherian, A., "Leveraging Multimodal LLM Descriptions of Activity for Explainable Semi-Supervised Video Anomaly Detection", Transactions on Machine Learning Research, February 2026.
BibTeX TR2026-027 PDF
- @article{Mumcu2026feb2,
- author  = {Mumcu, Furkan and Jones, Michael J. and Yilmaz, Yasin and Cherian, Anoop},
- title   = {{Leveraging Multimodal LLM Descriptions of Activity for Explainable Semi-Supervised Video Anomaly Detection}},
- journal = {Transactions on Machine Learning Research},
- month   = feb,
- year    = {2026},
- url     = {https://www.merl.com/publications/TR2026-027},
- }
Wang, R., Wang, Y., Liu, J., Koike-Akino, T., "Quantum Diffusion Models for Few-Shot Learning", AAAI Conference on Artificial Intelligence, Shaukat Ali, Francisco Chicano, Alberto Moraglio, Eds., DOI: 10.1007/978-3-032-15931-1, January 2026, pp. 46-59.
BibTeX TR2025-025 PDF
- @inproceedings{Wang2025mar,
- author = {Wang, Ruhan and Wang, Ye and Liu, Jing and Koike-Akino, Toshiaki},
- title = {{Quantum Diffusion Models for Few-Shot Learning}},
- booktitle = {Quantum Computing and Artificial Intelligence: First International Workshop, QC+AI 2025},
- year = 2025,
- editor = {Ali, Shaukat and Chicano, Francisco and Moraglio, Alberto},
- pages = {46--59},
- month = mar,
- publisher = {Springer},
- address = {Cham},
- doi = {10.1007/978-3-032-15931-1},
- issn = {1865-0929},
- isbn = {978-3-032-15931-1},
- url = {https://www.merl.com/publications/TR2025-025}
- }