Publications

4,009 major publications by members of the MERL staff.


  •  Bimbraw, K., Wang, Y., Liu, J., Koike-Akino, T., "GPT Sonography: Hand Gesture Decoding from Forearm Ultrasound Images via a Large Vision-Language Model", IEEE Access, May 2026.
    BibTeX TR2026-054 PDF
    • @article{Bimbraw2026may,
    • author = {Bimbraw, Keshav and Wang, Ye and Liu, Jing and Koike-Akino, Toshiaki},
    • title = {{GPT Sonography: Hand Gesture Decoding from Forearm Ultrasound Images via a Large Vision-Language Model}},
    • journal = {IEEE Access},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-054}
    • }
  •  Ding, T., Xie, Y., Liang, Y., Chatterjee, M., Miraldo, P., Jiang, H., "LASER: Layer-wise Scale Alignment for Training-Free Streaming 4D Reconstruction", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2026.
    BibTeX TR2026-055 PDF
    • @inproceedings{Ding2026may,
    • author = {Ding, Tianye and Xie, Yiming and Liang, Yiqing and Chatterjee, Moitreya and Miraldo, Pedro and Jiang, Huaizu},
    • title = {{LASER: Layer-wise Scale Alignment for Training-Free Streaming 4D Reconstruction}},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-055}
    • }
  •  Manam, L., Govindu, V., "Parallel Rigidity Matters for Bundle Adjustment", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2026.
    BibTeX TR2026-053 PDF
    • @inproceedings{Lalit2026may,
    • author = {Manam, Lalit and Govindu, Venu},
    • title = {{Parallel Rigidity Matters for Bundle Adjustment}},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-053}
    • }
  •  Piedade, V., Manam, L., Yamazaki, M., Miraldo, P., "Revisiting Monocular SLAM with Spatio-Temporal Scene Modeling", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2026.
    BibTeX TR2026-056 PDF
    • @inproceedings{Piedade2026may,
    • author = {Piedade, Valter and Manam, Lalit and Yamazaki, Masashi and Miraldo, Pedro},
    • title = {{Revisiting Monocular SLAM with Spatio-Temporal Scene Modeling}},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-056}
    • }
  •  Li, D., Zhang, J., Egger, B., Chatterjee, M., Lohit, S., Marks, T.K., Cherian, A., "AssemblyBench: Physics-Aware Assembly of Complex Industrial Objects", arXiv, May 2026.
    BibTeX arXiv
    • @article{Li2026may,
    • author = {Li, Danrui and Zhang, Jiahao and Egger, Bernhard and Chatterjee, Moitreya and Lohit, Suhas and Marks, Tim K. and Cherian, Anoop},
    • title = {{AssemblyBench: Physics-Aware Assembly of Complex Industrial Objects}},
    • journal = {arXiv},
    • year = 2026,
    • month = may,
    • url = {https://arxiv.org/abs/2605.12845}
    • }
  •  Ulvog, A.K., Rapp, J., Goyal, V.K., "FMCW Lidar Beyond Nyquist by Instantaneous Frequency Fitting", arXiv, May 2026.
    BibTeX arXiv
    • @article{Ulvog2026may,
    • author = {Ulvog, Alfred K. and Rapp, Joshua and Goyal, Vivek K.},
    • title = {{FMCW Lidar Beyond Nyquist by Instantaneous Frequency Fitting}},
    • journal = {arXiv},
    • year = 2026,
    • month = may,
    • url = {https://arxiv.org/abs/2605.14039}
    • }
  •  Wang, Y., Liu, J., Koike-Akino, T., "Temper and Tilt Lead to SLOP: Reward Hacking Mitigation with Inference-Time Alignment", arXiv, May 2026.
    BibTeX arXiv
    • @article{Wang2026may,
    • author = {Wang, Ye and Liu, Jing and Koike-Akino, Toshiaki},
    • title = {{Temper and Tilt Lead to SLOP: Reward Hacking Mitigation with Inference-Time Alignment}},
    • journal = {arXiv},
    • year = 2026,
    • month = may,
    • url = {https://arxiv.org/abs/2605.13537}
    • }
  •  Mumcu, F., Jones, M.J., Cherian, A., Yilmaz, Y., "Is Video Anomaly Detection Misframed? Evidence from LLM-Based and Multi-Scene Models", arXiv, May 2026.
    BibTeX arXiv
    • @article{Mumcu2026may,
    • author = {Mumcu, Furkan and Jones, Michael J. and Cherian, Anoop and Yilmaz, Yasin},
    • title = {{Is Video Anomaly Detection Misframed? Evidence from LLM-Based and Multi-Scene Models}},
    • journal = {arXiv},
    • year = 2026,
    • month = may,
    • url = {https://arxiv.org/abs/2605.12725}
    • }
  •  Richter, J., Masuyama, Y., Boeddeker, C., Edo, T., Wichern, G., Le Roux, J., "Predictive-Generative Drift Decomposition for Speech Enhancement and Separation", arXiv, May 2026.
    BibTeX arXiv
    • @article{Richter2026may,
    • author = {Richter, Julius and Masuyama, Yoshiki and Boeddeker, Christoph and Edo, Takahiro and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Predictive-Generative Drift Decomposition for Speech Enhancement and Separation}},
    • journal = {arXiv},
    • year = 2026,
    • month = may,
    • url = {https://arxiv.org/abs/2605.06189}
    • }
  •  Cherian, A., Corcodel, R., Jain, S., Romeres, D., "LLMPhy: Parameter-Identifiable Physical Reasoning Combining Large Language Models and Physics Engines", International Conference on Artificial Intelligence and Statistics (AISTATS), May 2026.
    BibTeX TR2026-052 PDF
    • @inproceedings{Cherian2026may,
    • author = {Cherian, Anoop and Corcodel, Radu and Jain, Siddarth and Romeres, Diego},
    • title = {{LLMPhy: Parameter-Identifiable Physical Reasoning Combining Large Language Models and Physics Engines}},
    • booktitle = {International Conference on Artificial Intelligence and Statistics (AISTATS)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-052}
    • }
  •  Tandi, K., Ali, W.H., Rapp, J., Mansour, H., "Single View Camera-Based Dynamic Airflow Sensing", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
    BibTeX TR2026-038 PDF
    • @inproceedings{Tandi2026may,
    • author = {Tandi, Kevin and Ali, Wael H. and Rapp, Joshua and Mansour, Hassan},
    • title = {{Single View Camera-Based Dynamic Airflow Sensing}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-038}
    • }
  •  Aihara, R., Masuyama, Y., Paissan, F., Germain, F.G., Wichern, G., Le Roux, J., "SUNAC: Source-aware Unified Neural Audio Codec", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
    BibTeX TR2026-032 PDF
    • @inproceedings{Aihara2026may,
    • author = {Aihara, Ryo and Masuyama, Yoshiki and Paissan, Francesco and Germain, François G. and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{SUNAC: Source-aware Unified Neural Audio Codec}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-032}
    • }
  •  Han, J., Wang, R., Masuyama, Y., Delcroix, M., Rohdin, J., Du, J., Burget, L., "Spatially Aware Self-Supervised Models for Multi-Channel Neural Speaker Diarization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
    BibTeX TR2026-047 PDF
    • @inproceedings{Han2026may,
    • author = {Han, Jiangyu and Wang, Ruoyu and Masuyama, Yoshiki and Delcroix, Marc and Rohdin, Johan and Du, Jun and Burget, Lukáš},
    • title = {{Spatially Aware Self-Supervised Models for Multi-Channel Neural Speaker Diarization}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-047}
    • }
  •  Kato, S., Wang, P., Fujihashi, T., Markham, A., "Heatmap-to-SMPL Multi-View Radar Transformer for Multi-Person 3D Pose Estimation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
    BibTeX TR2026-040 PDF
    • @inproceedings{Kato2026may,
    • author = {Kato, Sorachi and Wang, Pu and Fujihashi, Takuya and Markham, Andrew},
    • title = {{Heatmap-to-SMPL Multi-View Radar Transformer for Multi-Person 3D Pose Estimation}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-040}
    • }
  •  Masuyama, Y., Germain, F.G., Wichern, G., Hori, C., Le Roux, J., "Velocity Potential Neural Field for Efficient Ambisonics Impulse Response Modeling", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
    BibTeX TR2026-033 PDF
    • @inproceedings{Masuyama2026may,
    • author = {Masuyama, Yoshiki and Germain, François G. and Wichern, Gordon and Hori, Chiori and {Le Roux}, Jonathan},
    • title = {{Velocity Potential Neural Field for Efficient Ambisonics Impulse Response Modeling}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-033}
    • }
  •  Masuyama, Y., Saijo, K., Paissan, F., Han, J., Delcroix, M., Aihara, R., Germain, F.G., Wichern, G., Le Roux, J., "FlexIO: Flexible Single- and Multi-Channel Speech Separation and Enhancement", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
    BibTeX TR2026-034 PDF
    • @inproceedings{Masuyama2026may2,
    • author = {Masuyama, Yoshiki and Saijo, Kohei and Paissan, Francesco and Han, Jiangyu and Delcroix, Marc and Aihara, Ryo and Germain, François G. and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{FlexIO: Flexible Single- and Multi-Channel Speech Separation and Enhancement}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-034}
    • }
  •  Takahashi, R., Mansour, H., Boufounos, P.T., "DUAL-REGULARIZED ITERATIVE ADAPTIVE APPROACH FOR DOA SPECTRUM RECONSTRUCTION IN LIMITED ANGLE SECTOR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
    BibTeX TR2026-039 PDF
    • @inproceedings{Takahashi2026may,
    • author = {Takahashi, Ryuhei and Mansour, Hassan and Boufounos, Petros T.},
    • title = {{DUAL-REGULARIZED ITERATIVE ADAPTIVE APPROACH FOR DOA SPECTRUM RECONSTRUCTION IN LIMITED ANGLE SECTOR}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-039}
    • }
  •  Zhang, H., Ma, Y., Kitichotkul, R., Rapp, J., Boufounos, P.T., "ProxiCBO: A Consensus-based Method for Composite Optimization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
    BibTeX TR2026-041 PDF
    • @inproceedings{Zhang2026may,
    • author = {Zhang, Haoyu and Ma, Yanting and Kitichotkul, Ruangrawee and Rapp, Joshua and Boufounos, Petros T.},
    • title = {{ProxiCBO: A Consensus-based Method for Composite Optimization}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-041}
    • }
  •  Aihara, R., Masuyama, Y., Germain, F.G., Wichern, G., Le Roux, J., "Exploring Disentangled Neural Speech Codecs from Self-Supervised Representations", IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW), May 2026.
    BibTeX TR2026-035 PDF
    • @inproceedings{Aihara2026may2,
    • author = {Aihara, Ryo and Masuyama, Yoshiki and Germain, François G. and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Exploring Disentangled Neural Speech Codecs from Self-Supervised Representations}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-035}
    • }
  •  Kitichotkul, R., Rapp, J., Ma, Y., Mansour, H., "Unambiguous Range Extension for Doppler Single-Photon Lidar", Optics Express, DOI: 10.1364/OE.592528, Vol. 34, No. 9, pp. 15933-15952, May 2026.
    BibTeX TR2026-050 PDF
    • @article{Kitichotkul2026apr,
    • author = {Kitichotkul, Ruangrawee and Rapp, Joshua and Ma, Yanting and Mansour, Hassan},
    • title = {{Unambiguous range extension for Doppler single-photon lidar}},
    • journal = {Optics Express},
    • year = 2026,
    • volume = 34,
    • number = 9,
    • pages = {15933--15952},
    • month = apr,
    • doi = {10.1364/OE.592528},
    • url = {https://www.merl.com/publications/TR2026-050}
    • }
  •  Rapp, J., Kitichotkul, R., Ma, Y., Mansour, H., "Velocity estimation with single-photon lidar", SPIE Conference on Advanced Photon Counting Techniques, April 2026.
    BibTeX TR2026-051 PDF
    • @inproceedings{Rapp2026apr,
    • author = {Rapp, Joshua and Kitichotkul, Ruangrawee and Ma, Yanting and Mansour, Hassan},
    • title = {{Velocity estimation with single-photon lidar}},
    • booktitle = {SPIE Conference on Advanced Photon Counting Techniques},
    • year = 2026,
    • month = apr,
    • url = {https://www.merl.com/publications/TR2026-051}
    • }
  •  Suzuki, K., Liu, J., Wang, Y., Hori, C., Brand, M., Romeres, D., Koike-Akino, T., "Embedding Morphology into Transformers for Cross-Robot Policy Learning", International Conference on Learning Representations (ICLR) Workshop, April 2026.
    BibTeX TR2026-048 PDF
    • @inproceedings{Suzuki2026apr,
    • author = {Suzuki, Kei and Liu, Jing and Wang, Ye and Hori, Chiori and Brand, Matthew and Romeres, Diego and Koike-Akino, Toshiaki},
    • title = {{Embedding Morphology into Transformers for Cross-Robot Policy Learning}},
    • booktitle = {International Conference on Learning Representations (ICLR) Workshop},
    • year = 2026,
    • month = apr,
    • url = {https://www.merl.com/publications/TR2026-048}
    • }
  •  Wang, Y., Liu, J., Koike-Akino, T., "Directional Embedding Smoothing for Robust Vision Language Models", International Conference on Learning Representations (ICLR) Workshop on Agents in the Wild, April 2026.
    BibTeX TR2026-049 PDF Presentation
    • @inproceedings{Wang2026apr4,
    • author = {Wang, Ye and Liu, Jing and Koike-Akino, Toshiaki},
    • title = {{Directional Embedding Smoothing for Robust Vision Language Models}},
    • booktitle = {International Conference on Learning Representations (ICLR) Workshop on Agents in the Wild},
    • year = 2026,
    • month = apr,
    • url = {https://www.merl.com/publications/TR2026-049}
    • }
  •  Zhang, Y., Corcodel, R., Hori, C., Cherian, A., Zhao, D., "SpinBench: 3D Rotation as a Lens on Spatial Reasoning in VLMs", International Conference on Learning Representations (ICLR) 2026, April 2026.
    BibTeX TR2026-045 PDF
    • @inproceedings{Zhang2026apr2,
    • author = {Zhang, Yuyou and Corcodel, Radu and Hori, Chiori and Cherian, Anoop and Zhao, Ding},
    • title = {{SpinBench: 3D Rotation as a Lens on Spatial Reasoning in VLMs}},
    • booktitle = {International Conference on Learning Representations (ICLR) 2026},
    • year = 2026,
    • month = apr,
    • url = {https://www.merl.com/publications/TR2026-045}
    • }
  •  Koike-Akino, T., Liu, J., Wang, Y., "TTQ: Activation-Aware Test-Time Quantization to Accelerate LLM Inference on the Fly", International Conference on Learning Representations (ICLR) Workshop, April 2026.
    BibTeX TR2026-044 PDF Presentation
    • @inproceedings{Koike-Akino2026apr,
    • author = {Koike-Akino, Toshiaki and Liu, Jing and Wang, Ye},
    • title = {{TTQ: Activation-Aware Test-Time Quantization to Accelerate LLM Inference on the Fly}},
    • booktitle = {International Conference on Learning Representations (ICLR) Workshop},
    • year = 2026,
    • month = apr,
    • url = {https://www.merl.com/publications/TR2026-044}
    • }