- Jeon, C.-B., Wichern, G., Germain, F.G., Le Roux, J., "Embracing Cacophony: Explaining and Improving Random Mixing in Music Source Separation", IEEE Open Journal of Signal Processing, DOI: 10.1109/OJSP.2025.3633567, Vol. 6, pp. 1179-1192, January 2026.
BibTeX TR2026-012 PDF- @article{Jeon2026jan,
- author = {Jeon, Chang-Bin and Wichern, Gordon and Germain, François G and {Le Roux}, Jonathan},
- title = {{Embracing Cacophony: Explaining and Improving Random Mixing in Music Source Separation}},
- journal = {IEEE Open Journal of Signal Processing},
- year = 2026,
- volume = 6,
- pages = {1179--1192},
- month = jan,
- doi = {10.1109/OJSP.2025.3633567},
- issn = {2644-1322},
- url = {https://www.merl.com/publications/TR2026-012}
- }
- Wilkinghoff, K., Yang, H., Ebbers, J., Germain, F.G., Wichern, G., Le Roux, J., "Local Density-Based Anomaly Score Normalization for Domain Generalization", IEEE Transactions on Audio, Speech and Language Processing, January 2026.
BibTeX TR2026-010 PDF Software- @article{Wilkinghoff2026jan,
- author = {Wilkinghoff, Kevin and Yang, Haici and Ebbers, Janek and Germain, François G and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Local Density-Based Anomaly Score Normalization for Domain Generalization}},
- journal = {IEEE Transactions on Audio, Speech and Language Processing},
- month = jan,
- year = {2026},
- url = {https://www.merl.com/publications/TR2026-010}
- }
- Masuyama, Y., Wichern, G., Germain, F.G., Ick, C., Le Roux, J., "RANF: Neural Field-Based HRTF Spatial Upsampling with Retrieval Augmentation and Parameter Efficient Fine-Tuning", IEEE Open Journal of Signal Processing, DOI: 10.1109/OJSP.2025.3640517, Vol. 7, pp. 32-41, December 2025.
BibTeX TR2026-007 PDF Software- @article{Masuyama2025dec,
- author = {Masuyama, Yoshiki and Wichern, Gordon and Germain, François G and Ick, Christopher and {Le Roux}, Jonathan},
- title = {{RANF: Neural Field-Based HRTF Spatial Upsampling with Retrieval Augmentation and Parameter Efficient Fine-Tuning}},
- journal = {IEEE Open Journal of Signal Processing},
- year = 2025,
- volume = 7,
- pages = {32--41},
- month = dec,
- doi = {10.1109/OJSP.2025.3640517},
- issn = {2644-1322},
- url = {https://www.merl.com/publications/TR2026-007}
- }
- Masuyama, Y., Wichern, G., Germain, F.G., Ick, C., Le Roux, J., "SuDaField: Subject- and Dataset-Aware Neural Field for HRTF Modeling", IEEE Open Journal of Signal Processing, DOI: 10.1109/OJSP.2025.3627073, Vol. 6, pp. 1169-1178, December 2025.
BibTeX TR2026-009 PDF- @article{Masuyama2025dec2,
- author = {Masuyama, Yoshiki and Wichern, Gordon and Germain, François G and Ick, Christopher and {Le Roux}, Jonathan},
- title = {{SuDaField: Subject- and Dataset-Aware Neural Field for HRTF Modeling}},
- journal = {IEEE Open Journal of Signal Processing},
- year = 2025,
- volume = 6,
- pages = {1169--1178},
- month = dec,
- doi = {10.1109/OJSP.2025.3627073},
- issn = {2644-1322},
- url = {https://www.merl.com/publications/TR2026-009}
- }
- Hori, C., Masuyama, Y., Jain, S., Corcodel, R., Jha, D.K., Romeres, D., Le Roux, J., "Robot Confirmation Generation and Action Planning Using Long-context Q-Former Integrated with Multimodal LLM", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2025.
BibTeX TR2025-167 PDF- @inproceedings{Hori2025dec,
- author = {Hori, Chiori and Masuyama, Yoshiki and Jain, Siddarth and Corcodel, Radu and Jha, Devesh K. and Romeres, Diego and {Le Roux}, Jonathan},
- title = {{Robot Confirmation Generation and Action Planning Using Long-context Q-Former Integrated with Multimodal LLM}},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- month = dec,
- year = {2025},
- url = {https://www.merl.com/publications/TR2025-167}
- }
- Aihara, R., Masuyama, Y., Paissan, F., Germain, F.G., Wichern, G., Le Roux, J., "SUNAC: Source-aware Unified Neural Audio Codec", arXiv, November 2025.
BibTeX arXiv- @article{Aihara2025nov,
- author = {Aihara, Ryo and Masuyama, Yoshiki and Paissan, Francesco and Germain, François G and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{SUNAC: Source-aware Unified Neural Audio Codec}},
- journal = {arXiv},
- year = 2025,
- month = nov,
- eprint = {2511.16126},
- archivePrefix = {arXiv},
- url = {https://arxiv.org/abs/2511.16126}
- }
- Wilkinghoff, K., Fujimura, T., Imoto, K., Le Roux, J., Tan, Z.-H., Toda, T., "Handling Domain Shifts for Anomalous Sound Detection: A Review of DCASE-Related Work", Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE), DOI: 10.5281/zenodo.17251589, October 2025, pp. 20-24.
BibTeX TR2025-157 PDF- @inproceedings{Wilkinghoff2025oct,
- author = {Wilkinghoff, Kevin and Fujimura, Takuya and Imoto, Keisuke and {Le Roux}, Jonathan and Tan, Zheng-Hua and Toda, Tomoki},
- title = {{Handling Domain Shifts for Anomalous Sound Detection: A Review of DCASE-Related Work}},
- booktitle = {Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE)},
- pages = {20--24},
- month = oct,
- year = {2025},
- isbn = {978-84-09-77652-8},
- doi = {10.5281/zenodo.17251589},
- url = {https://www.merl.com/publications/TR2025-157}
- }
- Masuyama, Y., Saijo, K., Paissan, F., Han, J., Delcroix, M., Aihara, R., Germain, F.G., Wichern, G., Le Roux, J., "FlexIO: Flexible Single- and Multi-Channel Speech Separation and Enhancement", arXiv, October 2025.
BibTeX arXiv- @article{Masuyama2025oct2,
- author = {Masuyama, Yoshiki and Saijo, Kohei and Paissan, Francesco and Han, Jiangyu and Delcroix, Marc and Aihara, Ryo and Germain, François G and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{FlexIO: Flexible Single- and Multi-Channel Speech Separation and Enhancement}},
- journal = {arXiv},
- year = 2025,
- month = oct,
- eprint = {2510.21485},
- archivePrefix = {arXiv},
- url = {https://arxiv.org/abs/2510.21485}
- }
- Masuyama, Y., Germain, F.G., Wichern, G., Ick, C., Le Roux, J., "Physics-Informed Direction-Aware Neural Acoustic Fields", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA66052.2025.11230918, October 2025.
BibTeX TR2025-142 PDF- @inproceedings{Masuyama2025oct,
- author = {Masuyama, Yoshiki and Germain, François G and Wichern, Gordon and Ick, Christopher and {Le Roux}, Jonathan},
- title = {{Physics-Informed Direction-Aware Neural Acoustic Fields}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- month = oct,
- year = {2025},
- doi = {10.1109/WASPAA66052.2025.11230918},
- url = {https://www.merl.com/publications/TR2025-142}
- }
- Paissan, F., Wichern, G., Masuyama, Y., Aihara, R., Germain, F.G., Saijo, K., Le Roux, J., "FasTUSS: Faster Task-Aware Unified Source Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA66052.2025.11230943, October 2025.
BibTeX TR2025-143 PDF- @inproceedings{Paissan2025oct,
- author = {Paissan, Francesco and Wichern, Gordon and Masuyama, Yoshiki and Aihara, Ryo and Germain, François G and Saijo, Kohei and {Le Roux}, Jonathan},
- title = {{FasTUSS: Faster Task-Aware Unified Source Separation}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- month = oct,
- year = {2025},
- doi = {10.1109/WASPAA66052.2025.11230943},
- url = {https://www.merl.com/publications/TR2025-143}
- }
- Hussein, A., Khurana, S., Wichern, G., Germain, F.G., Le Roux, J., "HASRD: Hierarchical Acoustic and Semantic Representation Disentanglement", Interspeech, DOI: 10.21437/Interspeech.2025-2063, August 2025, pp. 5393-5397.
BibTeX TR2025-122 PDF- @inproceedings{Hussein2025aug,
- author = {Hussein, Amir and Khurana, Sameer and Wichern, Gordon and Germain, François G and {Le Roux}, Jonathan},
- title = {{HASRD: Hierarchical Acoustic and Semantic Representation Disentanglement}},
- booktitle = {Interspeech},
- publisher = {ISCA},
- pages = {5393--5397},
- month = aug,
- year = {2025},
- doi = {10.21437/Interspeech.2025-2063},
- url = {https://www.merl.com/publications/TR2025-122}
- }
- Ick, C., Wichern, G., Masuyama, Y., Germain, F.G., Le Roux, J., "Direction-Aware Neural Acoustic Fields for Few-Shot Interpolation of Ambisonic Impulse Responses", Interspeech, DOI: 10.21437/Interspeech.2025-1912, August 2025, pp. 933-937.
BibTeX TR2025-120 PDF- @inproceedings{Ick2025aug,
- author = {Ick, Christopher and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G and {Le Roux}, Jonathan},
- title = {{Direction-Aware Neural Acoustic Fields for Few-Shot Interpolation of Ambisonic Impulse Responses}},
- booktitle = {Interspeech},
- year = 2025,
- pages = {933--937},
- month = aug,
- publisher = {ISCA},
- doi = {10.21437/Interspeech.2025-1912},
- url = {https://www.merl.com/publications/TR2025-120}
- }
- Khurana, S., Klement, D., Laurent, A., Bobos, D., Novosad, J., Gazdik, P., Zhang, E., Huang, Z., Hussein, A., Marxer, R., Masuyama, Y., Aihara, R., Hori, C., Germain, F.G., Wichern, G., Le Roux, J., "Factorized RVQ-GAN For Disentangled Speech Tokenization", Interspeech, DOI: 10.21437/Interspeech.2025-2612, August 2025, pp. 3514-3518.
BibTeX TR2025-123 PDF- @inproceedings{Khurana2025aug,
- author = {Khurana, Sameer and Klement, Dominik and Laurent, Antoine and Bobos, Dominik and Novosad, Juraj and Gazdik, Peter and Zhang, Ellen and Huang, Zilli and Hussein, Amir and Marxer, Ricard and Masuyama, Yoshiki and Aihara, Ryo and Hori, Chiori and Germain, François G and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Factorized RVQ-GAN For Disentangled Speech Tokenization}},
- booktitle = {Interspeech},
- publisher = {ISCA},
- pages = {3514--3518},
- month = aug,
- year = {2025},
- doi = {10.21437/Interspeech.2025-2612},
- url = {https://www.merl.com/publications/TR2025-123}
- }
- Yang, H., Wichern, G., Aihara, R., Masuyama, Y., Khurana, S., Germain, F.G., Le Roux, J., "Investigating Continuous Autoregressive Generative Speech Enhancement", Interspeech, DOI: 10.21437/Interspeech.2025-2335, August 2025, pp. 2360-2364.
BibTeX TR2025-119 PDF- @inproceedings{Yang2025aug,
- author = {Yang, Haici and Wichern, Gordon and Aihara, Ryo and Masuyama, Yoshiki and Khurana, Sameer and Germain, François G and {Le Roux}, Jonathan},
- title = {{Investigating Continuous Autoregressive Generative Speech Enhancement}},
- booktitle = {Interspeech},
- year = 2025,
- pages = {2360--2364},
- month = aug,
- publisher = {ISCA},
- doi = {10.21437/Interspeech.2025-2335},
- url = {https://www.merl.com/publications/TR2025-119}
- }
- Aihara, R., Masuyama, Y., Germain, F.G., Wichern, G., Le Roux, J., "Exploring Disentangled Neural Speech Codecs from Self-Supervised Representations", arXiv, August 2025.
BibTeX arXiv- @article{Aihara2025aug,
- author = {Aihara, Ryo and Masuyama, Yoshiki and Germain, François G and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Exploring Disentangled Neural Speech Codecs from Self-Supervised Representations}},
- journal = {arXiv},
- year = 2025,
- month = aug,
- eprint = {2508.08399},
- archivePrefix = {arXiv},
- url = {https://arxiv.org/abs/2508.08399}
- }
- Ebbers, J., Germain, F.G., Wilkinghoff, K., Wichern, G., Le Roux, J., "No Class Left Behind: A Closer Look at Class Balancing for Audio Tagging", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10890706, April 2025.
BibTeX TR2025-037 PDF- @inproceedings{Ebbers2025mar,
- author = {Ebbers, Janek and Germain, François G and Wilkinghoff, Kevin and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{No Class Left Behind: A Closer Look at Class Balancing for Audio Tagging}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10890706},
- url = {https://www.merl.com/publications/TR2025-037}
- }
- Gruttadauria, E., Fontaine, M., Le Roux, J., Essid, S., "O-EENC-SD: Efficient Online End-to-End Neural Clustering for Speaker Diarization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10890837, April 2025.
BibTeX TR2025-031 PDF- @inproceedings{Gruttadauria2025mar,
- author = {Gruttadauria, Elio and Fontaine, Mathieu and {Le Roux}, Jonathan and Essid, Slim},
- title = {{{O-EENC-SD}: Efficient Online End-to-End Neural Clustering for Speaker Diarization}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10890837},
- url = {https://www.merl.com/publications/TR2025-031}
- }
- Hori, C., Kambara, M., Sugiura, K., Ota, K., Khurana, S., Jain, S., Corcodel, R., Jha, D.K., Romeres, D., Le Roux, J., "Interactive Robot Action Replanning using Multimodal LLM Trained from Human Demonstration Videos", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10887717, April 2025.
BibTeX TR2025-034 PDF- @inproceedings{Hori2025mar,
- author = {Hori, Chiori and Kambara, Motonari and Sugiura, Komei and Ota, Kei and Khurana, Sameer and Jain, Siddarth and Corcodel, Radu and Jha, Devesh K. and Romeres, Diego and {Le Roux}, Jonathan},
- title = {{Interactive Robot Action Replanning using Multimodal {LLM} Trained from Human Demonstration Videos}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10887717},
- url = {https://www.merl.com/publications/TR2025-034}
- }
- Masuyama, Y., Wichern, G., Germain, F.G., Ick, C., Le Roux, J., "Retrieval-Augmented Neural Field for HRTF Upsampling and Personalization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10889481, April 2025.
BibTeX TR2025-029 PDF Software- @inproceedings{Masuyama2025mar,
- author = {Masuyama, Yoshiki and Wichern, Gordon and Germain, François G and Ick, Christopher and {Le Roux}, Jonathan},
- title = {{Retrieval-Augmented Neural Field for HRTF Upsampling and Personalization}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10889481},
- url = {https://www.merl.com/publications/TR2025-029}
- }
- Saijo, K., Ebbers, J., Germain, F.G., Khurana, S., Wichern, G., Le Roux, J., "Leveraging Audio-Only Data for Text-Queried Target Sound Extraction", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10888769, April 2025.
BibTeX TR2025-033 PDF- @inproceedings{Saijo2025mar2,
- author = {Saijo, Kohei and Ebbers, Janek and Germain, François G and Khurana, Sameer and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Leveraging Audio-Only Data for Text-Queried Target Sound Extraction}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10888769},
- url = {https://www.merl.com/publications/TR2025-033}
- }
- Saijo, K., Ebbers, J., Germain, F.G., Wichern, G., Le Roux, J., "Task-Aware Unified Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10887819, April 2025.
BibTeX TR2025-032 PDF Software- @inproceedings{Saijo2025mar,
- author = {Saijo, Kohei and Ebbers, Janek and Germain, François G and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Task-Aware Unified Source Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10887819},
- url = {https://www.merl.com/publications/TR2025-032}
- }
- Wilkinghoff, K., Yang, H., Ebbers, J., Germain, F.G., Wichern, G., Le Roux, J., "Keeping the Balance: Anomaly Score Calculation for Domain Generalization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10888402, April 2025.
BibTeX TR2025-030 PDF Software- @inproceedings{Wilkinghoff2025mar,
- author = {Wilkinghoff, Kevin and Yang, Haici and Ebbers, Janek and Germain, François G and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Keeping the Balance: Anomaly Score Calculation for Domain Generalization}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10888402},
- url = {https://www.merl.com/publications/TR2025-030}
- }
- Ick, C., Wichern, G., Masuyama, Y., Germain, F.G., Le Roux, J., "Data Augmentation Using Neural Acoustic Fields With Retrieval-Augmented Pre-training", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP) Satellite Workshop on Generative Data Augmentation for Real-World Signal Processing Applications (GenDA), April 2025.
BibTeX TR2025-045 PDF- @inproceedings{Ick2025apr,
- author = {Ick, Christopher and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G and {Le Roux}, Jonathan},
- title = {{Data Augmentation Using Neural Acoustic Fields With Retrieval-Augmented Pre-training}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP) Satellite Workshop on Generative Data Augmentation for Real-World Signal Processing Applications (GenDA)},
- month = apr,
- year = {2025},
- url = {https://www.merl.com/publications/TR2025-045}
- }
- Wilkinghoff, K., Fujimura, T., Imoto, K., Le Roux, J., Tan, Z.-H., Toda, T., "Handling Domain Shifts for Anomalous Sound Detection: A Review of DCASE-Related Work", arXiv, March 2025.
BibTeX arXiv- @article{Wilkinghoff2025mar2,
- author = {Wilkinghoff, Kevin and Fujimura, Takuya and Imoto, Keisuke and {Le Roux}, Jonathan and Tan, Zheng-Hua and Toda, Tomoki},
- title = {{Handling Domain Shifts for Anomalous Sound Detection: A Review of DCASE-Related Work}},
- journal = {arXiv},
- year = 2025,
- month = mar,
- eprint = {2503.10435},
- archivePrefix = {arXiv},
- url = {https://arxiv.org/abs/2503.10435}
- }
- Koo, J., Wichern, G., Germain, F.G., Khurana, S., Le Roux, J., "SMITIN: Self-Monitored Inference-Time INtervention for Generative Music Transformers", IEEE Open Journal of Signal Processing, DOI: 10.1109/OJSP.2025.3534686, Vol. 6, pp. 266-275, January 2025.
BibTeX TR2025-012 PDF Software- @article{Koo2025jan,
- author = {Koo, Junghyun and Wichern, Gordon and Germain, François G and Khurana, Sameer and {Le Roux}, Jonathan},
- title = {{SMITIN: Self-Monitored Inference-Time INtervention for Generative Music Transformers}},
- journal = {IEEE Open Journal of Signal Processing},
- volume = {6},
- pages = {266--275},
- month = jan,
- year = {2025},
- issn = {2644-1322},
- doi = {10.1109/OJSP.2025.3534686},
- url = {https://www.merl.com/publications/TR2025-012}
- }