Publications

86 / 3,887 publications found.


  •  Hussein, A., Khurana, S., Wichern, G., Germain, F.G., Le Roux, J., "HASRD: Hierarchical Acoustic and Semantic Representation Disentanglement", Interspeech, August 2025.
    BibTeX TR2025-122 PDF
    • @inproceedings{Hussein2025aug,
    • author = {Hussein, Amir and Khurana, Sameer and Wichern, Gordon and Germain, François G and {Le Roux}, Jonathan},
    • title = {{HASRD: Hierarchical Acoustic and Semantic Representation Disentanglement}},
    • booktitle = {Interspeech},
    • year = 2025,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2025-122}
    • }
  •  Ick, C., Wichern, G., Masuyama, Y., Germain, F.G., Le Roux, J., "Direction-Aware Neural Acoustic Fields for Few-Shot Interpolation of Ambisonic Impulse Responses", Interspeech, DOI: 10.21437/Interspeech.2025-1912, August 2025, pp. 933-937.
    BibTeX TR2025-120 PDF
    • @inproceedings{Ick2025aug,
    • author = {Ick, Christopher and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G and {Le Roux}, Jonathan},
    • title = {{Direction-Aware Neural Acoustic Fields for Few-Shot Interpolation of Ambisonic Impulse Responses}},
    • booktitle = {Interspeech},
    • year = 2025,
    • pages = {933--937},
    • month = aug,
    • doi = {10.21437/Interspeech.2025-1912},
    • url = {https://www.merl.com/publications/TR2025-120}
    • }
  •  Khurana, S., Klement, D., Laurent, A., Bobos, D., Novosad, J., Gazdik, P., Zhang, E., Huang, Z., Hussein, A., Marxer, R., Masuyama, Y., Aihara, R., Hori, C., Germain, F.G., Wichern, G., Le Roux, J., "Factorized RVQ-GAN For Disentangled Speech Tokenization", Interspeech, August 2025.
    BibTeX TR2025-123 PDF
    • @inproceedings{Khurana2025aug,
    • author = {Khurana, Sameer and Klement, Dominik and Laurent, Antoine and Bobos, Dominik and Novosad, Juraj and Gazdik, Peter and Zhang, Ellen and Huang, Zilli and Hussein, Amir and Marxer, Ricard and Masuyama, Yoshiki and Aihara, Ryo and Hori, Chiori and Germain, François G and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Factorized RVQ-GAN For Disentangled Speech Tokenization}},
    • booktitle = {Interspeech},
    • year = 2025,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2025-123}
    • }
  •  Yang, H., Wichern, G., Aihara, R., Masuyama, Y., Khurana, S., Germain, F.G., Le Roux, J., "Investigating Continuous Autoregressive Generative Speech Enhancement", Interspeech, August 2025.
    BibTeX TR2025-119 PDF
    • @inproceedings{Yang2025aug,
    • author = {Yang, Haici and Wichern, Gordon and Aihara, Ryo and Masuyama, Yoshiki and Khurana, Sameer and Germain, François G and {Le Roux}, Jonathan},
    • title = {{Investigating Continuous Autoregressive Generative Speech Enhancement}},
    • booktitle = {Interspeech},
    • year = 2025,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2025-119}
    • }
  •  Aihara, R., Masuyama, Y., Germain, F.G., Wichern, G., Le Roux, J., "Exploring Disentangled Neural Speech Codecs from Self-Supervised Representations", arXiv, August 2025.
    BibTeX arXiv
    • @article{Aihara2025aug,
    • author = {Aihara, Ryo and Masuyama, Yoshiki and Germain, François G and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Exploring Disentangled Neural Speech Codecs from Self-Supervised Representations}},
    • journal = {arXiv},
    • year = 2025,
    • month = aug,
    • url = {https://arxiv.org/abs/2508.08399}
    • }
  •  Steinmetz, C., Uhle, C., Everardo, F., Mitcheltree, C., McElveen, J.K., Jot, J.-M., Wichern, G., "Audio Signal Processing in the Artificial Intelligence Era: Challenges and Directions", Journal of the Audio Engineering Society, August 2025.
    BibTeX TR2025-116 PDF
    • @article{Steinmetz2025aug,
    • author = {Steinmetz, Christian and Uhle, Christian and Everardo, Flavio and Mitcheltree, Christopher and McElveen, J. Keith and Jot, Jean-Marc and Wichern, Gordon},
    • title = {{Audio Signal Processing in the Artificial Intelligence Era: Challenges and Directions}},
    • journal = {Journal of the Audio Engineering Society},
    • year = 2025,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2025-116}
    • }
  •  Paissan, F., Wichern, G., Masuyama, Y., Aihara, R., Germain, F.G., Saijo, K., Le Roux, J., "FasTUSS: Faster Task-Aware Unified Source Separation", arXiv, July 2025.
    BibTeX arXiv
    • @article{Paissan2025jul,
    • author = {Paissan, Francesco and Wichern, Gordon and Masuyama, Yoshiki and Aihara, Ryo and Germain, François G and Saijo, Kohei and {Le Roux}, Jonathan},
    • title = {{FasTUSS: Faster Task-Aware Unified Source Separation}},
    • journal = {arXiv},
    • year = 2025,
    • month = jul,
    • url = {https://arxiv.org/abs/2507.11435}
    • }
  •  Masuyama, Y., Germain, F.G., Wichern, G., Ick, C., Le Roux, J., "Physics-Informed Direction-Aware Neural Acoustic Fields", arXiv, July 2025.
    BibTeX arXiv
    • @article{Masuyama2025jul,
    • author = {Masuyama, Yoshiki and Germain, François G and Wichern, Gordon and Ick, Christopher and {Le Roux}, Jonathan},
    • title = {{Physics-Informed Direction-Aware Neural Acoustic Fields}},
    • journal = {arXiv},
    • year = 2025,
    • month = jul,
    • url = {https://arxiv.org/abs/2507.06826}
    • }
  •  Park, Y.-J., Germain, F.G., Liu, J., Wang, Y., Koike-Akino, T., Wichern, G., Azizan, N., Laughman, C.R., Chakrabarty, A., "Probabilistic Forecasting for Building Energy Systems using Time-Series Foundation Models", arXiv, May 2025.
    BibTeX arXiv
    • @article{Park2025may,
    • author = {Park, Young-Jin and Germain, François G and Liu, Jing and Wang, Ye and Koike-Akino, Toshiaki and Wichern, Gordon and Azizan, Navid and Laughman, Christopher R. and Chakrabarty, Ankush},
    • title = {{Probabilistic Forecasting for Building Energy Systems using Time-Series Foundation Models}},
    • journal = {arXiv},
    • year = 2025,
    • month = may,
    • url = {https://arxiv.org/abs/2506.00630}
    • }
  •  Araki, S., Ito, N., Haeb-Umbach, R., Wichern, G., Wang, Z.-Q., Mitsufuji, Y., "30+ Years of Source Separation Research: Achievements and Future Challenges", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10889006, April 2025, pp. 1-5.
    BibTeX TR2025-036 PDF
    • @inproceedings{Araki2025mar,
    • author = {Araki, Shoko and Ito, Nobutaka and Haeb-Umbach, Reinhold and Wichern, Gordon and Wang, Zhong-Qiu and Mitsufuji, Yuki},
    • title = {{30+ Years of Source Separation Research: Achievements and Future Challenges}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2025,
    • pages = {1--5},
    • month = apr,
    • doi = {10.1109/ICASSP49660.2025.10889006},
    • url = {https://www.merl.com/publications/TR2025-036}
    • }
  •  Ebbers, J., Germain, F.G., Wilkinghoff, K., Wichern, G., Le Roux, J., "No Class Left Behind: A Closer Look at Class Balancing for Audio Tagging", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10890706, April 2025.
    BibTeX TR2025-037 PDF
    • @inproceedings{Ebbers2025mar,
    • author = {Ebbers, Janek and Germain, François G and Wilkinghoff, Kevin and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{No Class Left Behind: A Closer Look at Class Balancing for Audio Tagging}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2025,
    • month = apr,
    • doi = {10.1109/ICASSP49660.2025.10890706},
    • url = {https://www.merl.com/publications/TR2025-037}
    • }
  •  Masuyama, Y., Wichern, G., Germain, F.G., Ick, C., Le Roux, J., "Retrieval-Augmented Neural Field for HRTF Upsampling and Personalization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10889481, April 2025.
    BibTeX TR2025-029 PDF Software
    • @inproceedings{Masuyama2025mar,
    • author = {Masuyama, Yoshiki and Wichern, Gordon and Germain, François G and Ick, Christopher and {Le Roux}, Jonathan},
    • title = {{Retrieval-Augmented Neural Field for HRTF Upsampling and Personalization}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2025,
    • month = apr,
    • doi = {10.1109/ICASSP49660.2025.10889481},
    • url = {https://www.merl.com/publications/TR2025-029}
    • }
  •  Saijo, K., Ebbers, J., Germain, F.G., Khurana, S., Wichern, G., Le Roux, J., "Leveraging Audio-Only Data for Text-Queried Target Sound Extraction", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10888769, April 2025.
    BibTeX TR2025-033 PDF
    • @inproceedings{Saijo2025mar2,
    • author = {Saijo, Kohei and Ebbers, Janek and Germain, François G and Khurana, Sameer and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Leveraging Audio-Only Data for Text-Queried Target Sound Extraction}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2025,
    • month = apr,
    • doi = {10.1109/ICASSP49660.2025.10888769},
    • url = {https://www.merl.com/publications/TR2025-033}
    • }
  •  Saijo, K., Ebbers, J., Germain, F.G., Wichern, G., Le Roux, J., "Task-Aware Unified Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10887819, April 2025.
    BibTeX TR2025-032 PDF Software
    • @inproceedings{Saijo2025mar,
    • author = {Saijo, Kohei and Ebbers, Janek and Germain, François G and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Task-Aware Unified Source Separation}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2025,
    • month = apr,
    • doi = {10.1109/ICASSP49660.2025.10887819},
    • url = {https://www.merl.com/publications/TR2025-032}
    • }
  •  Wilkinghoff, K., Yang, H., Ebbers, J., Germain, F.G., Wichern, G., Le Roux, J., "Keeping the Balance: Anomaly Score Calculation for Domain Generalization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10888402, April 2025.
    BibTeX TR2025-030 PDF
    • @inproceedings{Wilkinghoff2025mar,
    • author = {Wilkinghoff, Kevin and Yang, Haici and Ebbers, Janek and Germain, François G and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Keeping the Balance: Anomaly Score Calculation for Domain Generalization}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2025,
    • month = apr,
    • doi = {10.1109/ICASSP49660.2025.10888402},
    • url = {https://www.merl.com/publications/TR2025-030}
    • }
  •  Ick, C., Wichern, G., Masuyama, Y., Germain, F.G., Le Roux, J., "Data Augmentation Using Neural Acoustic Fields With Retrieval-Augmented Pre-training", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP) Satellite Workshop on Generative Data Augmentation for Real-World Signal Processing Applications (GenDA), April 2025.
    BibTeX TR2025-045 PDF
    • @inproceedings{Ick2025apr,
    • author = {Ick, Christopher and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G and {Le Roux}, Jonathan},
    • title = {{Data Augmentation Using Neural Acoustic Fields With Retrieval-Augmented Pre-training}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP) Satellite Workshop on Generative Data Augmentation for Real-World Signal Processing Applications (GenDA)},
    • year = 2025,
    • month = apr,
    • url = {https://www.merl.com/publications/TR2025-045}
    • }
  •  Koo, J., Wichern, G., Germain, F.G., Khurana, S., Le Roux, J., "SMITIN: Self-Monitored Inference-Time INtervention for Generative Music Transformers", IEEE Open Journal of Signal Processing, DOI: 10.1109/OJSP.2025.3534686, Vol. 6, pp. 266-275, January 2025.
    BibTeX TR2025-012 PDF Software
    • @article{Koo2025jan,
    • author = {Koo, Junghyun and Wichern, Gordon and Germain, François G and Khurana, Sameer and {Le Roux}, Jonathan},
    • title = {{SMITIN: Self-Monitored Inference-Time INtervention for Generative Music Transformers}},
    • journal = {IEEE Open Journal of Signal Processing},
    • year = 2025,
    • volume = 6,
    • pages = {266--275},
    • month = jan,
    • doi = {10.1109/OJSP.2025.3534686},
    • issn = {2644-1322},
    • url = {https://www.merl.com/publications/TR2025-012}
    • }
  •  Chakrabarty, A., Wichern, G., Deshpande, V.M., Vinod, A.P., Berntorp, K., Laughman, C.R., "Meta-Learning for Physically-Constrained Neural System Identification", arXiv, January 2025.
    BibTeX arXiv
    • @article{Chakrabarty2025jan,
    • author = {Chakrabarty, Ankush and Wichern, Gordon and Deshpande, Vedang M. and Vinod, Abraham P. and Berntorp, Karl and Laughman, Christopher R.},
    • title = {{Meta-Learning for Physically-Constrained Neural System Identification}},
    • journal = {arXiv},
    • year = 2025,
    • month = jan,
    • url = {https://arxiv.org/abs/2501.06167v1}
    • }
  •  Chakrabarty, A., Deshpande, V.M., Wichern, G., Berntorp, K., "Physics-Constrained Meta-Learning for Online Adaptation and Estimation in Positioning Applications", IEEE Conference on Decision and Control (CDC), DOI: 10.1109/CDC56724.2024.10886249, December 2024.
    BibTeX TR2024-180 PDF
    • @inproceedings{Chakrabarty2024dec,
    • author = {Chakrabarty, Ankush and Deshpande, Vedang M. and Wichern, Gordon and Berntorp, Karl},
    • title = {{Physics-Constrained Meta-Learning for Online Adaptation and Estimation in Positioning Applications}},
    • booktitle = {IEEE Conference on Decision and Control (CDC)},
    • year = 2024,
    • month = dec,
    • doi = {10.1109/CDC56724.2024.10886249},
    • url = {https://www.merl.com/publications/TR2024-180}
    • }
  •  Park, Y.-J., Germain, F.G., Liu, J., Wang, Y., Koike-Akino, T., Wichern, G., Laughman, C.R., Azizan, N., Chakrabarty, A., "Probabilistic Forecasting for Building Energy Systems: Are Time-Series Foundation Models The Answer?", Advances in Neural Information Processing Systems (NeurIPS), December 2024.
    BibTeX TR2025-001 PDF
    • @inproceedings{Park2024dec,
    • author = {Park, Young-Jin and Germain, François G and Liu, Jing and Wang, Ye and Koike-Akino, Toshiaki and Wichern, Gordon and Laughman, Christopher R. and Azizan, Navid and Chakrabarty, Ankush},
    • title = {{Probabilistic Forecasting for Building Energy Systems: Are Time-Series Foundation Models The Answer?}},
    • booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
    • year = 2024,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2025-001}
    • }
  •  Ick, C., Wichern, G., Masuyama, Y., Germain, F.G., Le Roux, J., "Spatially-Aware Losses for Enhanced Neural Acoustic Fields", NeurIPS 2024 Audio Imagination Workshop, December 2024.
    BibTeX TR2024-169 PDF
    • @inproceedings{Ick2024dec,
    • author = {Ick, Christopher and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G and {Le Roux}, Jonathan},
    • title = {{Spatially-Aware Losses for Enhanced Neural Acoustic Fields}},
    • booktitle = {NeurIPS 2024 Audio Imagination Workshop},
    • year = 2024,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2024-169}
    • }
  •  Saijo, K., Wichern, G., Germain, F.G., Pan, Z., Le Roux, J., "TF-Locoformer: Transformer with Local Modeling by Convolution for Speech Separation and Enhancement", International Workshop on Acoustic Signal Enhancement (IWAENC), DOI: 10.1109/IWAENC61483.2024.10694313, September 2024, pp. 205-209.
    BibTeX TR2024-126 PDF Software
    • @inproceedings{Saijo2024sep2,
    • author = {Saijo, Kohei and Wichern, Gordon and Germain, François G and Pan, Zexu and {Le Roux}, Jonathan},
    • title = {{TF-Locoformer: Transformer with Local Modeling by Convolution for Speech Separation and Enhancement}},
    • booktitle = {International Workshop on Acoustic Signal Enhancement (IWAENC)},
    • year = 2024,
    • pages = {205--209},
    • month = sep,
    • doi = {10.1109/IWAENC61483.2024.10694313},
    • issn = {2835-3439},
    • isbn = {979-8-3503-6185-8},
    • url = {https://www.merl.com/publications/TR2024-126}
    • }
  •  Ebbers, J., Germain, F.G., Wichern, G., Le Roux, J., "Sound Event Bounding Boxes", Interspeech, DOI: 10.21437/Interspeech.2024-2075, September 2024, pp. 562-566.
    BibTeX TR2024-118 PDF Software
    • @inproceedings{Ebbers2024sep,
    • author = {Ebbers, Janek and Germain, François G and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Sound Event Bounding Boxes}},
    • booktitle = {Interspeech},
    • year = 2024,
    • pages = {562--566},
    • month = sep,
    • doi = {10.21437/Interspeech.2024-2075},
    • issn = {2958-1796},
    • url = {https://www.merl.com/publications/TR2024-118}
    • }
  •  Khurana, S., Hori, C., Laurent, A., Wichern, G., Le Roux, J., "ZeroST: Zero-Shot Speech Translation", Interspeech, DOI: 10.21437/Interspeech.2024-1088, September 2024, pp. 392-396.
    BibTeX TR2024-122 PDF
    • @inproceedings{Khurana2024sep,
    • author = {Khurana, Sameer and Hori, Chiori and Laurent, Antoine and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{ZeroST: Zero-Shot Speech Translation}},
    • booktitle = {Interspeech},
    • year = 2024,
    • pages = {392--396},
    • month = sep,
    • doi = {10.21437/Interspeech.2024-1088},
    • issn = {2958-1796},
    • url = {https://www.merl.com/publications/TR2024-122}
    • }
  •  Pan, Z., Wichern, G., Germain, F.G., Saijo, K., Le Roux, J., "PARIS: Pseudo-AutoRegressIve Siamese Training for Online Speech Separation", Interspeech, DOI: 10.21437/Interspeech.2024-1066, September 2024, pp. 582-586.
    BibTeX TR2024-124 PDF
    • @inproceedings{Pan2024sep,
    • author = {Pan, Zexu and Wichern, Gordon and Germain, François G and Saijo, Kohei and {Le Roux}, Jonathan},
    • title = {{PARIS: Pseudo-AutoRegressIve Siamese Training for Online Speech Separation}},
    • booktitle = {Interspeech},
    • year = 2024,
    • pages = {582--586},
    • month = sep,
    • doi = {10.21437/Interspeech.2024-1066},
    • issn = {2958-1796},
    • url = {https://www.merl.com/publications/TR2024-124}
    • }