Publications

8 / 3,840 publications found.


  •  Ick, C., Wichern, G., Masuyama, Y., Germain, F.G., Le Roux, J., "Direction-Aware Neural Acoustic Fields for Few-Shot Interpolation of Ambisonic Impulse Responses", arXiv, May 2025.
    BibTeX arXiv
    • @article{Ick2025may,
    •   author        = {Ick, Christopher and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G and {Le Roux}, Jonathan},
    •   title         = {{Direction-Aware Neural Acoustic Fields for Few-Shot Interpolation of Ambisonic Impulse Responses}},
    •   journal       = {arXiv},
    •   month         = may,
    •   year          = {2025},
    •   eprint        = {2505.13617},
    •   archiveprefix = {arXiv},
    •   url           = {https://arxiv.org/abs/2505.13617}
    • }
  •  Masuyama, Y., Chang, X., Zhang, W., Cornell, S., Wang, Z.-Q., Ono, N., Qian, Y., Watanabe, S., "An End-to-End Integration of Speech Separation and Recognition with Self-Supervised Learning Representation", Computer Speech & Language, DOI: 10.1016/j.csl.2025.101813, Vol. 95, pp. 101813, May 2025.
    BibTeX TR2025-054 PDF
    • @article{Masuyama2025may,
    •   author  = {Masuyama, Yoshiki and Chang, Xuankai and Zhang, Wangyou and Cornell, Samuele and Wang, Zhong-Qiu and Ono, Nobutaka and Qian, Yanmin and Watanabe, Shinji},
    •   title   = {{An End-to-End Integration of Speech Separation and Recognition with Self-Supervised Learning Representation}},
    •   journal = {Computer Speech \& Language},
    •   volume  = {95},
    •   pages   = {101813},
    •   month   = may,
    •   year    = {2025},
    •   issn    = {0885-2308},
    •   doi     = {10.1016/j.csl.2025.101813},
    •   url     = {https://www.merl.com/publications/TR2025-054}
    • }
  •  Masuyama, Y., Wichern, G., Germain, F.G., Ick, C., Le Roux, J., "Retrieval-Augmented Neural Field for HRTF Upsampling and Personalization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10889481, April 2025.
    BibTeX TR2025-029 PDF Software
    • @comment{Masuyama2025mar: the key keeps its original "mar" suffix so existing citations still resolve; the month field follows the April 2025 date given in the rendered citation for this paper.}
    • @inproceedings{Masuyama2025mar,
    •   author    = {Masuyama, Yoshiki and Wichern, Gordon and Germain, François G and Ick, Christopher and {Le Roux}, Jonathan},
    •   title     = {{Retrieval-Augmented Neural Field for HRTF Upsampling and Personalization}},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   month     = apr,
    •   year      = {2025},
    •   doi       = {10.1109/ICASSP49660.2025.10889481},
    •   url       = {https://www.merl.com/publications/TR2025-029}
    • }
  •  Ick, C., Wichern, G., Masuyama, Y., Germain, F.G., Le Roux, J., "Data Augmentation Using Neural Acoustic Fields With Retrieval-Augmented Pre-training", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP) Satellite Workshop on Generative Data Augmentation for Real-World Signal Processing Applications (GenDA), April 2025.
    BibTeX TR2025-045 PDF
    • @inproceedings{Ick2025apr,
    •   author    = {Ick, Christopher and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G and {Le Roux}, Jonathan},
    •   title     = {{Data Augmentation Using Neural Acoustic Fields With Retrieval-Augmented Pre-training}},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP) Satellite Workshop on Generative Data Augmentation for Real-World Signal Processing Applications (GenDA)},
    •   month     = apr,
    •   year      = {2025},
    •   url       = {https://www.merl.com/publications/TR2025-045}
    • }
  •  Tian, J., Shi, J., Chen, W., Arora, S., Masuyama, Y., Maekaku, T., Wu, Y., Peng, J., Bharadwaj, S., Zhao, Y., Cornell, S., Peng, Y., Yue, X., Yang, C.-H.H., Neubig, G., Watanabe, S., "ESPnet-SpeechLM: An Open Speech Language Model Toolkit", NAACL-HLT (the system demonstration track), Dziri, N., Ren, S.X., Diao, S., Eds., March 2025, pp. 116-124.
    BibTeX TR2025-038 PDF
    • @inproceedings{Tian2025mar,
    •   author    = {Tian, Jinchuan and Shi, Jiatong and Chen, William and Arora, Siddhant and Masuyama, Yoshiki and Maekaku, Takashi and Wu, Yihan and Peng, Junyi and Bharadwaj, Shikhar and Zhao, Yiwen and Cornell, Samuele and Peng, Yifan and Yue, Xiang and Yang, Chao-Han H. and Neubig, Graham and Watanabe, Shinji},
    •   title     = {{ESPnet-SpeechLM: An Open Speech Language Model Toolkit}},
    •   editor    = {Dziri, N. and Ren, S. X. and Diao, S.},
    •   booktitle = {NAACL-HLT (the system demonstration track)},
    •   pages     = {116--124},
    •   publisher = {Association for Computational Linguistics},
    •   month     = mar,
    •   year      = {2025},
    •   url       = {https://www.merl.com/publications/TR2025-038}
    • }
  •  Ick, C., Wichern, G., Masuyama, Y., Germain, F.G., Le Roux, J., "Spatially-Aware Losses for Enhanced Neural Acoustic Fields", NeurIPS 2024 Audio Imagination Workshop, December 2024.
    BibTeX TR2024-169 PDF
    • @inproceedings{Ick2024dec,
    •   author    = {Ick, Christopher and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G and {Le Roux}, Jonathan},
    •   title     = {{Spatially-Aware Losses for Enhanced Neural Acoustic Fields}},
    •   booktitle = {NeurIPS 2024 Audio Imagination Workshop},
    •   month     = dec,
    •   year      = {2024},
    •   url       = {https://www.merl.com/publications/TR2024-169}
    • }
  •  Masuyama, Y., Wichern, G., Germain, F.G., Pan, Z., Khurana, S., Hori, C., Le Roux, J., "NIIRF: Neural IIR Filter Field for HRTF Upsampling and Personalization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10448477, March 2024, pp. 1016-1020.
    BibTeX TR2024-026 PDF Software
    • @inproceedings{Masuyama2024mar,
    •   author    = {Masuyama, Yoshiki and Wichern, Gordon and Germain, François G and Pan, Zexu and Khurana, Sameer and Hori, Chiori and {Le Roux}, Jonathan},
    •   title     = {{NIIRF: Neural IIR Filter Field for HRTF Upsampling and Personalization}},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   pages     = {1016--1020},
    •   month     = mar,
    •   year      = {2024},
    •   doi       = {10.1109/ICASSP48485.2024.10448477},
    •   url       = {https://www.merl.com/publications/TR2024-026}
    • }
  •  Pan, Z., Wichern, G., Masuyama, Y., Germain, F.G., Khurana, S., Hori, C., Le Roux, J., "Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU57964.2023.10389618, December 2023.
    BibTeX TR2023-152 PDF Video
    • @inproceedings{Pan2023dec2,
    •   author    = {Pan, Zexu and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G and Khurana, Sameer and Hori, Chiori and {Le Roux}, Jonathan},
    •   title     = {{Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction}},
    •   booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    •   month     = dec,
    •   year      = {2023},
    •   isbn      = {979-8-3503-0689-7},
    •   doi       = {10.1109/ASRU57964.2023.10389618},
    •   url       = {https://www.merl.com/publications/TR2023-152}
    • }