Publications

57 / 3,739 publications found.


  •  Chen, K., Wichern, G., Germain, F., Le Roux, J., "Pac-HuBERT: Self-Supervised Music Source Separation via Primitive Auditory Clustering and Hidden-Unit BERT", IEEE ICASSP Satellite Workshop on Self-supervision in Audio, Speech and Beyond (SASB), DOI: 10.1109/​ICASSPW59220.2023.10193575, May 2023.
    BibTeX TR2023-030 PDF
    • @inproceedings{Chen2023may,
    • author = {Chen, Ke and Wichern, Gordon and Germain, Francois and Le Roux, Jonathan},
    • title = {Pac-HuBERT: Self-Supervised Music Source Separation via Primitive Auditory Clustering and Hidden-Unit BERT},
    • booktitle = {IEEE ICASSP Satellite Workshop on Self-supervision in Audio, Speech and Beyond (SASB)},
    • year = 2023,
    • month = may,
    • doi = {10.1109/ICASSPW59220.2023.10193575},
    • isbn = {979-8-3503-0261-5},
    • url = {https://www.merl.com/publications/TR2023-030}
    • }
  •  Wang, S., Guo, J., Wang, P., Parsons, K., Orlik, P.V., Nagai, Y., Sumi, T., Pathak, P., "X-Disco: Cross-technology Neighbor Discovery", IEEE International Conference on Sensing, Communication, and Networking, September 2022.
    BibTeX TR2022-119 PDF
    • @inproceedings{Wang2022sep2,
    • author = {Wang, Shuai and Guo, Jianlin and Wang, Pu and Parsons, Kieran and Orlik, Philip V. and Nagai, Yukimasa and Sumi, Takenori and Pathak, Parth},
    • title = {X-Disco: Cross-technology Neighbor Discovery},
    • booktitle = {IEEE International Conference on Sensing, Communication, and Networking},
    • year = 2022,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2022-119}
    • }
  •  Yu, J., Wang, P., Koike-Akino, T., Orlik, P.V., "Multi-Modal Recurrent Fusion for Indoor Localization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP43922.2022.9746071, April 2022.
    BibTeX TR2022-018 PDF
    • @inproceedings{Yu2022apr,
    • author = {Yu, Jianyuan and Wang, Pu and Koike-Akino, Toshiaki and Orlik, Philip V.},
    • title = {Multi-Modal Recurrent Fusion for Indoor Localization},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2022,
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP43922.2022.9746071},
    • issn = {2379-190X},
    • isbn = {978-1-6654-0540-9},
    • url = {https://www.merl.com/publications/TR2022-018}
    • }
  •  Li, M., Chen, S., Zhang, Y., "Graph Cross Networks with Vertex Infomax Pooling", Advances in Neural Information Processing Systems (NeurIPS), December 2020.
    BibTeX TR2020-167 PDF
    • @inproceedings{Li2020dec,
    • author = {Li, Maosen and Chen, Siheng and Zhang, Ya},
    • title = {Graph Cross Networks with Vertex Infomax Pooling},
    • booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
    • year = 2020,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2020-167}
    • }
  •  Sari, L., Moritz, N., Hori, T., Le Roux, J., "Unsupervised Speaker Adaptation Using Attention-Based Speaker Memory For End-To-End ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9054249, April 2020, pp. 7384-7388.
    BibTeX TR2020-037 PDF Video Presentation
    • @inproceedings{Sari2020apr,
    • author = {Sari, Leda and Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Unsupervised Speaker Adaptation Using Attention-Based Speaker Memory For End-To-End ASR},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {7384--7388},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054249},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-037}
    • }
  •  Ma, T., Komatsu, T., Wang, B., Wang, Y., Lin, C., "Observer Designs for Simultaneous Temperature and Loss Estimation for Electric Motors: A Comparative Study", IEEE Industrial Electronics Society (IECON), DOI: 10.1109/​IECON.2019.8927182, October 2019, pp. 1234-1241.
    BibTeX TR2019-122 PDF
    • @inproceedings{Ma2019oct,
    • author = {Ma, Tong and Komatsu, Taiga and Wang, Bingnan and Wang, Yebin and Lin, Chungwei},
    • title = {Observer Designs for Simultaneous Temperature and Loss Estimation for Electric Motors: A Comparative Study},
    • booktitle = {IEEE Industrial Electronics Society (IECON)},
    • year = 2019,
    • pages = {1234--1241},
    • month = oct,
    • doi = {10.1109/IECON.2019.8927182},
    • issn = {1553-572X},
    • url = {https://www.merl.com/publications/TR2019-122}
    • }
  •  Karafiat, M., Baskar, M.K., Watanabe, S., Hori, T., Wiesner, M., Cernocky, J.H., "Analysis of Multilingual Sequence-to-Sequence Speech Recognition Systems", Interspeech, DOI: 10.21437/​Interspeech.2019-2355, September 2019.
    BibTeX TR2019-103 PDF
    • @inproceedings{Karafiat2019sep,
    • author = {Karafiat, Martin and Baskar, Murali Karthick and Watanabe, Shinji and Hori, Takaaki and Wiesner, Matthew and Cernocky, Jan Honza},
    • title = {Analysis of Multilingual Sequence-to-Sequence Speech Recognition Systems},
    • booktitle = {Interspeech},
    • year = 2019,
    • month = sep,
    • doi = {10.21437/Interspeech.2019-2355},
    • url = {https://www.merl.com/publications/TR2019-103}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition", Interspeech, DOI: 10.21437/​Interspeech.2019-2837, September 2019, pp. 76-80.
    BibTeX TR2019-098 PDF
    • @inproceedings{Moritz2019sep,
    • author = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition},
    • booktitle = {Interspeech},
    • year = 2019,
    • pages = {76--80},
    • month = sep,
    • doi = {10.21437/Interspeech.2019-2837},
    • url = {https://www.merl.com/publications/TR2019-098}
    • }
  •  Cho, J., Watanabe, S., Hori, T., Baskar, M.K., Inaguma, H., Villalba, J., Dehak, N., "Language Model Integration Based on Memory Control for Sequence to Sequence Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2019.8683380, May 2019.
    BibTeX TR2019-007 PDF
    • @inproceedings{Cho2019may,
    • author = {Cho, Jaejin and Watanabe, Shinji and Hori, Takaaki and Baskar, Murali Karthick and Inaguma, Hirofumi and Villalba, Jesus and Dehak, Najim},
    • title = {Language Model Integration Based on Memory Control for Sequence to Sequence Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2019,
    • month = may,
    • doi = {10.1109/ICASSP.2019.8683380},
    • url = {https://www.merl.com/publications/TR2019-007}
    • }
  •  Hayashi, T., Watanabe, S., Zhang, Y., Toda, T., Hori, T., Astudillo, R., Takeda, K., "Back-Translation-Style Data Augmentation for End-to-End ASR", IEEE Spoken Language Technology Workshop (SLT), DOI: 10.1109/​SLT.2018.8639619, December 2018.
    BibTeX TR2018-174 PDF
    • @inproceedings{Hayashi2018dec,
    • author = {Hayashi, Tomoki and Watanabe, Shinji and Zhang, Yu and Toda, Tomoki and Hori, Takaaki and Astudillo, Ramon and Takeda, Kazuya},
    • title = {Back-Translation-Style Data Augmentation for End-to-End ASR},
    • booktitle = {IEEE Spoken Language Technology Workshop (SLT)},
    • year = 2018,
    • month = dec,
    • doi = {10.1109/SLT.2018.8639619},
    • url = {https://www.merl.com/publications/TR2018-174}
    • }
  •  Seki, H., Hori, T., Watanabe, S., Le Roux, J., Hershey, J., "A Purely End-to-end System for Multi-speaker Speech Recognition", Annual Meeting of the Association for Computational Linguistics (ACL), July 2018, pp. 2620-2630.
    BibTeX TR2018-104 PDF Video
    • @inproceedings{Seki2018jul,
    • author = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan and Hershey, John},
    • title = {A Purely End-to-end System for Multi-speaker Speech Recognition},
    • booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL)},
    • year = 2018,
    • pages = {2620--2630},
    • month = jul,
    • publisher = {Association for Computational Linguistics},
    • url = {https://www.merl.com/publications/TR2018-104}
    • }
  •  Ochiai, T., Watanabe, S., Katagiri, S., Hori, T., Hershey, J.R., "Speaker Adaptation for Multichannel End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2018.8462161, April 2018, pp. 6707-6711.
    BibTeX TR2018-006 PDF
    • @inproceedings{Ochiai2018apr,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru and Hori, Takaaki and Hershey, John R.},
    • title = {Speaker Adaptation for Multichannel End-to-End Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {6707--6711},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8462161},
    • url = {https://www.merl.com/publications/TR2018-006}
    • }
  •  Zhang, Z., Kag, A., Sullivan, A., Saligrama, V., "Equilibrated Recurrent Neural Network: Neuronal Time-Delayed Self-Feedback Improves Accuracy and Stability", arXiv, March 2019.
    BibTeX arXiv
    • @article{Zhang2018mar2,
    • author = {Zhang, Ziming and Kag, Anil and Sullivan, Alan and Saligrama, Venkatesh},
    • title = {Equilibrated Recurrent Neural Network: Neuronal Time-Delayed Self-Feedback Improves Accuracy and Stability},
    • journal = {arXiv},
    • year = 2019,
    • month = mar,
    • url = {https://arxiv.org/abs/1903.00755}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., Xiao, X., "Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/​JSTSP.2017.2764276, Vol. 11, No. 8, pp. 1274-1288, October 2017.
    BibTeX TR2017-192 PDF
    • @article{Ochiai2017oct2,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R. and Xiao, Xiong},
    • title = {Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1274--1288},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2764276},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-192}
    • }
  •  Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T., "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/​JSTSP.2017.2763455, Vol. 11, No. 8, pp. 1240-1253, October 2017.
    BibTeX TR2017-190 PDF Video
    • @article{Watanabe2017oct,
    • author = {Watanabe, Shinji and Hori, Takaaki and Kim, Suyoun and Hershey, John R. and Hayashi, Tomoki},
    • title = {Hybrid CTC/Attention Architecture for End-to-End Speech Recognition},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1240--1253},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2763455},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-190}
    • }
  •  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Duration-Controlled LSTM for Polyphonic Sound Event Detection", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/​TASLP.2017.2740002, Vol. 25, No. 11, August 2017.
    BibTeX TR2017-150 PDF
    • @article{Hayashi2017aug,
    • author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
    • title = {Duration-Controlled LSTM for Polyphonic Sound Event Detection},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2017,
    • volume = 25,
    • number = 11,
    • month = aug,
    • doi = {10.1109/TASLP.2017.2740002},
    • issn = {2329-9304},
    • url = {https://www.merl.com/publications/TR2017-150}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., "Multichannel End-to-end Speech Recognition", International Conference on Machine Learning (ICML), August 2017.
    BibTeX TR2017-107 PDF
    • @inproceedings{Ochiai2017aug,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
    • title = {Multichannel End-to-end Speech Recognition},
    • booktitle = {International Conference on Machine Learning (ICML)},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-107}
    • }
  •  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-014 PDF
    • @inproceedings{Hayashi2017mar,
    • author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
    • title = {BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-014}
    • }
  •  Meng, Z., Watanabe, S., Hershey, J.R., Erdogan, H., "Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-012 PDF
    • @inproceedings{Meng2017mar,
    • author = {Meng, Zhong and Watanabe, Shinji and Hershey, John R. and Erdogan, Hakan},
    • title = {Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-012}
    • }
  •  Wisdom, S., Powers, T., Hershey, J.R., Le Roux, J., Atlas, L., "Full-Capacity Unitary Recurrent Neural Networks", Advances in Neural Information Processing Systems (NIPS), December 2016.
    BibTeX TR2016-155 PDF
    • @inproceedings{Wisdom2016dec,
    • author = {Wisdom, Scott and Powers, Thomas and Hershey, John R. and Le Roux, Jonathan and Atlas, Les},
    • title = {Full-Capacity Unitary Recurrent Neural Networks},
    • booktitle = {Advances in Neural Information Processing Systems (NIPS)},
    • year = 2016,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2016-155}
    • }
  •  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Bidirectional LSTM-HMM Hybrid System for Polyphonic Sound Event Detection", Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE), September 2016, pp. 35-39.
    BibTeX TR2016-114 PDF
    • @inproceedings{Hayashi2016sep,
    • author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
    • title = {Bidirectional LSTM-HMM Hybrid System for Polyphonic Sound Event Detection},
    • booktitle = {Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE)},
    • year = 2016,
    • pages = {35--39},
    • month = sep,
    • url = {https://www.merl.com/publications/TR2016-114}
    • }
  •  Shinozaki, T., Watanabe, S., "Structure Discovery of Deep Neural Network Based on Evolutionary Algorithms", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2015.7178918, April 2015, pp. 4979-4983.
    BibTeX TR2015-032 PDF
    • @inproceedings{Shinozaki2015apr,
    • author = {Shinozaki, T. and Watanabe, S.},
    • title = {Structure Discovery of Deep Neural Network Based on Evolutionary Algorithms},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2015,
    • pages = {4979--4983},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP.2015.7178918},
    • url = {https://www.merl.com/publications/TR2015-032}
    • }
  •  Weng, C., Yu, D., Watanabe, S., Juang, B.-H.F., "Recurrent Deep Neural Networks for Robust Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2014.6854661, May 2014, pp. 5532-5536.
    BibTeX TR2014-023 PDF
    • @inproceedings{Weng2014may,
    • author = {Weng, C. and Yu, D. and Watanabe, S. and Juang, B.-H.F.},
    • title = {Recurrent Deep Neural Networks for Robust Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2014,
    • pages = {5532--5536},
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP.2014.6854661},
    • url = {https://www.merl.com/publications/TR2014-023}
    • }
  •  Watanabe, S., Nakamura, A., Juang, B.-H., "Structural Bayesian Linear Regression for Hidden Markov Models", Journal of Signal Processing Systems, DOI: 10.1007/​s11265-013-0785-8, Vol. 74, No. 3, pp. 341-358, March 2014.
    BibTeX TR2013-071 PDF
    • @article{Watanabe2013aug,
    • author = {Watanabe, S. and Nakamura, A. and Juang, B.-H.},
    • title = {Structural Bayesian Linear Regression for Hidden Markov Models},
    • journal = {Journal of Signal Processing Systems},
    • year = 2014,
    • volume = 74,
    • number = 3,
    • pages = {341--358},
    • month = mar,
    • doi = {10.1007/s11265-013-0785-8},
    • issn = {1939-8018},
    • url = {https://www.merl.com/publications/TR2013-071}
    • }
  •  Gales, M., Watanabe, S., Fosler-Lussier, E., "Structured Discriminative Models For Speech Recognition", IEEE Signal Processing Magazine, Vol. 29, No. 6, pp. 70-81, November 2012.
    BibTeX TR2012-072 PDF
    • @article{Gales2012nov,
    • author = {Gales, M. and Watanabe, S. and Fosler-Lussier, E.},
    • title = {Structured Discriminative Models For Speech Recognition},
    • journal = {IEEE Signal Processing Magazine},
    • year = 2012,
    • volume = 29,
    • number = 6,
    • pages = {70--81},
    • month = nov,
    • url = {https://www.merl.com/publications/TR2012-072}
    • }