Publications

Liu, B., Koike-Akino, T., Wang, Y., Parsons, K., "Variational Quantum Compressed Sensing for Joint User and Channel State Acquisition in Grant-Free Device Access Systems", IEEE International Conference on Communications (ICC), DOI: 10.1109/ICC45855.2022.9838445, May 2022.
BibTeX TR2022-052 PDF Video Presentation
- @inproceedings{Liu2022may3,
- author    = {Liu, Bryan and Koike-Akino, Toshiaki and Wang, Ye and Parsons, Kieran},
- title     = {{Variational Quantum Compressed Sensing for Joint User and Channel State Acquisition in Grant-Free Device Access Systems}},
- booktitle = {IEEE International Conference on Communications (ICC)},
- year      = {2022},
- month     = may,
- publisher = {IEEE},
- doi       = {10.1109/ICC45855.2022.9838445},
- issn      = {1938-1883},
- isbn      = {978-1-5386-8347-7},
- url       = {https://www.merl.com/publications/TR2022-052}
- }
Jung, M., Kojima, K., Koike-Akino, T., Wang, Y., Zhu, D., Brand, M., "Finding the Right Deep Neural Network Model for Efficient Design of Tunable Nanophotonic Devices", Conference on Lasers and Electro-Optics (CLEO), DOI: 10.1364/CLEO_SI.2022.SW5E.6, May 2022.
BibTeX TR2022-047 PDF Video Presentation
- @inproceedings{Jung2022may,
- author    = {Jung, Minwoo and Kojima, Keisuke and Koike-Akino, Toshiaki and Wang, Ye and Zhu, Dayu and Brand, Matthew},
- title     = {{Finding the Right Deep Neural Network Model for Efficient Design of Tunable Nanophotonic Devices}},
- booktitle = {Conference on Lasers and Electro-Optics (CLEO)},
- year      = {2022},
- month     = may,
- publisher = {Optica},
- doi       = {10.1364/CLEO_SI.2022.SW5E.6},
- isbn      = {978-1-957171-05-0},
- url       = {https://www.merl.com/publications/TR2022-047}
- }
Koike-Akino, T., Kojima, K., Wang, Y., "AutoML Hyperparameter Tuning of Generative DNN Architecture for Nanophotonic Device Design", Conference on Lasers and Electro-Optics (CLEO), DOI: 10.1364/CLEO_AT.2022.JW3A.44, May 2022.
BibTeX TR2022-046 PDF Presentation
- @inproceedings{Koike-Akino2022may3,
- author    = {Koike-Akino, Toshiaki and Kojima, Keisuke and Wang, Ye},
- title     = {{AutoML Hyperparameter Tuning of Generative DNN Architecture for Nanophotonic Device Design}},
- booktitle = {Conference on Lasers and Electro-Optics (CLEO)},
- year      = {2022},
- month     = may,
- publisher = {Optica},
- doi       = {10.1364/CLEO_AT.2022.JW3A.44},
- isbn      = {978-1-957171-05-0},
- url       = {https://www.merl.com/publications/TR2022-046}
- }
Chang, X., Moritz, N., Hori, T., Watanabe, S., Le Roux, J., "Extended Graph Temporal Classification for Multi-Speaker End-to-End ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747375, April 2022, pp. 7322-7326.
BibTeX TR2022-021 PDF
- @inproceedings{Chang2022apr,
- author    = {Chang, Xuankai and Moritz, Niko and Hori, Takaaki and Watanabe, Shinji and {Le Roux}, Jonathan},
- title     = {{Extended Graph Temporal Classification for Multi-Speaker End-to-End ASR}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year      = {2022},
- month     = apr,
- pages     = {7322--7326},
- publisher = {IEEE},
- doi       = {10.1109/ICASSP43922.2022.9747375},
- url       = {https://www.merl.com/publications/TR2022-021}
- }
Higuchi, Y., Moritz, N., Le Roux, J., Hori, T., "Advancing Momentum Pseudo-Labeling with Conformer and Initialization Strategy", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9746275, April 2022, pp. 7672-7676.
BibTeX TR2022-026 PDF
- @inproceedings{Higuchi2022apr,
- author    = {Higuchi, Yosuke and Moritz, Niko and {Le Roux}, Jonathan and Hori, Takaaki},
- title     = {{Advancing Momentum Pseudo-Labeling with Conformer and Initialization Strategy}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year      = {2022},
- month     = apr,
- pages     = {7672--7676},
- publisher = {IEEE},
- doi       = {10.1109/ICASSP43922.2022.9746275},
- url       = {https://www.merl.com/publications/TR2022-026}
- }
Moritz, N., Hori, T., Watanabe, S., Le Roux, J., "Sequence Transduction with Graph-based Supervision", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747788, April 2022, pp. 7212-7216.
BibTeX TR2022-024 PDF
- @inproceedings{Moritz2022apr,
- author    = {Moritz, Niko and Hori, Takaaki and Watanabe, Shinji and {Le Roux}, Jonathan},
- title     = {{Sequence Transduction with Graph-based Supervision}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year      = {2022},
- month     = apr,
- pages     = {7212--7216},
- publisher = {IEEE},
- doi       = {10.1109/ICASSP43922.2022.9747788},
- url       = {https://www.merl.com/publications/TR2022-024}
- }
Peng, K.-C., "Iterative Self Knowledge Distillation -- From Pothole Classification To Fine-Grained And COVID Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Gan, W.-S. and Ma, K. K., Eds., DOI: 10.1109/ICASSP43922.2022.9746470, April 2022, pp. 3139-3143.
BibTeX TR2022-020 PDF Video Presentation
- @inproceedings{Peng2022apr,
- author    = {Peng, Kuan-Chuan},
- title     = {{Iterative Self Knowledge Distillation --- From Pothole Classification To Fine-Grained And COVID Recognition}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- editor    = {Gan, W.-S. and Ma, K. K.},
- year      = {2022},
- month     = apr,
- pages     = {3139--3143},
- publisher = {IEEE},
- doi       = {10.1109/ICASSP43922.2022.9746470},
- issn      = {1520-6149},
- isbn      = {978-1-6654-0541-6},
- url       = {https://www.merl.com/publications/TR2022-020}
- }
Petermann, D., Wichern, G., Wang, Z.-Q., Le Roux, J., "The Cocktail Fork Problem: Three-Stem Audio Separation for Real-World Soundtracks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9746005, April 2022, pp. 526-530.
BibTeX TR2022-022 PDF Video Data Software
- @inproceedings{Petermann2022apr,
- author = {Petermann, Darius and Wichern, Gordon and Wang, Zhong-Qiu and {Le Roux}, Jonathan},
- title = {{The Cocktail Fork Problem: Three-Stem Audio Separation for Real-World Soundtracks}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2022,
- pages = {526--530},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP43922.2022.9746005},
- url = {https://www.merl.com/publications/TR2022-022}
- }
Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Marks, T.K., Le Roux, J., Hori, C., "Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2022, pp. 7732-7736.
BibTeX TR2022-019 PDF
- @inproceedings{Shah2022apr,
- author    = {Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and Marks, Tim K. and {Le Roux}, Jonathan and Hori, Chiori},
- title     = {{Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year      = {2022},
- month     = apr,
- pages     = {7732--7736},
- publisher = {IEEE},
- issn      = {1520-6149},
- isbn      = {978-1-6654-0540-9},
- url       = {https://www.merl.com/publications/TR2022-019}
- }
Slizovskaia, O., Wichern, G., Wang, Z.-Q., Le Roux, J., "Locate This, Not That: Class-Conditioned Sound Event DOA Estimation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747604, April 2022, pp. 711-715.
BibTeX TR2022-023 PDF
- @inproceedings{Slizovskaia2022mar,
- author = {Slizovskaia, Olga and Wichern, Gordon and Wang, Zhong-Qiu and {Le Roux}, Jonathan},
- title = {{Locate This, Not That: Class-Conditioned Sound Event DOA Estimation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2022,
- pages = {711--715},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP43922.2022.9747604},
- url = {https://www.merl.com/publications/TR2022-023}
- }
Hori, C., Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Le Roux, J., Marks, T.K., "Overview of Audio Visual Scene-Aware Dialog with Reasoning Track for Natural Language Generation in DSTC10", The 10th Dialog System Technology Challenge Workshop at AAAI, February 2022.
BibTeX TR2022-016 PDF
- @inproceedings{Hori2022feb,
- author    = {Hori, Chiori and Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and {Le Roux}, Jonathan and Marks, Tim K.},
- title     = {{Overview of Audio Visual Scene-Aware Dialog with Reasoning Track for Natural Language Generation in DSTC10}},
- booktitle = {The 10th Dialog System Technology Challenge Workshop at AAAI},
- year      = {2022},
- month     = feb,
- url       = {https://www.merl.com/publications/TR2022-016}
- }
Shah, A.P., Hori, T., Le Roux, J., Hori, C., "DSTC10-AVSD Submission System with Reasoning using Audio-Visual Transformers with Joint Student-Teacher Learning", The 10th Dialog System Technology Challenge Workshop at AAAI 2022, February 2022.
BibTeX TR2022-025 PDF
- @inproceedings{Shah2022feb,
- author    = {Shah, Ankit Parag and Hori, Takaaki and {Le Roux}, Jonathan and Hori, Chiori},
- title     = {{DSTC10-AVSD Submission System with Reasoning using Audio-Visual Transformers with Joint Student-Teacher Learning}},
- booktitle = {The 10th Dialog System Technology Challenge Workshop at AAAI 2022},
- year      = {2022},
- month     = feb,
- url       = {https://www.merl.com/publications/TR2022-025}
- }
Cao, W., Benosman, M., Zhang, X., Ma, R., "Domain Knowledge-Based Automated Analog Circuit Design with Deep Reinforcement Learning", AAAI Conference on Artificial Intelligence, February 2022.
BibTeX TR2022-017 PDF
- @inproceedings{Cao2022feb,
- author    = {Cao, Weidong and Benosman, Mouhacine and Zhang, Xuan and Ma, Rui},
- title     = {{Domain Knowledge-Based Automated Analog Circuit Design with Deep Reinforcement Learning}},
- booktitle = {AAAI Conference on Artificial Intelligence},
- year      = {2022},
- month     = feb,
- publisher = {AAAI},
- url       = {https://www.merl.com/publications/TR2022-017}
- }
Cherian, A., Hori, C., Marks, T.K., Le Roux, J., "(2.5+1)D Spatio-Temporal Scene Graphs for Video Question Answering", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i1.19922, February 2022, pp. 444-453.
BibTeX TR2022-014 PDF Video Presentation
- @inproceedings{Cherian2022feb,
- author = {Cherian, Anoop and Hori, Chiori and Marks, Tim K. and {Le Roux}, Jonathan},
- title = {{(2.5+1)D Spatio-Temporal Scene Graphs for Video Question Answering}},
- booktitle = {AAAI Conference on Artificial Intelligence},
- year = 2022,
- pages = {444--453},
- month = feb,
- doi = {10.1609/aaai.v36i1.19922},
- url = {https://www.merl.com/publications/TR2022-014}
- }
Ke, L., Peng, K.-C., Lyu, S., "Towards To-a-T Spatio-Temporal Focus for Skeleton-Based Action Recognition", AAAI Conference on Artificial Intelligence, February 2022.
BibTeX TR2022-015 PDF Presentation
- @inproceedings{Ke2022feb,
- author    = {Ke, Lipeng and Peng, Kuan-Chuan and Lyu, Siwei},
- title     = {{Towards To-a-T Spatio-Temporal Focus for Skeleton-Based Action Recognition}},
- booktitle = {AAAI Conference on Artificial Intelligence},
- year      = {2022},
- month     = feb,
- url       = {https://www.merl.com/publications/TR2022-015}
- }
Shah, A., Sra, S., Chellappa, R., Cherian, A., "Max-Margin Contrastive Learning", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i8.20796, February 2022, pp. 8220-8230.
BibTeX TR2022-013 PDF
- @inproceedings{Shah2022feb2,
- author = {Shah, Anshul and Sra, Suvrit and Chellappa, Rama and Cherian, Anoop},
- title = {{Max-Margin Contrastive Learning}},
- booktitle = {AAAI Conference on Artificial Intelligence},
- year = 2022,
- pages = {8220--8230},
- month = feb,
- doi = {10.1609/aaai.v36i8.20796},
- url = {https://www.merl.com/publications/TR2022-013}
- }
Medin, S.C., Egger, B., Cherian, A., Wang, Y., Tenenbaum, J.B., Liu, X., Marks, T.K., "MOST-GAN: 3D Morphable StyleGAN for Disentangled Face Image Manipulation", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i2.20091, February 2022, pp. 1962-1971.
BibTeX TR2022-011 PDF Video Data Presentation
- @inproceedings{Medin2022feb,
- author    = {Medin, Safa C. and Egger, Bernhard and Cherian, Anoop and Wang, Ye and Tenenbaum, Joshua B. and Liu, Xiaoming and Marks, Tim K.},
- title     = {{MOST-GAN: 3D Morphable StyleGAN for Disentangled Face Image Manipulation}},
- booktitle = {AAAI Conference on Artificial Intelligence},
- year      = {2022},
- month     = feb,
- pages     = {1962--1971},
- doi       = {10.1609/aaai.v36i2.20091},
- url       = {https://www.merl.com/publications/TR2022-011}
- }
Lohit, S., Jones, M.J., "Model Compression Using Optimal Transport", IEEE Winter Conference on Applications of Computer Vision (WACV), January 2022.
BibTeX TR2022-006 PDF Presentation
- @inproceedings{Lohit2022jan,
- author    = {Lohit, Suhas and Jones, Michael J.},
- title     = {{Model Compression Using Optimal Transport}},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- year      = {2022},
- month     = jan,
- publisher = {CVF OpenAccess},
- url       = {https://www.merl.com/publications/TR2022-006}
- }
Wang, Z.-Q., Wichern, G., Le Roux, J., "Convolutive Prediction for Monaural Speech Dereverberation and Noisy-Reverberant Speaker Separation", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2021.3129363, Vol. 29, pp. 3476-3490, December 2021.
BibTeX TR2021-144 PDF
- @article{Wang2021dec,
- author    = {Wang, Zhong-Qiu and Wichern, Gordon and {Le Roux}, Jonathan},
- title     = {{Convolutive Prediction for Monaural Speech Dereverberation and Noisy-Reverberant Speaker Separation}},
- journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year      = {2021},
- month     = dec,
- volume    = {29},
- pages     = {3476--3490},
- doi       = {10.1109/TASLP.2021.3129363},
- url       = {https://www.merl.com/publications/TR2021-144}
- }
Yu, X., van Baar, J., Chen, S., Sullivan, A., "Keypoint-aligned 3D Human Shape Recovery from A Single Image with Bilayer-Graph", International Conference on 3D Vision (3DV), DOI: 10.1109/3DV53792.2021.00060, December 2021, pp. 505-514.
BibTeX TR2021-143 PDF
- @inproceedings{Yu2021dec,
- author = {Yu, Xin and {van Baar}, Jeroen and Chen, Siheng and Sullivan, Alan},
- title = {{Keypoint-aligned 3D Human Shape Recovery from A Single Image with Bilayer-Graph}},
- booktitle = {International Conference on 3D Vision (3DV)},
- year = 2021,
- pages = {505--514},
- month = dec,
- doi = {10.1109/3DV53792.2021.00060},
- url = {https://www.merl.com/publications/TR2021-143}
- }
Wang, Z.-Q., Wichern, G., Le Roux, J., "On The Compensation Between Magnitude and Phase in Speech Separation", IEEE Signal Processing Letters, DOI: 10.1109/LSP.2021.3116502, Vol. 28, pp. 2018-2022, November 2021.
BibTeX TR2021-137 PDF
- @article{Wang2021nov2,
- author    = {Wang, Zhong-Qiu and Wichern, Gordon and {Le Roux}, Jonathan},
- title     = {{On The Compensation Between Magnitude and Phase in Speech Separation}},
- journal   = {IEEE Signal Processing Letters},
- year      = {2021},
- month     = nov,
- volume    = {28},
- pages     = {2018--2022},
- doi       = {10.1109/LSP.2021.3116502},
- url       = {https://www.merl.com/publications/TR2021-137}
- }
Demir, A., Koike-Akino, T., Wang, Y., Erdogmus, D., Haruna, M., "EEG-GNN: Graph Neural Networks for Classification of Electroencephalogram (EEG) Signals", Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC), DOI: 10.1109/EMBC46164.2021.9630194, October 2021.
BibTeX TR2021-136 PDF Video Presentation
- @inproceedings{Demir2021oct,
- author = {Demir, Andac and Koike-Akino, Toshiaki and Wang, Ye and Erdogmus, Deniz and Haruna, Masaki},
- title = {{EEG-GNN: Graph Neural Networks for Classification of Electroencephalogram (EEG) Signals}},
- booktitle = {Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC)},
- year = 2021,
- month = oct,
- publisher = {IEEE},
- doi = {10.1109/EMBC46164.2021.9630194},
- issn = {2694-0604},
- isbn = {978-1-7281-1179-7},
- url = {https://www.merl.com/publications/TR2021-136}
- }
Rakin, A.S., Wang, Y., Aeron, S., Koike-Akino, T., Moulin, P., Parsons, K., "Towards Universal Adversarial Examples and Defenses", IEEE Information Theory Workshop, DOI: 10.1109/ITW48936.2021.9611439, October 2021.
BibTeX TR2021-125 PDF Video
- @inproceedings{Rakin2021oct,
- author = {Rakin, Adnan S. and Wang, Ye and Aeron, Shuchin and Koike-Akino, Toshiaki and Moulin, Pierre and Parsons, Kieran},
- title = {{Towards Universal Adversarial Examples and Defenses}},
- booktitle = {IEEE Information Theory Workshop},
- year = 2021,
- month = oct,
- publisher = {IEEE},
- doi = {10.1109/ITW48936.2021.9611439},
- isbn = {978-1-6654-0312-2},
- url = {https://www.merl.com/publications/TR2021-125}
- }
Wang, Z.-Q., Wichern, G., Le Roux, J., "Convolutive Prediction for Reverberant Speech Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA52581.2021.9632667, October 2021, pp. 56-60.
BibTeX TR2021-127 PDF
- @inproceedings{Wang2021oct4,
- author    = {Wang, Zhong-Qiu and Wichern, Gordon and {Le Roux}, Jonathan},
- title     = {{Convolutive Prediction for Reverberant Speech Separation}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- year      = {2021},
- month     = oct,
- pages     = {56--60},
- publisher = {IEEE},
- doi       = {10.1109/WASPAA52581.2021.9632667},
- url       = {https://www.merl.com/publications/TR2021-127}
- }
Wichern, G., Chakrabarty, A., Wang, Z.-Q., Le Roux, J., "Anomalous sound detection using attentive neural processes", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA52581.2021.9632762, October 2021, pp. 186-190.
BibTeX TR2021-129 PDF
- @inproceedings{Wichern2021oct,
- author    = {Wichern, Gordon and Chakrabarty, Ankush and Wang, Zhong-Qiu and {Le Roux}, Jonathan},
- title     = {{Anomalous sound detection using attentive neural processes}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- year      = {2021},
- month     = oct,
- pages     = {186--190},
- publisher = {IEEE},
- doi       = {10.1109/WASPAA52581.2021.9632762},
- url       = {https://www.merl.com/publications/TR2021-129}
- }