@inproceedings{yu-etal-2024-speech,
title = "Speech Sense Disambiguation: Tackling Homophone Ambiguity in End-to-End Speech Translation",
author = "Yu, Tengfei and
Liu, Xuebo and
Ding, Liang and
Chen, Kehai and
Tao, Dacheng and
Zhang, Min",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.acl-long.435",
doi = "10.18653/v1/2024.acl-long.435",
pages = "8020--8035",
abstract = "End-to-end speech translation (ST) presents notable disambiguation challenges as it necessitates simultaneous cross-modal and cross-lingual transformations. While word sense disambiguation is an extensively investigated topic in textual machine translation, the exploration of disambiguation strategies for ST models remains limited. Addressing this gap, this paper introduces the concept of speech sense disambiguation (SSD), specifically emphasizing homophones - words pronounced identically but with different meanings. To facilitate this, we first create a comprehensive homophone dictionary and an annotated dataset rich with homophone information established based on speech-text alignment. Building on this unique dictionary, we introduce AmbigST, an innovative homophone-aware contrastive learning approach that integrates a homophone-aware masking strategy. Our experiments on different MuST-C and CoVoST ST benchmarks demonstrate that AmbigST sets new performance standards. Specifically, it achieves SOTA results on BLEU scores for English to German, Spanish, and French ST tasks, underlining its effectiveness in reducing speech sense ambiguity. Data, code and scripts are freely available at https://github.com/ytf-philp/AmbigST.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yu-etal-2024-speech">
<titleInfo>
<title>Speech Sense Disambiguation: Tackling Homophone Ambiguity in End-to-End Speech Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tengfei</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuebo</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liang</namePart>
<namePart type="family">Ding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kehai</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dacheng</namePart>
<namePart type="family">Tao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>End-to-end speech translation (ST) presents notable disambiguation challenges as it necessitates simultaneous cross-modal and cross-lingual transformations. While word sense disambiguation is an extensively investigated topic in textual machine translation, the exploration of disambiguation strategies for ST models remains limited. Addressing this gap, this paper introduces the concept of speech sense disambiguation (SSD), specifically emphasizing homophones - words pronounced identically but with different meanings. To facilitate this, we first create a comprehensive homophone dictionary and an annotated dataset rich with homophone information established based on speech-text alignment. Building on this unique dictionary, we introduce AmbigST, an innovative homophone-aware contrastive learning approach that integrates a homophone-aware masking strategy. Our experiments on different MuST-C and CoVoST ST benchmarks demonstrate that AmbigST sets new performance standards. Specifically, it achieves SOTA results on BLEU scores for English to German, Spanish, and French ST tasks, underlining its effectiveness in reducing speech sense ambiguity. Data, code and scripts are freely available at https://github.com/ytf-philp/AmbigST.</abstract>
<identifier type="citekey">yu-etal-2024-speech</identifier>
<identifier type="doi">10.18653/v1/2024.acl-long.435</identifier>
<location>
<url>https://aclanthology.org/2024.acl-long.435</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>8020</start>
<end>8035</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Speech Sense Disambiguation: Tackling Homophone Ambiguity in End-to-End Speech Translation
%A Yu, Tengfei
%A Liu, Xuebo
%A Ding, Liang
%A Chen, Kehai
%A Tao, Dacheng
%A Zhang, Min
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F yu-etal-2024-speech
%X End-to-end speech translation (ST) presents notable disambiguation challenges as it necessitates simultaneous cross-modal and cross-lingual transformations. While word sense disambiguation is an extensively investigated topic in textual machine translation, the exploration of disambiguation strategies for ST models remains limited. Addressing this gap, this paper introduces the concept of speech sense disambiguation (SSD), specifically emphasizing homophones - words pronounced identically but with different meanings. To facilitate this, we first create a comprehensive homophone dictionary and an annotated dataset rich with homophone information established based on speech-text alignment. Building on this unique dictionary, we introduce AmbigST, an innovative homophone-aware contrastive learning approach that integrates a homophone-aware masking strategy. Our experiments on different MuST-C and CoVoST ST benchmarks demonstrate that AmbigST sets new performance standards. Specifically, it achieves SOTA results on BLEU scores for English to German, Spanish, and French ST tasks, underlining its effectiveness in reducing speech sense ambiguity. Data, code and scripts are freely available at https://github.com/ytf-philp/AmbigST.
%R 10.18653/v1/2024.acl-long.435
%U https://aclanthology.org/2024.acl-long.435
%U https://doi.org/10.18653/v1/2024.acl-long.435
%P 8020-8035
Markdown (Informal)
[Speech Sense Disambiguation: Tackling Homophone Ambiguity in End-to-End Speech Translation](https://aclanthology.org/2024.acl-long.435) (Yu et al., ACL 2024)
ACL