@inproceedings{phan-etal-2023-enriching,
title = "Enriching Biomedical Knowledge for Low-resource Language Through Large-scale Translation",
author = "Phan, Long and
Dang, Tai and
Tran, Hieu and
Trinh, Trieu H. and
Phan, Vy and
Chau, Lam D. and
Luong, Minh-Thang",
editor = "Vlachos, Andreas and
Augenstein, Isabelle",
booktitle = "Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.eacl-main.228",
doi = "10.18653/v1/2023.eacl-main.228",
pages = "3131--3142",
abstract = "Biomedical data and benchmarks are highly valuable yet very limited in low-resource languages other than English, such as Vietnamese. In this paper, we use a state-of-the-art translation model in English-Vietnamese to translate and produce both pretrained and supervised data in the biomedical domains. Thanks to such large-scale translation, we introduce ViPubmedT5, a pretrained Encoder-Decoder Transformer model trained on 20 million translated abstracts from the high-quality public PubMed corpus. ViPubMedT5 demonstrates state-of-the-art results on two different biomedical benchmarks in summarization and acronym disambiguation. Further, we release ViMedNLI - a new NLP task in Vietnamese translated from MedNLI using the recently public En-vi translation model and carefully refined by human experts, with evaluations of existing methods against ViPubmedT5.",
}
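A minimal sketch of using the BibTeX record above from a LaTeX document, assuming the entry is saved to a local file named references.bib and compiled with pdflatex plus bibtex; the filename and the plain bibliography style are assumptions, not part of this export:

% cite.tex: cites the entry via the citekey from the Anthology export above
\documentclass{article}
\begin{document}
ViPubmedT5 was introduced by \cite{phan-etal-2023-enriching}.
\bibliographystyle{plain}   % any installed BibTeX style works here
\bibliography{references}   % expects references.bib containing the entry above
\end{document}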
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="phan-etal-2023-enriching">
<titleInfo>
<title>Enriching Biomedical Knowledge for Low-resource Language Through Large-scale Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Long</namePart>
<namePart type="family">Phan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tai</namePart>
<namePart type="family">Dang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hieu</namePart>
<namePart type="family">Tran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trieu</namePart>
<namePart type="given">H</namePart>
<namePart type="family">Trinh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vy</namePart>
<namePart type="family">Phan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lam</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Chau</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minh-Thang</namePart>
<namePart type="family">Luong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Biomedical data and benchmarks are highly valuable yet very limited in low-resource languages other than English, such as Vietnamese. In this paper, we use a state-of-the-art English-Vietnamese translation model to translate and produce both pretraining and supervised data in the biomedical domain. Thanks to such large-scale translation, we introduce ViPubmedT5, a pretrained Encoder-Decoder Transformer model trained on 20 million translated abstracts from the high-quality public PubMed corpus. ViPubmedT5 demonstrates state-of-the-art results on two different biomedical benchmarks in summarization and acronym disambiguation. Further, we release ViMedNLI, a new NLP task in Vietnamese translated from MedNLI using the recently released public En-Vi translation model and carefully refined by human experts, with evaluations of existing methods against ViPubmedT5.</abstract>
<identifier type="citekey">phan-etal-2023-enriching</identifier>
<identifier type="doi">10.18653/v1/2023.eacl-main.228</identifier>
<location>
<url>https://aclanthology.org/2023.eacl-main.228</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>3131</start>
<end>3142</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Enriching Biomedical Knowledge for Low-resource Language Through Large-scale Translation
%A Phan, Long
%A Dang, Tai
%A Tran, Hieu
%A Trinh, Trieu H.
%A Phan, Vy
%A Chau, Lam D.
%A Luong, Minh-Thang
%Y Vlachos, Andreas
%Y Augenstein, Isabelle
%S Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F phan-etal-2023-enriching
%X Biomedical data and benchmarks are highly valuable yet very limited in low-resource languages other than English, such as Vietnamese. In this paper, we use a state-of-the-art English-Vietnamese translation model to translate and produce both pretraining and supervised data in the biomedical domain. Thanks to such large-scale translation, we introduce ViPubmedT5, a pretrained Encoder-Decoder Transformer model trained on 20 million translated abstracts from the high-quality public PubMed corpus. ViPubmedT5 demonstrates state-of-the-art results on two different biomedical benchmarks in summarization and acronym disambiguation. Further, we release ViMedNLI, a new NLP task in Vietnamese translated from MedNLI using the recently released public En-Vi translation model and carefully refined by human experts, with evaluations of existing methods against ViPubmedT5.
%R 10.18653/v1/2023.eacl-main.228
%U https://aclanthology.org/2023.eacl-main.228
%U https://doi.org/10.18653/v1/2023.eacl-main.228
%P 3131-3142
Markdown (Informal)
[Enriching Biomedical Knowledge for Low-resource Language Through Large-scale Translation](https://aclanthology.org/2023.eacl-main.228) (Phan et al., EACL 2023)
ACL
Long Phan, Tai Dang, Hieu Tran, Trieu H. Trinh, Vy Phan, Lam D. Chau, and Minh-Thang Luong. 2023. Enriching Biomedical Knowledge for Low-resource Language Through Large-scale Translation. In Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics, pages 3131–3142, Dubrovnik, Croatia. Association for Computational Linguistics.