% Conference paper in the ROCLING 2022 proceedings; the `language` field
% records the paper's language as Chinese.
% Proper nouns in the title are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitals.
@inproceedings{hung-huang-2022-preliminary,
    title = "A Preliminary Study on {Mandarin}-{Hakka} neural machine translation using small-sized data",
    author = "Hung, Yi-Hsiang and
      Huang, Yi-Chin",
    editor = "Chang, Yung-Chun and
      Huang, Yi-Chin",
    booktitle = "Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)",
    month = nov,
    year = "2022",
    address = "Taipei, Taiwan",
    publisher = "The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)",
    url = "https://aclanthology.org/2022.rocling-1.38",
    pages = "307--315",
    abstract = "In this study, we implemented a machine translation system using the Convolutional Neural Network with Attention mechanism for translating Mandarin to Sixan-accent Hakka. Specifically, to cope with the different idioms or terms used between Northern and Southern Sixan-accent, we analyzed the corpus differences and lexicon definition, and then separated the various word usages for training exclusive models for each accent. Besides, since the collected Hakka corpora are relatively limited, the unseen words frequently occurred during real-world translation. In our system, we selected suitable thresholds for each model based on the model verification to reject non-suitable translated words. Then, by applying the proposed algorithm, which adopted the forced Hakka idioms/terms segmentation and the common Mandarin word substitution, the resultant translation sentences become more intelligible. Therefore, the proposed system achieved promising results using small-sized data. This system could be used for Hakka language teaching and also the front-end of Mandarin and Hakka code-switching speech synthesis systems.",
    language = "Chinese",
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the citation key hung-huang-2022-preliminary. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hung-huang-2022-preliminary">
    <!-- The paper itself: title, authors, issue date, language. -->
    <titleInfo>
      <title>A Preliminary Study on Mandarin-Hakka neural machine translation using small-sized data</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Yi-Hsiang</namePart>
      <namePart type="family">Hung</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yi-Chin</namePart>
      <namePart type="family">Huang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <language>
      <languageTerm type="text">Chinese</languageTerm>
      <languageTerm type="code" authority="iso639-2b">chi</languageTerm>
    </language>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yung-Chun</namePart>
        <namePart type="family">Chang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yi-Chin</namePart>
        <namePart type="family">Huang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
        <place>
          <placeTerm type="text">Taipei, Taiwan</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this study, we implemented a machine translation system using the Convolutional Neural Network with Attention mechanism for translating Mandarin to Sixan-accent Hakka. Specifically, to cope with the different idioms or terms used between Northern and Southern Sixan-accent, we analyzed the corpus differences and lexicon definition, and then separated the various word usages for training exclusive models for each accent. Besides, since the collected Hakka corpora are relatively limited, the unseen words frequently occurred during real-world translation. In our system, we selected suitable thresholds for each model based on the model verification to reject non-suitable translated words. Then, by applying the proposed algorithm, which adopted the forced Hakka idioms/terms segmentation and the common Mandarin word substitution, the resultant translation sentences become more intelligible. Therefore, the proposed system achieved promising results using small-sized data. This system could be used for Hakka language teaching and also the front-end of Mandarin and Hakka code-switching speech synthesis systems.</abstract>
    <!-- Identifiers and location of the paper. -->
    <identifier type="citekey">hung-huang-2022-preliminary</identifier>
    <location>
      <url>https://aclanthology.org/2022.rocling-1.38</url>
    </location>
    <!-- Date and page extent of the paper within the host volume. -->
    <part>
      <date>2022-11</date>
      <extent unit="page">
        <start>307</start>
        <end>315</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Preliminary Study on Mandarin-Hakka neural machine translation using small-sized data
%A Hung, Yi-Hsiang
%A Huang, Yi-Chin
%Y Chang, Yung-Chun
%Y Huang, Yi-Chin
%S Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)
%D 2022
%8 November
%I The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
%C Taipei, Taiwan
%G Chinese
%F hung-huang-2022-preliminary
%X In this study, we implemented a machine translation system using the Convolutional Neural Network with Attention mechanism for translating Mandarin to Sixan-accent Hakka. Specifically, to cope with the different idioms or terms used between Northern and Southern Sixan-accent, we analyzed the corpus differences and lexicon definition, and then separated the various word usages for training exclusive models for each accent. Besides, since the collected Hakka corpora are relatively limited, the unseen words frequently occurred during real-world translation. In our system, we selected suitable thresholds for each model based on the model verification to reject non-suitable translated words. Then, by applying the proposed algorithm, which adopted the forced Hakka idioms/terms segmentation and the common Mandarin word substitution, the resultant translation sentences become more intelligible. Therefore, the proposed system achieved promising results using small-sized data. This system could be used for Hakka language teaching and also the front-end of Mandarin and Hakka code-switching speech synthesis systems.
%U https://aclanthology.org/2022.rocling-1.38
%P 307-315
Markdown (Informal)
[A Preliminary Study on Mandarin-Hakka neural machine translation using small-sized data](https://aclanthology.org/2022.rocling-1.38) (Hung & Huang, ROCLING 2022)
ACL