@inproceedings{sakaguchi-etal-2024-identifying,
title = "Identifying Source Language Expressions for Pre-editing in Machine Translation",
author = "Sakaguchi, Norizo and
Murawaki, Yugo and
Chu, Chenhui and
Kurohashi, Sadao",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.755",
pages = "8605--8616",
abstract = "Machine translation-mediated communication can benefit from pre-editing source language texts to ensure accurate transmission of intended meaning in the target language. The primary challenge lies in identifying source language expressions that pose difficulties in translation. In this paper, we hypothesize that such expressions tend to be distinctive features of texts originally written in the source language (native language) rather than translations generated from the target language into the source language (machine translation). To identify such expressions, we train a neural classifier to distinguish native language from machine translation, and subsequently isolate the expressions that contribute to the model{'}s prediction of native language. Our manual evaluation revealed that our method successfully identified characteristic expressions of the native language, despite the noise and the inherent nuances of the task. We also present case studies where we edit the identified expressions to improve translation quality.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sakaguchi-etal-2024-identifying">
<titleInfo>
<title>Identifying Source Language Expressions for Pre-editing in Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Norizo</namePart>
<namePart type="family">Sakaguchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yugo</namePart>
<namePart type="family">Murawaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenhui</namePart>
<namePart type="family">Chu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sadao</namePart>
<namePart type="family">Kurohashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Machine translation-mediated communication can benefit from pre-editing source language texts to ensure accurate transmission of intended meaning in the target language. The primary challenge lies in identifying source language expressions that pose difficulties in translation. In this paper, we hypothesize that such expressions tend to be distinctive features of texts originally written in the source language (native language) rather than translations generated from the target language into the source language (machine translation). To identify such expressions, we train a neural classifier to distinguish native language from machine translation, and subsequently isolate the expressions that contribute to the model’s prediction of native language. Our manual evaluation revealed that our method successfully identified characteristic expressions of the native language, despite the noise and the inherent nuances of the task. We also present case studies where we edit the identified expressions to improve translation quality.</abstract>
<identifier type="citekey">sakaguchi-etal-2024-identifying</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.755</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>8605</start>
<end>8616</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Identifying Source Language Expressions for Pre-editing in Machine Translation
%A Sakaguchi, Norizo
%A Murawaki, Yugo
%A Chu, Chenhui
%A Kurohashi, Sadao
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F sakaguchi-etal-2024-identifying
%X Machine translation-mediated communication can benefit from pre-editing source language texts to ensure accurate transmission of intended meaning in the target language. The primary challenge lies in identifying source language expressions that pose difficulties in translation. In this paper, we hypothesize that such expressions tend to be distinctive features of texts originally written in the source language (native language) rather than translations generated from the target language into the source language (machine translation). To identify such expressions, we train a neural classifier to distinguish native language from machine translation, and subsequently isolate the expressions that contribute to the model’s prediction of native language. Our manual evaluation revealed that our method successfully identified characteristic expressions of the native language, despite the noise and the inherent nuances of the task. We also present case studies where we edit the identified expressions to improve translation quality.
%U https://aclanthology.org/2024.lrec-main.755
%P 8605-8616
Markdown (Informal)
[Identifying Source Language Expressions for Pre-editing in Machine Translation](https://aclanthology.org/2024.lrec-main.755) (Sakaguchi et al., LREC-COLING 2024)
ACL