@inproceedings{morishita-etal-2022-domain,
title = "Domain Adaptation of Machine Translation with Crowdworkers",
author = "Morishita, Makoto and
Suzuki, Jun and
Nagata, Masaaki",
editor = "Li, Yunyao and
Lazaridou, Angeliki",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = dec,
year = "2022",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-industry.62",
doi = "10.18653/v1/2022.emnlp-industry.62",
pages = "606--618",
abstract = "Although a machine translation model trained with a large in-domain parallel corpus achieves remarkable results, it still works poorly when no in-domain data are available. This situation restricts the applicability of machine translation when the target domain{'}s data are limited. However, there is great demand for high-quality domain-specific machine translation models for many domains. We propose a framework that efficiently and effectively collects parallel sentences in a target domain from the web with the help of crowdworkers.With the collected parallel data, we can quickly adapt a machine translation model to the target domain. Our experiments show that the proposed method can collect target-domain parallel data over a few days at a reasonable cost. We tested it with five domains, and the domain-adapted model improved the BLEU scores to +19.7 by an average of +7.8 points compared to a general-purpose translation model.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="morishita-etal-2022-domain">
<titleInfo>
<title>Domain Adaptation of Machine Translation with Crowdworkers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Makoto</namePart>
<namePart type="family">Morishita</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun</namePart>
<namePart type="family">Suzuki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masaaki</namePart>
<namePart type="family">Nagata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yunyao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angeliki</namePart>
<namePart type="family">Lazaridou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Although a machine translation model trained with a large in-domain parallel corpus achieves remarkable results, it still works poorly when no in-domain data are available. This situation restricts the applicability of machine translation when the target domain’s data are limited. However, there is great demand for high-quality domain-specific machine translation models for many domains. We propose a framework that efficiently and effectively collects parallel sentences in a target domain from the web with the help of crowdworkers.With the collected parallel data, we can quickly adapt a machine translation model to the target domain. Our experiments show that the proposed method can collect target-domain parallel data over a few days at a reasonable cost. We tested it with five domains, and the domain-adapted model improved the BLEU scores to +19.7 by an average of +7.8 points compared to a general-purpose translation model.</abstract>
<identifier type="citekey">morishita-etal-2022-domain</identifier>
<identifier type="doi">10.18653/v1/2022.emnlp-industry.62</identifier>
<location>
<url>https://aclanthology.org/2022.emnlp-industry.62</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>606</start>
<end>618</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Domain Adaptation of Machine Translation with Crowdworkers
%A Morishita, Makoto
%A Suzuki, Jun
%A Nagata, Masaaki
%Y Li, Yunyao
%Y Lazaridou, Angeliki
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F morishita-etal-2022-domain
%X Although a machine translation model trained with a large in-domain parallel corpus achieves remarkable results, it still works poorly when no in-domain data are available. This situation restricts the applicability of machine translation when the target domain’s data are limited. However, there is great demand for high-quality domain-specific machine translation models for many domains. We propose a framework that efficiently and effectively collects parallel sentences in a target domain from the web with the help of crowdworkers.With the collected parallel data, we can quickly adapt a machine translation model to the target domain. Our experiments show that the proposed method can collect target-domain parallel data over a few days at a reasonable cost. We tested it with five domains, and the domain-adapted model improved the BLEU scores to +19.7 by an average of +7.8 points compared to a general-purpose translation model.
%R 10.18653/v1/2022.emnlp-industry.62
%U https://aclanthology.org/2022.emnlp-industry.62
%U https://doi.org/10.18653/v1/2022.emnlp-industry.62
%P 606-618
Markdown (Informal)
[Domain Adaptation of Machine Translation with Crowdworkers](https://aclanthology.org/2022.emnlp-industry.62) (Morishita et al., EMNLP 2022)
ACL
- Makoto Morishita, Jun Suzuki, and Masaaki Nagata. 2022. Domain Adaptation of Machine Translation with Crowdworkers. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 606–618, Abu Dhabi, UAE. Association for Computational Linguistics.