% Conference paper introducing the Finnish Paraphrase Corpus (NoDaLiDa 2021).
% Maintainer notes:
%   - Accented letters are written as brace-wrapped LaTeX escapes so that
%     classic BibTeX treats each one as a single character.
%   - Words that must keep their capitalisation are braced as whole words,
%     so styles that recase titles leave them intact.
%   - The month field joins the standard month macros with the day range of
%     the event (31 May to 2 June).
@inproceedings{kanerva-etal-2021-finnish,
  title     = {{Finnish} Paraphrase Corpus},
  author    = {Kanerva, Jenna and
               Ginter, Filip and
               Chang, Li-Hsin and
               Rastas, Iiro and
               Skantsi, Valtteri and
               Kilpel{\"a}inen, Jemina and
               Kupari, Hanna-Mari and
               Saarni, Jenna and
               Sev{\'o}n, Maija and
               Tarkka, Otto},
  editor    = {Dobnik, Simon and
               {\O}vrelid, Lilja},
  booktitle = {Proceedings of the 23rd {Nordic} Conference on Computational Linguistics ({NoDaLiDa})},
  month     = may # " 31--2 " # jun,
  year      = {2021},
  address   = {Reykjavik, Iceland (Online)},
  publisher = {Link{\"o}ping University Electronic Press, Sweden},
  url       = {https://aclanthology.org/2021.nodalida-main.29},
  pages     = {288--298},
  abstract  = {In this paper, we introduce the first fully manually annotated paraphrase corpus for Finnish containing 53,572 paraphrase pairs harvested from alternative subtitles and news headings. Out of all paraphrase pairs in our corpus 98{\%} are manually classified to be paraphrases at least in their given context, if not in all contexts. Additionally, we establish a manual candidate selection method and demonstrate its feasibility in high quality paraphrase selection in terms of both cost and quality.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!--
  MODS record for the paper "Finnish Paraphrase Corpus"
  (citekey: kanerva-etal-2021-finnish).
  Layout: the top-level <mods> element describes the paper itself (title,
  authors, abstract, URL, page extent); the nested <relatedItem type="host">
  describes the proceedings volume it appears in (title, editors, publisher,
  place). Dates are given as an encoded start/end pair covering
  31 May 2021 to 2 June 2021.
-->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kanerva-etal-2021-finnish">
    <titleInfo>
      <title>Finnish Paraphrase Corpus</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Jenna</namePart>
      <namePart type="family">Kanerva</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Filip</namePart>
      <namePart type="family">Ginter</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Li-Hsin</namePart>
      <namePart type="family">Chang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Iiro</namePart>
      <namePart type="family">Rastas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Valtteri</namePart>
      <namePart type="family">Skantsi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jemina</namePart>
      <namePart type="family">Kilpeläinen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hanna-Mari</namePart>
      <namePart type="family">Kupari</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jenna</namePart>
      <namePart type="family">Saarni</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Maija</namePart>
      <namePart type="family">Sevón</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Otto</namePart>
      <namePart type="family">Tarkka</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <!-- Date range expressed as an encoded start/end pair. -->
      <dateIssued encoding="w3cdtf" point="start">2021-05-31</dateIssued>
      <dateIssued encoding="w3cdtf" point="end">2021-06-02</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing this paper. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Simon</namePart>
        <namePart type="family">Dobnik</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lilja</namePart>
        <namePart type="family">Øvrelid</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Linköping University Electronic Press, Sweden</publisher>
        <place>
          <placeTerm type="text">Reykjavik, Iceland (Online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper, we introduce the first fully manually annotated paraphrase corpus for Finnish containing 53,572 paraphrase pairs harvested from alternative subtitles and news headings. Out of all paraphrase pairs in our corpus 98% are manually classified to be paraphrases at least in their given context, if not in all contexts. Additionally, we establish a manual candidate selection method and demonstrate its feasibility in high quality paraphrase selection in terms of both cost and quality.</abstract>
    <identifier type="citekey">kanerva-etal-2021-finnish</identifier>
    <location>
      <url>https://aclanthology.org/2021.nodalida-main.29</url>
    </location>
    <part>
      <!-- Same date range as originInfo above, followed by the page extent within the host volume. -->
      <date encoding="w3cdtf" point="start">2021-05-31</date>
      <date encoding="w3cdtf" point="end">2021-06-02</date>
      <extent unit="page">
        <start>288</start>
        <end>298</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Finnish Paraphrase Corpus
%A Kanerva, Jenna
%A Ginter, Filip
%A Chang, Li-Hsin
%A Rastas, Iiro
%A Skantsi, Valtteri
%A Kilpeläinen, Jemina
%A Kupari, Hanna-Mari
%A Saarni, Jenna
%A Sevón, Maija
%A Tarkka, Otto
%Y Dobnik, Simon
%Y Øvrelid, Lilja
%S Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)
%D 2021
%8 May 31–June 2
%I Linköping University Electronic Press, Sweden
%C Reykjavik, Iceland (Online)
%F kanerva-etal-2021-finnish
%X In this paper, we introduce the first fully manually annotated paraphrase corpus for Finnish containing 53,572 paraphrase pairs harvested from alternative subtitles and news headings. Out of all paraphrase pairs in our corpus 98% are manually classified to be paraphrases at least in their given context, if not in all contexts. Additionally, we establish a manual candidate selection method and demonstrate its feasibility in high quality paraphrase selection in terms of both cost and quality.
%U https://aclanthology.org/2021.nodalida-main.29
%P 288-298
Markdown (Informal)
[Finnish Paraphrase Corpus](https://aclanthology.org/2021.nodalida-main.29) (Kanerva et al., NoDaLiDa 2021)
ACL
- Jenna Kanerva, Filip Ginter, Li-Hsin Chang, Iiro Rastas, Valtteri Skantsi, Jemina Kilpeläinen, Hanna-Mari Kupari, Jenna Saarni, Maija Sevón, and Otto Tarkka. 2021. Finnish Paraphrase Corpus. In Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa), pages 288–298, Reykjavik, Iceland (Online). Linköping University Electronic Press, Sweden.