@inproceedings{nakanishi-etal-2018-answerable,
title = "Answerable or Not: Devising a Dataset for Extending Machine Reading Comprehension",
author = "Nakanishi, Mao and
Kobayashi, Tetsunori and
Hayashi, Yoshihiko",
editor = "Bender, Emily M. and
Derczynski, Leon and
Isabelle, Pierre",
booktitle = "Proceedings of the 27th International Conference on Computational Linguistics",
month = aug,
year = "2018",
address = "Santa Fe, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/C18-1083",
pages = "973--983",
abstract = "Machine-reading comprehension (MRC) has recently attracted attention in the fields of natural language processing and machine learning. One of the problematic presumptions with current MRC technologies is that each question is assumed to be answerable by looking at a given text passage. However, to realize human-like language comprehension ability, a machine should also be able to distinguish not-answerable questions (NAQs) from answerable questions. To develop this functionality, a dataset incorporating hard-to-detect NAQs is vital; however, its manual construction would be expensive. This paper proposes a dataset creation method that alters an existing MRC dataset, the Stanford Question Answering Dataset, and describes the resulting dataset. The value of this dataset is likely to increase if each NAQ in the dataset is properly classified with the difficulty of identifying it as an NAQ. This difficulty level would allow researchers to evaluate a machine{'}s NAQ detection performance more precisely. Therefore, we propose a method for automatically assigning difficulty level labels, which measures the similarity between a question and the target text passage. Our NAQ detection experiments demonstrate that the resulting dataset, having difficulty level annotations, is valid and potentially useful in the development of advanced MRC models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nakanishi-etal-2018-answerable">
<titleInfo>
<title>Answerable or Not: Devising a Dataset for Extending Machine Reading Comprehension</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mao</namePart>
<namePart type="family">Nakanishi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tetsunori</namePart>
<namePart type="family">Kobayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoshihiko</namePart>
<namePart type="family">Hayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 27th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Bender</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leon</namePart>
<namePart type="family">Derczynski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Isabelle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Santa Fe, New Mexico, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Machine-reading comprehension (MRC) has recently attracted attention in the fields of natural language processing and machine learning. One of the problematic presumptions with current MRC technologies is that each question is assumed to be answerable by looking at a given text passage. However, to realize human-like language comprehension ability, a machine should also be able to distinguish not-answerable questions (NAQs) from answerable questions. To develop this functionality, a dataset incorporating hard-to-detect NAQs is vital; however, its manual construction would be expensive. This paper proposes a dataset creation method that alters an existing MRC dataset, the Stanford Question Answering Dataset, and describes the resulting dataset. The value of this dataset is likely to increase if each NAQ in the dataset is properly classified with the difficulty of identifying it as an NAQ. This difficulty level would allow researchers to evaluate a machine’s NAQ detection performance more precisely. Therefore, we propose a method for automatically assigning difficulty level labels, which measures the similarity between a question and the target text passage. Our NAQ detection experiments demonstrate that the resulting dataset, having difficulty level annotations, is valid and potentially useful in the development of advanced MRC models.</abstract>
<identifier type="citekey">nakanishi-etal-2018-answerable</identifier>
<location>
<url>https://aclanthology.org/C18-1083</url>
</location>
<part>
<date>2018-08</date>
<extent unit="page">
<start>973</start>
<end>983</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Answerable or Not: Devising a Dataset for Extending Machine Reading Comprehension
%A Nakanishi, Mao
%A Kobayashi, Tetsunori
%A Hayashi, Yoshihiko
%Y Bender, Emily M.
%Y Derczynski, Leon
%Y Isabelle, Pierre
%S Proceedings of the 27th International Conference on Computational Linguistics
%D 2018
%8 August
%I Association for Computational Linguistics
%C Santa Fe, New Mexico, USA
%F nakanishi-etal-2018-answerable
%X Machine-reading comprehension (MRC) has recently attracted attention in the fields of natural language processing and machine learning. One of the problematic presumptions with current MRC technologies is that each question is assumed to be answerable by looking at a given text passage. However, to realize human-like language comprehension ability, a machine should also be able to distinguish not-answerable questions (NAQs) from answerable questions. To develop this functionality, a dataset incorporating hard-to-detect NAQs is vital; however, its manual construction would be expensive. This paper proposes a dataset creation method that alters an existing MRC dataset, the Stanford Question Answering Dataset, and describes the resulting dataset. The value of this dataset is likely to increase if each NAQ in the dataset is properly classified with the difficulty of identifying it as an NAQ. This difficulty level would allow researchers to evaluate a machine’s NAQ detection performance more precisely. Therefore, we propose a method for automatically assigning difficulty level labels, which measures the similarity between a question and the target text passage. Our NAQ detection experiments demonstrate that the resulting dataset, having difficulty level annotations, is valid and potentially useful in the development of advanced MRC models.
%U https://aclanthology.org/C18-1083
%P 973-983
Markdown (Informal)
[Answerable or Not: Devising a Dataset for Extending Machine Reading Comprehension](https://aclanthology.org/C18-1083) (Nakanishi et al., COLING 2018)
ACL