@inproceedings{kasanishi-etal-2023-scireviewgen,
title = "{S}ci{R}eview{G}en: A Large-scale Dataset for Automatic Literature Review Generation",
author = "Kasanishi, Tetsu and
Isonuma, Masaru and
Mori, Junichiro and
Sakata, Ichiro",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.418",
doi = "10.18653/v1/2023.findings-acl.418",
pages = "6695--6715",
abstract = "Automatic literature review generation is one of the most challenging tasks in natural language processing. Although large language models have tackled literature review generation, the absence of large-scale datasets has been a stumbling block to the progress. We release SciReviewGen, consisting of over 10,000 literature reviews and 690,000 papers cited in the reviews. Based on the dataset, we evaluate recent transformer-based summarization models on the literature review generation task, including Fusion-in-Decoder extended for literature review generation. Human evaluation results show that some machine-generated summaries are comparable to human-written reviews, while revealing the challenges of automatic literature review generation such as hallucinations and a lack of detailed information. Our dataset and code are available at \url{https://github.com/tetsu9923/SciReviewGen}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kasanishi-etal-2023-scireviewgen">
<titleInfo>
<title>SciReviewGen: A Large-scale Dataset for Automatic Literature Review Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tetsu</namePart>
<namePart type="family">Kasanishi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masaru</namePart>
<namePart type="family">Isonuma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichiro</namePart>
<namePart type="family">Mori</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ichiro</namePart>
<namePart type="family">Sakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automatic literature review generation is one of the most challenging tasks in natural language processing. Although large language models have tackled literature review generation, the absence of large-scale datasets has been a stumbling block to the progress. We release SciReviewGen, consisting of over 10,000 literature reviews and 690,000 papers cited in the reviews. Based on the dataset, we evaluate recent transformer-based summarization models on the literature review generation task, including Fusion-in-Decoder extended for literature review generation. Human evaluation results show that some machine-generated summaries are comparable to human-written reviews, while revealing the challenges of automatic literature review generation such as hallucinations and a lack of detailed information. Our dataset and code are available at https://github.com/tetsu9923/SciReviewGen.</abstract>
<identifier type="citekey">kasanishi-etal-2023-scireviewgen</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.418</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.418</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>6695</start>
<end>6715</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SciReviewGen: A Large-scale Dataset for Automatic Literature Review Generation
%A Kasanishi, Tetsu
%A Isonuma, Masaru
%A Mori, Junichiro
%A Sakata, Ichiro
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F kasanishi-etal-2023-scireviewgen
%X Automatic literature review generation is one of the most challenging tasks in natural language processing. Although large language models have tackled literature review generation, the absence of large-scale datasets has been a stumbling block to the progress. We release SciReviewGen, consisting of over 10,000 literature reviews and 690,000 papers cited in the reviews. Based on the dataset, we evaluate recent transformer-based summarization models on the literature review generation task, including Fusion-in-Decoder extended for literature review generation. Human evaluation results show that some machine-generated summaries are comparable to human-written reviews, while revealing the challenges of automatic literature review generation such as hallucinations and a lack of detailed information. Our dataset and code are available at https://github.com/tetsu9923/SciReviewGen.
%R 10.18653/v1/2023.findings-acl.418
%U https://aclanthology.org/2023.findings-acl.418
%U https://doi.org/10.18653/v1/2023.findings-acl.418
%P 6695-6715
Markdown (Informal)
[SciReviewGen: A Large-scale Dataset for Automatic Literature Review Generation](https://aclanthology.org/2023.findings-acl.418) (Kasanishi et al., Findings 2023)
ACL