@inproceedings{guo-etal-2023-m2c,
title = "{M}2{C}: Towards Automatic Multimodal Manga Complement",
author = "Guo, Hongcheng and
Wang, Boyang and
Bai, Jiaqi and
Liu, Jiaheng and
Yang, Jian and
Li, Zhoujun",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.661",
doi = "10.18653/v1/2023.findings-emnlp.661",
pages = "9876--9882",
abstract = "Multimodal manga analysis focuses on enhancing manga understanding with visual and textual features, which has attracted considerable attention from both natural language processing and computer vision communities. Currently, most comics are hand-drawn and prone to problems such as missing pages, text contamination, and text aging, resulting in missing comic text content and seriously hindering human comprehension. In other words, the Multimodal Manga Complement (\textbf{M2C}) task has not been investigated, which aims to handle the aforementioned issues by providing a shared semantic space for vision and language understanding. To this end, we first propose the Multimodal Manga Complement task by establishing a new M2C benchmark dataset covering two languages. First, we design a manga argumentation method called MCoT to mine event knowledge in comics with large language models. Then, an effective baseline FVP-M$^{2}$ using fine-grained visual prompts is proposed to support manga complement. Extensive experimental results show the effectiveness of FVP-M$^{2}$ method for Multimodal Mange Complement.",
}
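For scripted reuse, the entry above parses cleanly with any BibTeX library. A minimal Python sketch, assuming the third-party bibtexparser package (v1 API) and a hypothetical file name holding the entry:

# Minimal sketch: reading the BibTeX entry above with the third-party
# bibtexparser package (v1 API assumed; pip install bibtexparser).
import bibtexparser

# "guo-etal-2023-m2c.bib" is a hypothetical file containing the entry above.
with open("guo-etal-2023-m2c.bib") as f:
    db = bibtexparser.load(f)

entry = db.entries[0]  # each entry is exposed as a plain dict
print(entry["ID"])     # guo-etal-2023-m2c
print(entry["doi"])    # 10.18653/v1/2023.findings-emnlp.661
print(entry["pages"])  # 9876--9882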
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="guo-etal-2023-m2c">
    <titleInfo>
      <title>M2C: Towards Automatic Multimodal Manga Complement</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Hongcheng</namePart>
      <namePart type="family">Guo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Boyang</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jiaqi</namePart>
      <namePart type="family">Bai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jiaheng</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jian</namePart>
      <namePart type="family">Yang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zhoujun</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="family">Pino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kalika</namePart>
        <namePart type="family">Bali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Multimodal manga analysis focuses on enhancing manga understanding with visual and textual features, and it has attracted considerable attention from both the natural language processing and computer vision communities. Currently, most comics are hand-drawn and prone to problems such as missing pages, text contamination, and text aging, resulting in missing comic text content that seriously hinders human comprehension. However, the Multimodal Manga Complement (M2C) task, which aims to handle these issues by providing a shared semantic space for vision and language understanding, has not yet been investigated. To this end, we first propose the Multimodal Manga Complement task and establish a new M2C benchmark dataset covering two languages. We then design a manga augmentation method called MCoT to mine event knowledge in comics with large language models. Finally, we propose an effective baseline, FVP-M², which uses fine-grained visual prompts to support manga complement. Extensive experimental results show the effectiveness of the FVP-M² method for Multimodal Manga Complement.</abstract>
<identifier type="citekey">guo-etal-2023-m2c</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.661</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.661</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>9876</start>
<end>9882</end>
</extent>
</part>
</mods>
</modsCollection>
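The MODS record above is namespace-qualified, so every XPath lookup needs the http://www.loc.gov/mods/v3 namespace. A minimal sketch using only the Python standard library (the file name is hypothetical):

# Minimal sketch: pulling the author list out of the MODS record above
# with xml.etree.ElementTree from the Python standard library.
import xml.etree.ElementTree as ET

NS = {"mods": "http://www.loc.gov/mods/v3"}  # namespace declared on <modsCollection>

# "guo-etal-2023-m2c.xml" is a hypothetical file containing the record above.
record = ET.parse("guo-etal-2023-m2c.xml").getroot().find("mods:mods", NS)

authors = []
# findall matches direct children only, so the editors nested
# inside <relatedItem> are not picked up here.
for name in record.findall("mods:name", NS):
    role = name.findtext("mods:role/mods:roleTerm", namespaces=NS)
    if role == "author":
        given = name.findtext("mods:namePart[@type='given']", namespaces=NS)
        family = name.findtext("mods:namePart[@type='family']", namespaces=NS)
        authors.append(f"{given} {family}")

print(authors)  # ['Hongcheng Guo', 'Boyang Wang', ..., 'Zhoujun Li']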
%0 Conference Proceedings
%T M2C: Towards Automatic Multimodal Manga Complement
%A Guo, Hongcheng
%A Wang, Boyang
%A Bai, Jiaqi
%A Liu, Jiaheng
%A Yang, Jian
%A Li, Zhoujun
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F guo-etal-2023-m2c
%X Multimodal manga analysis focuses on enhancing manga understanding with visual and textual features, and it has attracted considerable attention from both the natural language processing and computer vision communities. Currently, most comics are hand-drawn and prone to problems such as missing pages, text contamination, and text aging, resulting in missing comic text content that seriously hinders human comprehension. However, the Multimodal Manga Complement (M2C) task, which aims to handle these issues by providing a shared semantic space for vision and language understanding, has not yet been investigated. To this end, we first propose the Multimodal Manga Complement task and establish a new M2C benchmark dataset covering two languages. We then design a manga augmentation method called MCoT to mine event knowledge in comics with large language models. Finally, we propose an effective baseline, FVP-M², which uses fine-grained visual prompts to support manga complement. Extensive experimental results show the effectiveness of the FVP-M² method for Multimodal Manga Complement.
%R 10.18653/v1/2023.findings-emnlp.661
%U https://aclanthology.org/2023.findings-emnlp.661
%U https://doi.org/10.18653/v1/2023.findings-emnlp.661
%P 9876-9882
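The %-tagged record above follows the Endnote/refer convention of one field per line, with repeatable tags (%A for authors, %Y for editors, %U for URLs). A minimal parsing sketch, again with a hypothetical file name:

# Minimal sketch: folding the %-tagged Endnote/refer record above into a dict,
# keeping repeated tags such as %A (authors) and %U (URLs) as ordered lists.
record = {}
# "guo-etal-2023-m2c.enw" is a hypothetical file containing the record above.
with open("guo-etal-2023-m2c.enw") as f:
    for line in f:
        line = line.rstrip("\n")
        if not line.startswith("%") or len(line) < 2:
            continue
        tag, _, value = line.partition(" ")
        record.setdefault(tag, []).append(value)

print(record["%A"])     # the six author names, in order
print(record["%P"][0])  # 9876-9882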
Markdown (Informal)
[M2C: Towards Automatic Multimodal Manga Complement](https://aclanthology.org/2023.findings-emnlp.661) (Guo et al., Findings 2023)
ACL
Hongcheng Guo, Boyang Wang, Jiaqi Bai, Jiaheng Liu, Jian Yang, and Zhoujun Li. 2023. M2C: Towards Automatic Multimodal Manga Complement. In Findings of the Association for Computational Linguistics: EMNLP 2023, pages 9876–9882, Singapore. Association for Computational Linguistics.