% Conference paper published in the LREC-COLING 2024 proceedings.
% The model name in the title is brace-protected as one whole word so that
% sentence-casing bibliography styles keep its internal capitals.
@inproceedings{zhu-etal-2024-doc2soargraph,
    title     = {{Doc2SoarGraph}: Discrete Reasoning over Visually-Rich Table-Text Documents via Semantic-Oriented Hierarchical Graphs},
    author    = {Zhu, Fengbin and
                 Wang, Chao and
                 Feng, Fuli and
                 Ren, Zifeng and
                 Li, Moxin and
                 Chua, Tat-Seng},
    editor    = {Calzolari, Nicoletta and
                 Kan, Min-Yen and
                 Hoste, Veronique and
                 Lenci, Alessandro and
                 Sakti, Sakriani and
                 Xue, Nianwen},
    booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
    month     = may,
    year      = {2024},
    address   = {Torino, Italia},
    publisher = {ELRA and ICCL},
    url       = {https://aclanthology.org/2024.lrec-main.456},
    pages     = {5119--5131},
    abstract  = {Table-text document (e.g., financial reports) understanding has attracted increasing attention in recent two years. TAT-DQA is a realistic setting for the understanding of visually-rich table-text documents, which involves answering associated questions requiring discrete reasoning. Most existing work relies on token-level semantics, falling short in the reasoning across document elements such as quantities and dates. To address this limitation, we propose a novel Doc2SoarGraph model that exploits element-level semantics and employs Semantic-oriented hierarchical Graph structures to capture the differences and correlations among different elements within the given document and question. Extensive experiments on the TAT-DQA dataset reveal that our model surpasses the state-of-the-art conventional method (i.e., MHST) and large language model (i.e., ChatGPT) by 17.73 and 6.49 points respectively in terms of Exact Match (EM) metric, demonstrating exceptional effectiveness.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the conference paper with citekey zhu-etal-2024-doc2soargraph. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zhu-etal-2024-doc2soargraph">
    <titleInfo>
      <title>Doc2SoarGraph: Discrete Reasoning over Visually-Rich Table-Text Documents via Semantic-Oriented Hierarchical Graphs</title>
    </titleInfo>
    <!-- Authors of the paper, one <name> element per person. -->
    <name type="personal">
      <namePart type="given">Fengbin</namePart>
      <namePart type="family">Zhu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chao</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fuli</namePart>
      <namePart type="family">Feng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zifeng</namePart>
      <namePart type="family">Ren</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Moxin</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tat-Seng</namePart>
      <namePart type="family">Chua</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Min-Yen</namePart>
        <namePart type="family">Kan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Veronique</namePart>
        <namePart type="family">Hoste</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nianwen</namePart>
        <namePart type="family">Xue</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Table-text document (e.g., financial reports) understanding has attracted increasing attention in recent two years. TAT-DQA is a realistic setting for the understanding of visually-rich table-text documents, which involves answering associated questions requiring discrete reasoning. Most existing work relies on token-level semantics, falling short in the reasoning across document elements such as quantities and dates. To address this limitation, we propose a novel Doc2SoarGraph model that exploits element-level semantics and employs Semantic-oriented hierarchical Graph structures to capture the differences and correlations among different elements within the given document and question. Extensive experiments on the TAT-DQA dataset reveal that our model surpasses the state-of-the-art conventional method (i.e., MHST) and large language model (i.e., ChatGPT) by 17.73 and 6.49 points respectively in terms of Exact Match (EM) metric, demonstrating exceptional effectiveness.</abstract>
    <identifier type="citekey">zhu-etal-2024-doc2soargraph</identifier>
    <location>
      <url>https://aclanthology.org/2024.lrec-main.456</url>
    </location>
    <!-- Position of the paper within the host volume: issue date and page extent. -->
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>5119</start>
        <end>5131</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Doc2SoarGraph: Discrete Reasoning over Visually-Rich Table-Text Documents via Semantic-Oriented Hierarchical Graphs
%A Zhu, Fengbin
%A Wang, Chao
%A Feng, Fuli
%A Ren, Zifeng
%A Li, Moxin
%A Chua, Tat-Seng
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F zhu-etal-2024-doc2soargraph
%X Table-text document (e.g., financial reports) understanding has attracted increasing attention in recent two years. TAT-DQA is a realistic setting for the understanding of visually-rich table-text documents, which involves answering associated questions requiring discrete reasoning. Most existing work relies on token-level semantics, falling short in the reasoning across document elements such as quantities and dates. To address this limitation, we propose a novel Doc2SoarGraph model that exploits element-level semantics and employs Semantic-oriented hierarchical Graph structures to capture the differences and correlations among different elements within the given document and question. Extensive experiments on the TAT-DQA dataset reveal that our model surpasses the state-of-the-art conventional method (i.e., MHST) and large language model (i.e., ChatGPT) by 17.73 and 6.49 points respectively in terms of Exact Match (EM) metric, demonstrating exceptional effectiveness.
%U https://aclanthology.org/2024.lrec-main.456
%P 5119-5131
Markdown (Informal)
[Doc2SoarGraph: Discrete Reasoning over Visually-Rich Table-Text Documents via Semantic-Oriented Hierarchical Graphs](https://aclanthology.org/2024.lrec-main.456) (Zhu et al., LREC-COLING 2024)
ACL