@inproceedings{vukovic-etal-2022-dialogue,
title = "Dialogue Term Extraction using Transfer Learning and Topological Data Analysis",
author = "Vukovic, Renato and
Heck, Michael and
Ruppik, Benjamin and
van Niekerk, Carel and
Zibrowius, Marcus and
Gasic, Milica",
editor = "Lemon, Oliver and
Hakkani-Tur, Dilek and
Li, Junyi Jessy and
Ashrafzadeh, Arash and
Garcia, Daniel Hern{\'a}ndez and
Alikhani, Malihe and
Vandyke, David and
Du{\v{s}}ek, Ond{\v{r}}ej",
booktitle = "Proceedings of the 23rd Annual Meeting of the Special Interest Group on Discourse and Dialogue",
month = sep,
year = "2022",
address = "Edinburgh, UK",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.sigdial-1.53",
doi = "10.18653/v1/2022.sigdial-1.53",
pages = "564--581",
abstract = "Goal oriented dialogue systems were originally designed as a natural language interface to a fixed data-set of entities that users might inquire about, further described by domain, slots and values. As we move towards adaptable dialogue systems where knowledge about domains, slots and values may change, there is an increasing need to automatically extract these terms from raw dialogues or related non-dialogue data on a large scale. In this paper, we take an important step in this direction by exploring different features that can enable systems to discover realisations of domains, slots and values in dialogues in a purely data-driven fashion. The features that we examine stem from word embeddings, language modelling features, as well as topological features of the word embedding space. To examine the utility of each feature set, we train a seed model based on the widely used MultiWOZ data-set. Then, we apply this model to a different corpus, the Schema-guided dialogue data-set. Our method outperforms the previously proposed approach that relies solely on word embeddings. We also demonstrate that each of the features is responsible for discovering different kinds of content. We believe our results warrant further research towards ontology induction, and continued harnessing of topological data analysis for dialogue and natural language processing research.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vukovic-etal-2022-dialogue">
<titleInfo>
<title>Dialogue Term Extraction using Transfer Learning and Topological Data Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Renato</namePart>
<namePart type="family">Vukovic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Heck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Ruppik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carel</namePart>
<namePart type="family">van Niekerk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcus</namePart>
<namePart type="family">Zibrowius</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Milica</namePart>
<namePart type="family">Gasic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Annual Meeting of the Special Interest Group on Discourse and Dialogue</title>
</titleInfo>
<name type="personal">
<namePart type="given">Oliver</namePart>
<namePart type="family">Lemon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dilek</namePart>
<namePart type="family">Hakkani-Tur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junyi</namePart>
<namePart type="given">Jessy</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arash</namePart>
<namePart type="family">Ashrafzadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="given">Hernández</namePart>
<namePart type="family">Garcia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malihe</namePart>
<namePart type="family">Alikhani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Vandyke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondřej</namePart>
<namePart type="family">Dušek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Edinburgh, UK</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Goal oriented dialogue systems were originally designed as a natural language interface to a fixed data-set of entities that users might inquire about, further described by domain, slots and values. As we move towards adaptable dialogue systems where knowledge about domains, slots and values may change, there is an increasing need to automatically extract these terms from raw dialogues or related non-dialogue data on a large scale. In this paper, we take an important step in this direction by exploring different features that can enable systems to discover realisations of domains, slots and values in dialogues in a purely data-driven fashion. The features that we examine stem from word embeddings, language modelling features, as well as topological features of the word embedding space. To examine the utility of each feature set, we train a seed model based on the widely used MultiWOZ data-set. Then, we apply this model to a different corpus, the Schema-guided dialogue data-set. Our method outperforms the previously proposed approach that relies solely on word embeddings. We also demonstrate that each of the features is responsible for discovering different kinds of content. We believe our results warrant further research towards ontology induction, and continued harnessing of topological data analysis for dialogue and natural language processing research.</abstract>
<identifier type="citekey">vukovic-etal-2022-dialogue</identifier>
<identifier type="doi">10.18653/v1/2022.sigdial-1.53</identifier>
<location>
<url>https://aclanthology.org/2022.sigdial-1.53</url>
</location>
<part>
<date>2022-09</date>
<extent unit="page">
<start>564</start>
<end>581</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Dialogue Term Extraction using Transfer Learning and Topological Data Analysis
%A Vukovic, Renato
%A Heck, Michael
%A Ruppik, Benjamin
%A van Niekerk, Carel
%A Zibrowius, Marcus
%A Gasic, Milica
%Y Lemon, Oliver
%Y Hakkani-Tur, Dilek
%Y Li, Junyi Jessy
%Y Ashrafzadeh, Arash
%Y Garcia, Daniel Hernández
%Y Alikhani, Malihe
%Y Vandyke, David
%Y Dušek, Ondřej
%S Proceedings of the 23rd Annual Meeting of the Special Interest Group on Discourse and Dialogue
%D 2022
%8 September
%I Association for Computational Linguistics
%C Edinburgh, UK
%F vukovic-etal-2022-dialogue
%X Goal oriented dialogue systems were originally designed as a natural language interface to a fixed data-set of entities that users might inquire about, further described by domain, slots and values. As we move towards adaptable dialogue systems where knowledge about domains, slots and values may change, there is an increasing need to automatically extract these terms from raw dialogues or related non-dialogue data on a large scale. In this paper, we take an important step in this direction by exploring different features that can enable systems to discover realisations of domains, slots and values in dialogues in a purely data-driven fashion. The features that we examine stem from word embeddings, language modelling features, as well as topological features of the word embedding space. To examine the utility of each feature set, we train a seed model based on the widely used MultiWOZ data-set. Then, we apply this model to a different corpus, the Schema-guided dialogue data-set. Our method outperforms the previously proposed approach that relies solely on word embeddings. We also demonstrate that each of the features is responsible for discovering different kinds of content. We believe our results warrant further research towards ontology induction, and continued harnessing of topological data analysis for dialogue and natural language processing research.
%R 10.18653/v1/2022.sigdial-1.53
%U https://aclanthology.org/2022.sigdial-1.53
%U https://doi.org/10.18653/v1/2022.sigdial-1.53
%P 564-581
Markdown (Informal)
[Dialogue Term Extraction using Transfer Learning and Topological Data Analysis](https://aclanthology.org/2022.sigdial-1.53) (Vukovic et al., SIGDIAL 2022)
ACL