@inproceedings{tian-etal-2023-idioms,
  title     = {How Are Idioms Processed Inside {Transformer} Language Models?},
  author    = {Tian, Ye and James, Isobel and Son, Hye},
  editor    = {Palmer, Alexis and Camacho-Collados, Jose},
  booktitle = {Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*SEM 2023)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.starsem-1.16},
  doi       = {10.18653/v1/2023.starsem-1.16},
  pages     = {174--179},
  abstract  = {Idioms such as {``}call it a day{''} and {``}piece of cake,{''} are prevalent in natural language. How do Transformer language models process idioms? This study examines this question by analysing three models - BERT, Multilingual BERT, and DistilBERT. We compare the embeddings of idiomatic and literal expressions across all layers of the networks at both the sentence and word levels. Additionally, we investigate the attention directed from other sentence tokens towards a word within an idiom as opposed to in a literal context. Results indicate that while the three models exhibit slightly different internal mechanisms, they all represent idioms distinctively compared to literal language, with attention playing a critical role. These findings suggest that idioms are semantically and syntactically idiosyncratic, not only for humans but also for language models.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tian-etal-2023-idioms">
<titleInfo>
<title>How Are Idioms Processed Inside Transformer Language Models?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ye</namePart>
<namePart type="family">Tian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isobel</namePart>
<namePart type="family">James</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hye</namePart>
<namePart type="family">Son</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*SEM 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jose</namePart>
<namePart type="family">Camacho-Collados</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Idioms such as “call it a day” and “piece of cake,” are prevalent in natural language. How do Transformer language models process idioms? This study examines this question by analysing three models - BERT, Multilingual BERT, and DistilBERT. We compare the embeddings of idiomatic and literal expressions across all layers of the networks at both the sentence and word levels. Additionally, we investigate the attention directed from other sentence tokens towards a word within an idiom as opposed to in a literal context. Results indicate that while the three models exhibit slightly different internal mechanisms, they all represent idioms distinctively compared to literal language, with attention playing a critical role. These findings suggest that idioms are semantically and syntactically idiosyncratic, not only for humans but also for language models.</abstract>
<identifier type="citekey">tian-etal-2023-idioms</identifier>
<identifier type="doi">10.18653/v1/2023.starsem-1.16</identifier>
<location>
<url>https://aclanthology.org/2023.starsem-1.16</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>174</start>
<end>179</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T How Are Idioms Processed Inside Transformer Language Models?
%A Tian, Ye
%A James, Isobel
%A Son, Hye
%Y Palmer, Alexis
%Y Camacho-Collados, Jose
%S Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*SEM 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F tian-etal-2023-idioms
%X Idioms such as “call it a day” and “piece of cake,” are prevalent in natural language. How do Transformer language models process idioms? This study examines this question by analysing three models - BERT, Multilingual BERT, and DistilBERT. We compare the embeddings of idiomatic and literal expressions across all layers of the networks at both the sentence and word levels. Additionally, we investigate the attention directed from other sentence tokens towards a word within an idiom as opposed to in a literal context. Results indicate that while the three models exhibit slightly different internal mechanisms, they all represent idioms distinctively compared to literal language, with attention playing a critical role. These findings suggest that idioms are semantically and syntactically idiosyncratic, not only for humans but also for language models.
%R 10.18653/v1/2023.starsem-1.16
%U https://aclanthology.org/2023.starsem-1.16
%U https://doi.org/10.18653/v1/2023.starsem-1.16
%P 174-179
Markdown (Informal)
[How Are Idioms Processed Inside Transformer Language Models?](https://aclanthology.org/2023.starsem-1.16) (Tian et al., *SEM 2023)
ACL