@article{macavaney-etal-2022-abnirml,
    title = "{ABNIRML}: Analyzing the Behavior of Neural {IR} Models",
    author = "MacAvaney, Sean and
      Feldman, Sergey and
      Goharian, Nazli and
      Downey, Doug and
      Cohan, Arman",
    editor = "Roark, Brian and
      Nenkova, Ani",
    journal = "Transactions of the Association for Computational Linguistics",
    volume = "10",
    year = "2022",
    address = "Cambridge, MA",
    publisher = "MIT Press",
    url = "https://aclanthology.org/2022.tacl-1.13",
    doi = "10.1162/tacl_a_00457",
    pages = "224--239",
    abstract = "Pretrained contextualized language models such as BERT and T5 have established a new state-of-the-art for ad-hoc search. However, it is not yet well understood why these methods are so effective, what makes some variants more effective than others, and what pitfalls they may have. We present a new comprehensive framework for Analyzing the Behavior of Neural IR ModeLs (ABNIRML), which includes new types of diagnostic probes that allow us to test several characteristics{---}such as writing styles, factuality, sensitivity to paraphrasing and word order{---}that are not addressed by previous techniques. To demonstrate the value of the framework, we conduct an extensive empirical study that yields insights into the factors that contribute to the neural model{'}s gains, and identify potential unintended biases the models exhibit. Some of our results confirm conventional wisdom, for example, that recent neural ranking models rely less on exact term overlap with the query, and instead leverage richer linguistic information, evidenced by their higher sensitivity to word and sentence order. Other results are more surprising, such as that some models (e.g., T5 and ColBERT) are biased towards factually correct (rather than simply relevant) texts. Further, some characteristics vary even for the same base language model, and other characteristics can appear due to random variations during model training.",
}
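
For completeness, a minimal sketch of reading the BibTeX record above programmatically. It assumes the third-party bibtexparser package (1.x API); the record string is abbreviated to a few fields, and in a LaTeX document the same entry would simply be cited via its key, e.g. \cite{macavaney-etal-2022-abnirml}.

# Minimal sketch, assuming the third-party bibtexparser package (1.x API).
import bibtexparser

bibtex_str = r"""
@article{macavaney-etal-2022-abnirml,
    title = "{ABNIRML}: Analyzing the Behavior of Neural {IR} Models",
    journal = "Transactions of the Association for Computational Linguistics",
    volume = "10",
    year = "2022",
    doi = "10.1162/tacl_a_00457",
    pages = "224--239",
}
"""

db = bibtexparser.loads(bibtex_str)   # parse the string into a BibDatabase
entry = db.entries[0]                 # each entry is a plain dict
print(entry["ID"])                    # macavaney-etal-2022-abnirml
print(entry["title"], entry["doi"])   # field names become lower-cased keys
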
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="macavaney-etal-2022-abnirml">
    <titleInfo>
      <title>ABNIRML: Analyzing the Behavior of Neural IR Models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Sean</namePart>
      <namePart type="family">MacAvaney</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sergey</namePart>
      <namePart type="family">Feldman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nazli</namePart>
      <namePart type="family">Goharian</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Doug</namePart>
      <namePart type="family">Downey</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Arman</namePart>
      <namePart type="family">Cohan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <relatedItem type="host">
      <titleInfo>
        <title>Transactions of the Association for Computational Linguistics</title>
      </titleInfo>
      <originInfo>
        <issuance>continuing</issuance>
        <publisher>MIT Press</publisher>
        <place>
          <placeTerm type="text">Cambridge, MA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">periodical</genre>
      <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>Pretrained contextualized language models such as BERT and T5 have established a new state-of-the-art for ad-hoc search. However, it is not yet well understood why these methods are so effective, what makes some variants more effective than others, and what pitfalls they may have. We present a new comprehensive framework for Analyzing the Behavior of Neural IR ModeLs (ABNIRML), which includes new types of diagnostic probes that allow us to test several characteristics—such as writing styles, factuality, sensitivity to paraphrasing and word order—that are not addressed by previous techniques. To demonstrate the value of the framework, we conduct an extensive empirical study that yields insights into the factors that contribute to the neural model’s gains, and identify potential unintended biases the models exhibit. Some of our results confirm conventional wisdom, for example, that recent neural ranking models rely less on exact term overlap with the query, and instead leverage richer linguistic information, evidenced by their higher sensitivity to word and sentence order. Other results are more surprising, such as that some models (e.g., T5 and ColBERT) are biased towards factually correct (rather than simply relevant) texts. Further, some characteristics vary even for the same base language model, and other characteristics can appear due to random variations during model training.</abstract>
    <identifier type="citekey">macavaney-etal-2022-abnirml</identifier>
    <identifier type="doi">10.1162/tacl_a_00457</identifier>
    <location>
      <url>https://aclanthology.org/2022.tacl-1.13</url>
    </location>
    <part>
      <date>2022</date>
      <detail type="volume"><number>10</number></detail>
      <extent unit="page">
        <start>224</start>
        <end>239</end>
      </extent>
    </part>
  </mods>
</modsCollection>
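
The MODS record above can be read with nothing beyond the Python standard library. The sketch below pulls out the title, authors, and DOI; the input file name is hypothetical, and the only subtlety is the http://www.loc.gov/mods/v3 namespace.

# Rough sketch using the standard-library ElementTree parser; the input
# file name is hypothetical and is assumed to contain the record above.
import xml.etree.ElementTree as ET

NS = {"m": "http://www.loc.gov/mods/v3"}
tree = ET.parse("macavaney-etal-2022-abnirml.xml")
mods = tree.getroot().find("m:mods", NS)

title = mods.findtext("m:titleInfo/m:title", namespaces=NS)
authors = [
    " ".join(part.text for part in name.findall("m:namePart", NS))
    for name in mods.findall("m:name[@type='personal']", NS)
]
doi = mods.findtext("m:identifier[@type='doi']", namespaces=NS)

print(title)    # ABNIRML: Analyzing the Behavior of Neural IR Models
print(authors)  # ['Sean MacAvaney', 'Sergey Feldman', ...]
print(doi)      # 10.1162/tacl_a_00457
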
%0 Journal Article
%T ABNIRML: Analyzing the Behavior of Neural IR Models
%A MacAvaney, Sean
%A Feldman, Sergey
%A Goharian, Nazli
%A Downey, Doug
%A Cohan, Arman
%J Transactions of the Association for Computational Linguistics
%D 2022
%V 10
%I MIT Press
%C Cambridge, MA
%F macavaney-etal-2022-abnirml
%X Pretrained contextualized language models such as BERT and T5 have established a new state-of-the-art for ad-hoc search. However, it is not yet well understood why these methods are so effective, what makes some variants more effective than others, and what pitfalls they may have. We present a new comprehensive framework for Analyzing the Behavior of Neural IR ModeLs (ABNIRML), which includes new types of diagnostic probes that allow us to test several characteristics—such as writing styles, factuality, sensitivity to paraphrasing and word order—that are not addressed by previous techniques. To demonstrate the value of the framework, we conduct an extensive empirical study that yields insights into the factors that contribute to the neural model’s gains, and identify potential unintended biases the models exhibit. Some of our results confirm conventional wisdom, for example, that recent neural ranking models rely less on exact term overlap with the query, and instead leverage richer linguistic information, evidenced by their higher sensitivity to word and sentence order. Other results are more surprising, such as that some models (e.g., T5 and ColBERT) are biased towards factually correct (rather than simply relevant) texts. Further, some characteristics vary even for the same base language model, and other characteristics can appear due to random variations during model training.
%R 10.1162/tacl_a_00457
%U https://aclanthology.org/2022.tacl-1.13
%U https://doi.org/10.1162/tacl_a_00457
%P 224-239
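
The %-tagged EndNote export above is line-oriented, so it needs no library at all. A rough sketch that folds it into a dict of lists (repeated tags such as %A and %U simply accumulate); the abnirml.enw file name is hypothetical.

# Rough sketch: fold %-tagged lines (as exported above) into a dict of lists.
# The file name is hypothetical and assumed to hold the record verbatim.
from collections import defaultdict

record = defaultdict(list)
with open("abnirml.enw", encoding="utf-8") as fh:
    for line in fh:
        line = line.rstrip("\n")
        if line.startswith("%") and len(line) > 2:
            tag, value = line[:2], line[3:]   # e.g. "%A", "MacAvaney, Sean"
            record[tag].append(value)

print(record["%T"])  # ['ABNIRML: Analyzing the Behavior of Neural IR Models']
print(record["%A"])  # one entry per author, in order
print(record["%P"])  # ['224-239']
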
Markdown (Informal)
[ABNIRML: Analyzing the Behavior of Neural IR Models](https://aclanthology.org/2022.tacl-1.13) (MacAvaney et al., TACL 2022)
ACL
Sean MacAvaney, Sergey Feldman, Nazli Goharian, Doug Downey, and Arman Cohan. 2022. ABNIRML: Analyzing the Behavior of Neural IR Models. Transactions of the Association for Computational Linguistics, 10:224–239.