@inproceedings{davidson-etal-2021-improved,
title = "Improved Named Entity Recognition for Noisy Call Center Transcripts",
author = "Davidson, Sam and
Hosier, Jordan and
Zhou, Yu and
Gurbani, Vijay",
editor = "Xu, Wei and
Ritter, Alan and
Baldwin, Tim and
Rahimi, Afshin",
booktitle = "Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)",
month = nov,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.wnut-1.40",
doi = "10.18653/v1/2021.wnut-1.40",
pages = "361--370",
abstract = "We explore the application of state-of-the-art NER algorithms to ASR-generated call center transcripts. Previous work in this domain focused on the use of a BiLSTM-CRF model which relied on Flair embeddings; however, such a model is unwieldy in terms of latency and memory consumption. In a production environment, end users require low-latency models which can be readily integrated into existing pipelines. To that end, we present two different models which can be utilized based on the latency and accuracy requirements of the user. First, we propose a set of models which utilize state-of-the-art Transformer language models (RoBERTa) to develop a high-accuracy NER system trained on custom annotated set of call center transcripts. We then use our best-performing Transformer-based model to label a large number of transcripts, which we use to pretrain a BiLSTM-CRF model and further fine-tune on our annotated dataset. We show that this model, while not as accurate as its Transformer-based counterpart, is highly effective in identifying items which require redaction for privacy law compliance. Further, we propose a new general annotation scheme for NER in the call-center environment.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="davidson-etal-2021-improved">
<titleInfo>
<title>Improved Named Entity Recognition for Noisy Call Center Transcripts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sam</namePart>
<namePart type="family">Davidson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Hosier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vijay</namePart>
<namePart type="family">Gurbani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Afshin</namePart>
<namePart type="family">Rahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We explore the application of state-of-the-art NER algorithms to ASR-generated call center transcripts. Previous work in this domain focused on the use of a BiLSTM-CRF model which relied on Flair embeddings; however, such a model is unwieldy in terms of latency and memory consumption. In a production environment, end users require low-latency models which can be readily integrated into existing pipelines. To that end, we present two different models which can be utilized based on the latency and accuracy requirements of the user. First, we propose a set of models which utilize state-of-the-art Transformer language models (RoBERTa) to develop a high-accuracy NER system trained on custom annotated set of call center transcripts. We then use our best-performing Transformer-based model to label a large number of transcripts, which we use to pretrain a BiLSTM-CRF model and further fine-tune on our annotated dataset. We show that this model, while not as accurate as its Transformer-based counterpart, is highly effective in identifying items which require redaction for privacy law compliance. Further, we propose a new general annotation scheme for NER in the call-center environment.</abstract>
<identifier type="citekey">davidson-etal-2021-improved</identifier>
<identifier type="doi">10.18653/v1/2021.wnut-1.40</identifier>
<location>
<url>https://aclanthology.org/2021.wnut-1.40</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>361</start>
<end>370</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improved Named Entity Recognition for Noisy Call Center Transcripts
%A Davidson, Sam
%A Hosier, Jordan
%A Zhou, Yu
%A Gurbani, Vijay
%Y Xu, Wei
%Y Ritter, Alan
%Y Baldwin, Tim
%Y Rahimi, Afshin
%S Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online
%F davidson-etal-2021-improved
%X We explore the application of state-of-the-art NER algorithms to ASR-generated call center transcripts. Previous work in this domain focused on the use of a BiLSTM-CRF model which relied on Flair embeddings; however, such a model is unwieldy in terms of latency and memory consumption. In a production environment, end users require low-latency models which can be readily integrated into existing pipelines. To that end, we present two different models which can be utilized based on the latency and accuracy requirements of the user. First, we propose a set of models which utilize state-of-the-art Transformer language models (RoBERTa) to develop a high-accuracy NER system trained on custom annotated set of call center transcripts. We then use our best-performing Transformer-based model to label a large number of transcripts, which we use to pretrain a BiLSTM-CRF model and further fine-tune on our annotated dataset. We show that this model, while not as accurate as its Transformer-based counterpart, is highly effective in identifying items which require redaction for privacy law compliance. Further, we propose a new general annotation scheme for NER in the call-center environment.
%R 10.18653/v1/2021.wnut-1.40
%U https://aclanthology.org/2021.wnut-1.40
%U https://doi.org/10.18653/v1/2021.wnut-1.40
%P 361-370
Markdown (Informal)
[Improved Named Entity Recognition for Noisy Call Center Transcripts](https://aclanthology.org/2021.wnut-1.40) (Davidson et al., WNUT 2021)
ACL