@inproceedings{kumar-etal-2022-partially,
title = "Partially Humanizing Weak Supervision: Towards a Better Low Resource Pipeline for Spoken Language Understanding",
author = "Kumar, Ayush and
Tripathi, Rishabh and
Vepa, Jithendra",
editor = "Dragut, Eduard and
Li, Yunyao and
Popa, Lucian and
Vucetic, Slobodan and
Srivastava, Shashank",
booktitle = "Proceedings of the Fourth Workshop on Data Science with Human-in-the-Loop (Language Advances)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.dash-1.9",
pages = "64--73",
abstract = "Weak Supervised Learning (WSL) is a popular technique to develop machine learning models in absence of labeled training data. WSL involves training over noisy labels which are traditionally obtained from hand-engineered semantic rules and task-specific pre-trained models. Such rules offer limited coverage and generalization over tasks. On the other hand, pre-trained models are available only for limited tasks. Thus, obtaining weak labels is a bottleneck in weak supervised learning. In this work, we propose to utilize the prompting paradigm to generate weak labels for the underlying tasks. We show that task-agnostic prompts are generalizable and can be used to obtain noisy labels for different Spoken Language Understanding (SLU) tasks such as sentiment classification, disfluency detection and emotion classification. These prompts can additionally be updated with human-in-the-loop to add task-specific contexts, thus providing flexibility to design task-specific prompts. Our proposed WSL pipeline outperforms other competitive low-resource benchmarks on zero and few-shot learning by more than 4{\%} on Macro-F1 and a conventional rule-based WSL baseline by more than 5{\%} across all the benchmark datasets. We demonstrate that prompt-based methods save nearly 75{\%} of time in a weak-supervised framework and generate more reliable labels for the above SLU tasks and thus can be used as a universal strategy to obtain weak labels.",
}
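The abstract describes using the prompting paradigm to obtain weak (noisy) labels for SLU tasks. As a rough illustration of that idea only, here is a minimal PET-style cloze-prompting sketch for sentiment weak-labelling; the model name, prompt template, and verbalizer words are illustrative assumptions, not the authors' actual configuration.

```python
# Minimal sketch of prompt-based weak labelling via a cloze prompt.
# Assumptions: model, template, and verbalizer are hypothetical choices,
# not taken from Kumar et al. (2022).
from transformers import pipeline

# A masked language model scores candidate verbalizer tokens in the blank.
fill_mask = pipeline("fill-mask", model="bert-base-uncased")

# Task-agnostic cloze template; the verbalizer maps filler words to labels.
TEMPLATE = "{utterance} It was [MASK]."
VERBALIZER = {"great": "positive", "terrible": "negative"}

def weak_label(utterance: str) -> str:
    """Return a noisy sentiment label by comparing verbalizer-token scores."""
    prompt = TEMPLATE.format(utterance=utterance)
    scores = {}
    for word, label in VERBALIZER.items():
        # targets= restricts scoring to the given filler word.
        result = fill_mask(prompt, targets=[word])
        scores[label] = result[0]["score"]
    return max(scores, key=scores.get)

print(weak_label("The agent resolved my issue quickly."))  # likely "positive"
```

A human in the loop could then refine TEMPLATE or VERBALIZER with task-specific context, which is the flexibility the abstract alludes to.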