@inproceedings{xiao-etal-2022-datalab,
title = "{D}ata{L}ab: A Platform for Data Analysis and Intervention",
author = "Xiao, Yang and
Fu, Jinlan and
Yuan, Weizhe and
Viswanathan, Vijay and
Liu, Zhoumianze and
Liu, Yixin and
Neubig, Graham and
Liu, Pengfei",
editor = "Basile, Valerio and
Kozareva, Zornitsa and
Stajner, Sanja",
booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: System Demonstrations",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.acl-demo.18",
doi = "10.18653/v1/2022.acl-demo.18",
pages = "182--195",
abstract = "Despite data{'}s crucial role in machine learning, most existing tools and research tend to focus on systems on top of existing data rather than how to interpret and manipulate data. In this paper, we propose DataLab, a unified data-oriented platform that not only allows users to interactively analyze the characteristics of data but also provides a standardized interface so that many data processing operations can be provided within a unified interface. Additionally, in view of the ongoing surge in the proliferation of datasets, DataLab has features for dataset recommendation and global vision analysis that help researchers form a better view of the data ecosystem. So far, DataLab covers 1,300 datasets and 3,583 of its transformed version, where 313 datasets support different types of analysis (e.g., with respect to gender bias) with the help of 119M samples annotated by 318 feature functions. DataLab is under active development and will be supported going forward. We have released a web platform, web API, Python SDK, and PyPI published package, which hopefully, can meet the diverse needs of researchers.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xiao-etal-2022-datalab">
<titleInfo>
<title>DataLab: A Platform for Data Analysis and Intervention</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Xiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinlan</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weizhe</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vijay</namePart>
<namePart type="family">Viswanathan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhoumianze</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yixin</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Neubig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pengfei</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Valerio</namePart>
<namePart type="family">Basile</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanja</namePart>
<namePart type="family">Stajner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite data’s crucial role in machine learning, most existing tools and research tend to focus on systems on top of existing data rather than how to interpret and manipulate data. In this paper, we propose DataLab, a unified data-oriented platform that not only allows users to interactively analyze the characteristics of data but also provides a standardized interface so that many data processing operations can be provided within a unified interface. Additionally, in view of the ongoing surge in the proliferation of datasets, DataLab has features for dataset recommendation and global vision analysis that help researchers form a better view of the data ecosystem. So far, DataLab covers 1,300 datasets and 3,583 of its transformed version, where 313 datasets support different types of analysis (e.g., with respect to gender bias) with the help of 119M samples annotated by 318 feature functions. DataLab is under active development and will be supported going forward. We have released a web platform, web API, Python SDK, and PyPI published package, which hopefully, can meet the diverse needs of researchers.</abstract>
<identifier type="citekey">xiao-etal-2022-datalab</identifier>
<identifier type="doi">10.18653/v1/2022.acl-demo.18</identifier>
<location>
<url>https://aclanthology.org/2022.acl-demo.18</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>182</start>
<end>195</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DataLab: A Platform for Data Analysis and Intervention
%A Xiao, Yang
%A Fu, Jinlan
%A Yuan, Weizhe
%A Viswanathan, Vijay
%A Liu, Zhoumianze
%A Liu, Yixin
%A Neubig, Graham
%A Liu, Pengfei
%Y Basile, Valerio
%Y Kozareva, Zornitsa
%Y Stajner, Sanja
%S Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: System Demonstrations
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F xiao-etal-2022-datalab
%X Despite data’s crucial role in machine learning, most existing tools and research tend to focus on systems on top of existing data rather than how to interpret and manipulate data. In this paper, we propose DataLab, a unified data-oriented platform that not only allows users to interactively analyze the characteristics of data but also provides a standardized interface so that many data processing operations can be provided within a unified interface. Additionally, in view of the ongoing surge in the proliferation of datasets, DataLab has features for dataset recommendation and global vision analysis that help researchers form a better view of the data ecosystem. So far, DataLab covers 1,300 datasets and 3,583 of its transformed version, where 313 datasets support different types of analysis (e.g., with respect to gender bias) with the help of 119M samples annotated by 318 feature functions. DataLab is under active development and will be supported going forward. We have released a web platform, web API, Python SDK, and PyPI published package, which hopefully, can meet the diverse needs of researchers.
%R 10.18653/v1/2022.acl-demo.18
%U https://aclanthology.org/2022.acl-demo.18
%U https://doi.org/10.18653/v1/2022.acl-demo.18
%P 182-195
Markdown (Informal)
[DataLab: A Platform for Data Analysis and Intervention](https://aclanthology.org/2022.acl-demo.18) (Xiao et al., ACL 2022)
ACL
- Yang Xiao, Jinlan Fu, Weizhe Yuan, Vijay Viswanathan, Zhoumianze Liu, Yixin Liu, Graham Neubig, and Pengfei Liu. 2022. DataLab: A Platform for Data Analysis and Intervention. In Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: System Demonstrations, pages 182–195, Dublin, Ireland. Association for Computational Linguistics.