@inproceedings{fang-etal-2024-code,
title = "i-Code Studio: A Configurable and Composable Framework for Integrative {AI}",
author = "Fang, Yuwei and
Khademi, Mahmoud and
Zhu, Chenguang and
Yang, Ziyi and
Pryzant, Reid and
Xu, Yichong and
Qian, Yao and
Yoshioka, Takuya and
Yuan, Lu and
Zeng, Michael and
Huang, Xuedong",
editor = "Hernandez Farias, Delia Irazu and
Hope, Tom and
Li, Manling",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-demo.2",
doi = "10.18653/v1/2024.emnlp-demo.2",
pages = "14--24",
abstract = "Artificial General Intelligence (AGI) requires comprehensive understanding and generation capabilities for a variety of tasks spanning different modalities and functionalities. Integrative AI is one important direction to approach AGI, through combining multiple models to tackle complex multimodal tasks. However, there is a lack of a flexible and composable platform to facilitate efficient and effective model composition and coordination. In this paper, we propose the i-Code Studio, a configurable and composable framework for Integrative AI. The i-Code Studio orchestrates multiple pre-trained models in a finetuning-free fashion to conduct complex multimodal tasks. Instead of simple model composition, the i-Code Studio provides an integrative, flexible, and composable setting for developers to quickly and easily compose cutting-edge services and technologies tailored to their specific requirements. The i-Code Studio achieves impressive results on a variety of zero-shot multimodal tasks, such as video-to-text retrieval, speech-to-speech translation, and visual question answering. We also demonstrate how to quickly build a multimodal agent based on the i-Code Studio that can communicate and personalize for users. The project page with demonstrations and code is at https://i-code-studio.github.io/.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fang-etal-2024-code">
<titleInfo>
<title>i-Code Studio: A Configurable and Composable Framework for Integrative AI</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuwei</namePart>
<namePart type="family">Fang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mahmoud</namePart>
<namePart type="family">Khademi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenguang</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ziyi</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reid</namePart>
<namePart type="family">Pryzant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yichong</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yao</namePart>
<namePart type="family">Qian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Takuya</namePart>
<namePart type="family">Yoshioka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Zeng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuedong</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Delia</namePart>
<namePart type="given">Irazu</namePart>
<namePart type="family">Hernandez Farias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Hope</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manling</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Artificial General Intelligence (AGI) requires comprehensive understanding and generation capabilities for a variety of tasks spanning different modalities and functionalities. Integrative AI is one important direction to approach AGI, through combining multiple models to tackle complex multimodal tasks. However, there is a lack of a flexible and composable platform to facilitate efficient and effective model composition and coordination. In this paper, we propose the i-Code Studio, a configurable and composable framework for Integrative AI. The i-Code Studio orchestrates multiple pre-trained models in a finetuning-free fashion to conduct complex multimodal tasks. Instead of simple model composition, the i-Code Studio provides an integrative, flexible, and composable setting for developers to quickly and easily compose cutting-edge services and technologies tailored to their specific requirements. The i-Code Studio achieves impressive results on a variety of zero-shot multimodal tasks, such as video-to-text retrieval, speech-to-speech translation, and visual question answering. We also demonstrate how to quickly build a multimodal agent based on the i-Code Studio that can communicate and personalize for users. The project page with demonstrations and code is at https://i-code-studio.github.io/.</abstract>
<identifier type="citekey">fang-etal-2024-code</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-demo.2</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-demo.2</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>14</start>
<end>24</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T i-Code Studio: A Configurable and Composable Framework for Integrative AI
%A Fang, Yuwei
%A Khademi, Mahmoud
%A Zhu, Chenguang
%A Yang, Ziyi
%A Pryzant, Reid
%A Xu, Yichong
%A Qian, Yao
%A Yoshioka, Takuya
%A Yuan, Lu
%A Zeng, Michael
%A Huang, Xuedong
%Y Hernandez Farias, Delia Irazu
%Y Hope, Tom
%Y Li, Manling
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F fang-etal-2024-code
%X Artificial General Intelligence (AGI) requires comprehensive understanding and generation capabilities for a variety of tasks spanning different modalities and functionalities. Integrative AI is one important direction to approach AGI, through combining multiple models to tackle complex multimodal tasks. However, there is a lack of a flexible and composable platform to facilitate efficient and effective model composition and coordination. In this paper, we propose the i-Code Studio, a configurable and composable framework for Integrative AI. The i-Code Studio orchestrates multiple pre-trained models in a finetuning-free fashion to conduct complex multimodal tasks. Instead of simple model composition, the i-Code Studio provides an integrative, flexible, and composable setting for developers to quickly and easily compose cutting-edge services and technologies tailored to their specific requirements. The i-Code Studio achieves impressive results on a variety of zero-shot multimodal tasks, such as video-to-text retrieval, speech-to-speech translation, and visual question answering. We also demonstrate how to quickly build a multimodal agent based on the i-Code Studio that can communicate and personalize for users. The project page with demonstrations and code is at https://i-code-studio.github.io/.
%R 10.18653/v1/2024.emnlp-demo.2
%U https://aclanthology.org/2024.emnlp-demo.2
%U https://doi.org/10.18653/v1/2024.emnlp-demo.2
%P 14-24
Markdown (Informal)
[i-Code Studio: A Configurable and Composable Framework for Integrative AI](https://aclanthology.org/2024.emnlp-demo.2) (Fang et al., EMNLP 2024)
ACL
- Yuwei Fang, Mahmoud Khademi, Chenguang Zhu, Ziyi Yang, Reid Pryzant, Yichong Xu, Yao Qian, Takuya Yoshioka, Lu Yuan, Michael Zeng, and Xuedong Huang. 2024. i-Code Studio: A Configurable and Composable Framework for Integrative AI. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pages 14–24, Miami, Florida, USA. Association for Computational Linguistics.