2018
Aguilar, Gustavo; AlGhamdi, Fahad; Soto, Victor; Diab, Mona; Hirschberg, Julia; Solorio, Thamar
Named Entity Recognition on Code-Switched Data: Overview of the CALCS 2018 Shared Task Inproceedings
In: Association for Computational Linguistics (Ed.): Proceedings of the Third Workshop on Computational Approaches to Linguistic Code-Switching, Association for Computational Linguistics, Melbourne, Australia, 2018.
Abstract | Links | BibTeX | Tags: Code-Switching, English-Spanish, Modern Standard Arabic-Egyptian, NER, shared task, Social Media
% Overview paper of the CALCS 2018 shared task (NER on code-switched data).
% The editor is an organisation, so its value is double-braced: BibTeX then
% treats it as one indivisible name instead of splitting it into
% first / von / last parts. The acronym in the title is braced so that
% sentence-casing styles keep its capitals.
@inproceedings{aguilar@calcs2018,
  author     = {Aguilar, Gustavo and AlGhamdi, Fahad and Soto, Victor and Diab, Mona and Hirschberg, Julia and Solorio, Thamar},
  title      = {Named Entity Recognition on Code-Switched Data: Overview of the {CALCS} 2018 Shared Task},
  editor     = {{Association for Computational Linguistics}},
  booktitle  = {Proceedings of the Third Workshop on Computational Approaches to Linguistic Code-Switching},
  publisher  = {Association for Computational Linguistics},
  address    = {Melbourne, Australia},
  year       = {2018},
  date       = {2018-07-15},
  url        = {http://www.aclweb.org/anthology/W18-3219},
  abstract   = {In the third shared task of the Computational Approaches to Linguistic Code-Switching (CALCS) workshop, we focus on Named Entity Recognition (NER) on code-switched social-media data. We divide the shared task into two competitions based on the English-Spanish (ENG-SPA) and Modern Standard Arabic-Egyptian (MSA-EGY) language pairs. We use Twitter data and 9 entity types to establish a new dataset for code-switched NER benchmarks. In addition to the CS phenomenon, the diversity of the entities and the social media challenges make the task considerably hard to process. As a result, the best scores of the competitions are 63.76% and 71.61% for ENG-SPA and MSA-EGY, respectively. We present the scores of 9 participants and discuss the most common challenges among submissions.},
  keywords   = {Code-Switching, English-Spanish, Modern Standard Arabic-Egyptian, NER, shared task, Social Media},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
In the third shared task of the Computational Approaches to Linguistic Code-Switching (CALCS) workshop, we focus on Named Entity Recognition (NER) on code-switched social-media data. We divide the shared task into two competitions based on the English-Spanish (ENG-SPA) and Modern Standard Arabic-Egyptian (MSA-EGY) language pairs. We use Twitter data and 9 entity types to establish a new dataset for code-switched NER benchmarks. In addition to the CS phenomenon, the diversity of the entities and the social media challenges make the task considerably hard to process. As a result, the best scores of the competitions are 63.76% and 71.61% for ENG-SPA and MSA-EGY, respectively. We present the scores of 9 participants and discuss the most common challenges among submissions.