2018 |
Aguilar, Gustavo; AlGhamdi, Fahad; Soto, Victor; Diab, Mona; Hirschberg, Julia; Solorio, Thamar Named Entity Recognition on Code-Switched Data: Overview of the CALCS 2018 Shared Task Inproceedings Association for Computational Linguistics (Ed.): Proceedings of the Third Workshop on Computational Approaches to Linguistic Code-Switching, Association for Computational Linguistics, Melbourne, Australia, 2018. Abstract | Links | BibTeX | Tags: Code-Switching, English-Spanish, Modern Standard Arabic-Egyptian, NER, shared task, Social Media

@comment{
  Overview paper of the CALCS 2018 shared task on code-switched NER.
  The editor is an organisation, so it is wrapped in a second pair of braces
  to keep it from being split into first and last name parts.
  Percent signs in the abstract are escaped so the field can be typeset safely.
  pubstate and tppubtype are kept as exported by the publication manager.
}
@inproceedings{aguilar@calcs2018,
  author     = {Aguilar, Gustavo and AlGhamdi, Fahad and Soto, Victor and Diab, Mona and Hirschberg, Julia and Solorio, Thamar},
  title      = {Named Entity Recognition on Code-Switched Data: Overview of the {CALCS} 2018 Shared Task},
  editor     = {{Association for Computational Linguistics}},
  booktitle  = {Proceedings of the Third Workshop on Computational Approaches to Linguistic Code-Switching},
  publisher  = {Association for Computational Linguistics},
  address    = {Melbourne, Australia},
  year       = {2018},
  date       = {2018-07-15},
  url        = {http://www.aclweb.org/anthology/W18-3219},
  abstract   = {In the third shared task of the Computational Approaches to Linguistic Code-Switching (CALCS) workshop, we focus on Named Entity Recognition (NER) on code-switched social-media data. We divide the shared task into two competitions based on the English-Spanish (ENG-SPA) and Modern Standard Arabic-Egyptian (MSA-EGY) language pairs. We use Twitter data and 9 entity types to establish a new dataset for code-switched NER benchmarks. In addition to the CS phenomenon, the diversity of the entities and the social media challenges make the task considerably hard to process. As a result, the best scores of the competitions are 63.76\% and 71.61\% for ENG-SPA and MSA-EGY, respectively. We present the scores of 9 participants and discuss the most common challenges among submissions.},
  keywords   = {Code-Switching, English-Spanish, Modern Standard Arabic-Egyptian, NER, shared task, Social Media},
  pubstate   = {published},
  tppubtype  = {inproceedings},
}

In the third shared task of the Computational Approaches to Linguistic Code-Switching (CALCS) workshop, we focus on Named Entity Recognition (NER) on code-switched social-media data. We divide the shared task into two competitions based on the English-Spanish (ENG-SPA) and Modern Standard Arabic-Egyptian (MSA-EGY) language pairs. We use Twitter data and 9 entity types to establish a new dataset for code-switched NER benchmarks. In addition to the CS phenomenon, the diversity of the entities and the social media challenges make the task considerably hard to process. As a result, the best scores of the competitions are 63.76% and 71.61% for ENG-SPA and MSA-EGY, respectively. We present the scores of 9 participants and discuss the most common challenges among submissions. |