2020
Aguilar, Gustavo; Solorio, Thamar
From English to Code-Switching: Transfer Learning with Strong Morphological Clues Conference
The 58th Annual Meeting of the Association for Computational Linguistics, ACL, 2020.
Abstract | Links | BibTeX | Tags: Code-Switching, Transfer learning
@inproceedings{aguilar20_cs-elmo,
  title     = {From {English} to {Code-Switching}: Transfer Learning with Strong Morphological Clues},
  author    = {Aguilar, Gustavo and Solorio, Thamar},
  editor    = {{ACL}},
  url       = {https://www.aclweb.org/anthology/2020.acl-main.716.pdf},
  year      = {2020},
  date      = {2020-06-19},
  booktitle = {The 58th Annual Meeting of the Association for Computational Linguistics},
  publisher = {ACL},
  abstract  = {Linguistic Code-switching (CS) is still an understudied phenomenon in natural language processing. The NLP community has mostly focused on monolingual and multi-lingual scenarios, but little attention has been given to CS in particular. This is partly because of the lack of resources and annotated data, despite its increasing occurrence in social media platforms. In this paper, we aim at adapting monolingual models to code-switched text in various tasks. Specifically, we transfer English knowledge from a pre-trained ELMo model to different code-switched language pairs (i.e., Nepali-English, Spanish-English, and Hindi-English) using the task of language identification. Our method, CS-ELMo, is an extension of ELMo with a simple yet effective position-aware attention mechanism inside its character convolutions. We show the effectiveness of this transfer learning step by outperforming multilingual BERT and homologous CS-unaware ELMo models and establishing a new state of the art in CS tasks, such as NER and POS tagging. Our technique can be expanded to more English-paired code-switched languages, providing more resources to the CS community.},
  keywords  = {Code-Switching, Transfer learning},
  pubstate  = {published},
  tppubtype = {conference}
}
2019
Maharjan, Suraj; Mave, Deepthi; Shrestha, Prasha; Montes, Manuel; Gonzalez, Fabio A; Solorio, Thamar
Jointly Learning Author and Annotated Character N-gram Embeddings: A Case Study in Literary Text Conference
In Proceedings of the 2019 Conference on Recent Advances in Natural Language Processing (RANLP), ACL, Varna, Bulgaria, 2019.
Abstract | Links | BibTeX | Tags: Authorship Attribution, Book Likability Prediction, Multitask, Neural Language Model, Transfer learning
@inproceedings{Maharjan2019,
  title     = {Jointly Learning Author and Annotated Character {N-gram} Embeddings: A Case Study in Literary Text},
  author    = {Maharjan, Suraj and Mave, Deepthi and Shrestha, Prasha and Montes, Manuel and Gonzalez, Fabio A. and Solorio, Thamar},
  url       = {https://www.aclweb.org/anthology/R19-1080/},
  year      = {2019},
  date      = {2019-09-02},
  booktitle = {Proceedings of the 2019 Conference on Recent Advances in Natural Language Processing ({RANLP})},
  pages     = {684--692},
  publisher = {ACL},
  address   = {Varna, Bulgaria},
  abstract  = {An author's way of presenting a story through his/her writing style has a great impact on whether the story will be liked by readers or not. In this paper, we learn representations for authors of literary texts together with representations for character n-grams annotated with their functional roles. We train a neural character n-gram based language model using an external corpus of literary texts and transfer learned representations for use in downstream tasks. We show that augmenting the knowledge from external works of authors produces results competitive with other style-based methods for book likability prediction, genre classification, and authorship attribution.},
  keywords  = {Authorship Attribution, Book Likability Prediction, Multitask, Neural Language Model, Transfer learning},
  pubstate  = {published},
  tppubtype = {conference}
}