2022
Chebolu, Siva Uday Sampreeth; Dernoncourt, Franck; Lipka, Nedim; Solorio, Thamar
Survey of Aspect-based Sentiment Analysis Datasets Journal Article
In: arXiv e-prints, pp. arXiv: 2204.05232, 2022.
Abstract | Links | BibTeX | Tags: Sentiment analysis
@article{nokey,
  author     = {Chebolu, Siva Uday Sampreeth and Dernoncourt, Franck and Lipka, Nedim and Solorio, Thamar},
  title      = {Survey of Aspect-based Sentiment Analysis Datasets},
  journal    = {arXiv e-prints},
  year       = {2022},
  date       = {2022-04-01},
  eprint     = {2204.05232},
  eprinttype = {arXiv},
  url        = {https://ui.adsabs.harvard.edu/abs/2022arXiv220405232U/abstract},
  abstract   = {Aspect-based sentiment analysis (ABSA) is a natural language processing problem that requires analyzing user-generated reviews in order to determine: a) The target entity being reviewed, b) The high-level aspect to which it belongs, and c) The sentiment expressed toward the targets and the aspects. Numerous yet scattered corpora for ABSA make it difficult for researchers to quickly identify corpora best suited for a specific ABSA subtask. This study aims to present a database of corpora that can be used to train and assess autonomous ABSA systems. Additionally, we provide an overview of the major corpora concerning the various ABSA and its subtasks and highlight several corpus features that researchers should consider when selecting a corpus. We conclude that further large-scale ABSA corpora are required. Additionally, because each corpus is constructed differently, it is time-consuming for researchers to},
  keywords   = {Sentiment analysis},
  pubstate   = {published},
  tppubtype  = {article}
}
2021
Chebolu, Siva Uday Sampreeth; Dernoncourt, Franck; Lipka, Nedim; Solorio, Thamar
Exploring Conditional Text Generation for Aspect-Based Sentiment Analysis Journal Article
In: arXiv e-prints, pp. arXiv: 2110.02334, 2021.
Abstract | Links | BibTeX | Tags: Sentiment analysis
@article{arXive-prints,
  author     = {Chebolu, Siva Uday Sampreeth and Dernoncourt, Franck and Lipka, Nedim and Solorio, Thamar},
  title      = {Exploring Conditional Text Generation for Aspect-Based Sentiment Analysis},
  journal    = {arXiv e-prints},
  year       = {2021},
  date       = {2021-10-01},
  eprint     = {2110.02334},
  eprinttype = {arXiv},
  url        = {https://ui.adsabs.harvard.edu/abs/2021arXiv211002334U/abstract},
  urldate    = {2021-10-01},
  abstract   = {Aspect-based sentiment analysis (ABSA) is an NLP task that entails processing user-generated reviews to determine (i) the target being evaluated,(ii) the aspect category to which it belongs, and (iii) the sentiment expressed towards the target and aspect pair. In this article, we propose transforming ABSA into an abstract summary-like conditional text generation task that uses targets, aspects, and polarities to generate auxiliary statements. To demonstrate the efficacy of our task formulation and a proposed system, we fine-tune a pre-trained model for conditional text generation tasks to get new state-of-the-art results on a few restaurant domains and urban neighborhoods domain benchmark datasets.},
  keywords   = {Sentiment analysis},
  pubstate   = {published},
  tppubtype  = {article}
}
2020
Patwa, Parth; Aguilar, Gustavo; Kar, Sudipta; Pandey, Suraj; PYKL, Srinivas; Gambäck, Björn; Chakraborty, Tanmoy; Solorio, Thamar; Das, Amitava
SemEval-2020 Task 9: Overview of Sentiment Analysis of Code-Mixed Tweets Inproceedings
In: Proceedings of the Fourteenth Workshop on Semantic Evaluation, pp. 774–790, International Committee for Computational Linguistics, Barcelona (online), 2020.
Abstract | Links | BibTeX | Tags: Code-Switching, Sentiment analysis
@inproceedings{patwa-etal-2020-semeval,
  author    = {Patwa, Parth and Aguilar, Gustavo and Kar, Sudipta and Pandey, Suraj and PYKL, Srinivas and Gambäck, Björn and Chakraborty, Tanmoy and Solorio, Thamar and Das, Amitava},
  title     = {{SemEval-2020} Task 9: Overview of Sentiment Analysis of Code-Mixed Tweets},
  booktitle = {Proceedings of the Fourteenth Workshop on Semantic Evaluation},
  pages     = {774--790},
  publisher = {International Committee for Computational Linguistics},
  address   = {Barcelona (online)},
  year      = {2020},
  date      = {2020-01-01},
  url       = {https://www.aclweb.org/anthology/2020.semeval-1.100},
  abstract  = {In this paper, we present the results of the SemEval-2020 Task 9 on Sentiment Analysis of Code-Mixed Tweets (SentiMix 2020). We also release and describe our Hinglish (Hindi-English) and Spanglish (Spanish-English) corpora annotated with word-level language identification and sentence-level sentiment labels. These corpora are comprised of 20K and 19K examples, respectively. The sentiment labels are - Positive, Negative, and Neutral. SentiMix attracted 89 submissions in total including 61 teams that participated in the Hinglish contest and 28 submitted systems to the Spanglish competition. The best performance achieved was 75.0\% F1 score for Hinglish and 80.6\% F1 for Spanglish. We observe that BERT-like models and ensemble methods are the most common and successful approaches among the participants.},
  keywords  = {Code-Switching, Sentiment analysis},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2019
Shafaei, Mahsa; Samghabadi, Niloofar Safi; Kar, Sudipta; Solorio, Thamar
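Rating for Parents: Predicting Children Suitability Rating for Movies Based on Language of the Movies Online
arXiv, (Ed.): 2019, visited: 21.08.2019.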
Abstract | Links | BibTeX | Tags: Abusive Language detection, Sentiment analysis, Text Classification
@online{Shafaei2019cb,
  author     = {Shafaei, Mahsa and Samghabadi, Niloofar Safi and Kar, Sudipta and Solorio, Thamar},
  title      = {Rating for Parents: Predicting Children Suitability Rating for Movies Based on Language of the Movies},
  year       = {2019},
  date       = {2019-08-21},
  eprint     = {1908.07819},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/1908.07819},
  urldate    = {2019-08-21},
  abstract   = {The film culture has grown tremendously in recent years. The large number of streaming services put films as one of the most convenient forms of entertainment in today's world. Films can help us learn and inspire societal change. But they can also negatively affect viewers. In this paper, our goal is to predict the suitability of the movie content for children and young adults based on scripts. The criterion that we use to measure suitability is the MPAA rating that is specifically designed for this purpose. We propose an RNN based architecture with attention that jointly models the genre and the emotions in the script to predict the MPAA rating. We achieve 78\% weighted F1-score for the classification model that outperforms the traditional machine learning method by 6\%.},
  keywords   = {Abusive Language detection, Sentiment analysis, Text Classification},
  pubstate   = {published},
  tppubtype  = {online}
}
Shafaei, Mahsa; Lopez-Monroy, Adrian Pastor; Solorio, Thamar
Exploiting Textual, Visual and Product Features for Predicting the Likeability of Movies Conference
The 32nd International FLAIRS Conference, 2019.
Abstract | Links | BibTeX | Tags: Sentiment analysis, Text Classification
@conference{Shafaei2019,
  author    = {Shafaei, Mahsa and Lopez-Monroy, Adrian Pastor and Solorio, Thamar},
  title     = {Exploiting Textual, Visual and Product Features for Predicting the Likeability of Movies},
  booktitle = {The 32nd International FLAIRS Conference},
  year      = {2019},
  date      = {2019-05-01},
  url       = {https://www.aaai.org/ocs/index.php/FLAIRS/FLAIRS19/paper/view/18305},
  abstract  = {Watching movies is one of the most popular entertainments among people. Every year, a huge amount of money goes to the movie industry to release movies to the market. In this paper, we propose a multimodal model to predict the likability of movies using textual, visual and product features. With the help of these features, we capture different aspects of movies and feed them as inputs to binary and multi-class classification and regression models to predict IMDB rating of movies at early steps of production. We also propose our own dataset consisting of about 15000 movie subtitles along with their metadata and poster images. We achieve 76\% and 63\% weighted F1-score for binary and multiclass classification respectively, and 0.7 mean square error for the regression model. Using prediction methods and data analysis, this research helps the movie business to be more productive.},
  keywords  = {Sentiment analysis, Text Classification},
  pubstate  = {published},
  tppubtype = {conference}
}
2018
Kar, Sudipta; Maharjan, Suraj; Solorio, Thamar
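Folksonomication: Predicting Tags for Movies from Plot Synopses using Emotion Flow encoded Neural Network Conference
Proceedings of the 27th International Conference on Computational Linguistics, 2018.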
Links | BibTeX | Tags: CNN, Narrative Analysis, Sentiment analysis
@conference{Kar2018b,
  author    = {Sudipta Kar and Suraj Maharjan and Thamar Solorio},
  title     = {Folksonomication: Predicting Tags for Movies from Plot Synopses using Emotion Flow encoded Neural Network},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year      = {2018},
  date      = {2018-08-23},
  url       = {http://ritual.uh.edu/folksonomication-2018},
  keywords  = {CNN, Narrative Analysis, Sentiment analysis},
  pubstate  = {published},
  tppubtype = {conference},
}
Kar, Sudipta; Maharjan, Suraj; López-Monroy, A. Pastor; Solorio, Thamar
MPST: A Corpus of Movie Plot Synopses with Tags Conference
Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018), European Language Resources Association (ELRA), 2018.
Abstract | Links | BibTeX | Tags: Information Extraction, Narrative Analysis, Sentiment analysis, Text Classification
@conference{Kar2018,
  author    = {Kar, Sudipta and Maharjan, Suraj and López-Monroy, A. Pastor and Solorio, Thamar},
  title     = {{MPST}: A Corpus of Movie Plot Synopses with Tags},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2018},
  date      = {2018-05-10},
  url       = {http://sudiptakar.info/wp-content/uploads/2018/02/mpst-corpus-movie-2.pdf},
  slides    = {http://sudiptakar.info/wp-content/uploads/2018/05/322_LREC_2018.pdf},
  abstract  = {Social tagging of movies reveals a wide range of heterogeneous information about movies, like the genre, plot structure, soundtracks, metadata, visual and emotional experiences. Such information can be valuable in building automatic systems to create tags for movies. Automatic tagging systems can help recommendation engines to improve the retrieval of similar movies as well as help viewers to know what to expect from a movie in advance. In this paper, we set out to the task of collecting a corpus of movie plot synopses and tags. We describe a methodology that enabled us to build a fine-grained set of around 70 tags exposing heterogeneous characteristics of movie plots and the multi-label associations of these tags with some 14K movie plot synopses. We investigate how these tags correlate with movies and the flow of emotions throughout different types of movies. Finally, we use this corpus to explore the feasibility of inferring tags from plot synopses. We expect the corpus will be useful in other tasks where analysis of narratives is relevant.},
  keywords  = {Information Extraction, Narrative Analysis, Sentiment analysis, Text Classification},
  pubstate  = {published},
  tppubtype = {conference}
}
2017
Kar, Sudipta; Maharjan, Suraj; Solorio, Thamar
RiTUAL-UH at SemEval-2017 Task 5: Sentiment Analysis on Financial Data Using Neural Networks Inproceedings
In: Proceedings of the 11th International Workshop on Semantic Evaluation (SemEval-2017), 2017, (Ranked 2nd for Subtask 2. With alternate scoring, ranked 1st in both subtask.).
Abstract | Links | BibTeX | Tags: CNN, Neural Networks, Sentiment analysis
@inproceedings{Kar2017,
  author    = {Kar, Sudipta and Maharjan, Suraj and Solorio, Thamar},
  title     = {{RiTUAL-UH} at {SemEval-2017} Task 5: Sentiment Analysis on Financial Data Using Neural Networks},
  booktitle = {Proceedings of the 11th International Workshop on Semantic Evaluation (SemEval-2017)},
  year      = {2017},
  date      = {2017-08-03},
  url       = {http://www.aclweb.org/anthology/S17-2150},
  abstract  = {In this paper, we present our systems for the “SemEval-2017 Task-5 on Fine-Grained Sentiment Analysis on Financial Microblogs and News”. In our system, we combined hand-engineered lexical, sentiment and metadata features, the representations learned from Convolutional Neural Networks (CNN) and Bidirectional Gated Recurrent Unit (Bi-GRU) with Attention model applied on top. With this architecture, we obtained weighted cosine similarity scores of 72.34\% and 74.37\% for subtask-1 and subtask-2, respectively. Using the official scoring system, our system ranked the second place for subtask-2 and eighth place for the subtask-1. It ranked first for both of the subtasks by the scores achieved by an alternate scoring system.},
  note      = {Ranked 2nd for Subtask 2. With alternate scoring, ranked 1st in both subtask.},
  keywords  = {CNN, Neural Networks, Sentiment analysis},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
.
2014
Bogdanova, Dasha; Rosso, Paolo; Solorio, Thamar
Exploring High-level Features for Detecting Cyberpedophilia Journal Article
In: Comput. Speech Lang., vol. 28, no. 1, pp. 108–120, 2014, ISSN: 0885-2308.
Links | BibTeX | Tags: Cyberpedophilia, Emotion detection, Sentiment analysis
@article{BogdanovaEtAl:14,
  author    = {Bogdanova, Dasha and Rosso, Paolo and Solorio, Thamar},
  title     = {Exploring High-level Features for Detecting Cyberpedophilia},
  journal   = {Computer Speech \& Language},
  volume    = {28},
  number    = {1},
  pages     = {108--120},
  publisher = {Academic Press Ltd.},
  address   = {London, UK},
  year      = {2014},
  date      = {2014-01-01},
  issn      = {0885-2308},
  doi       = {10.1016/j.csl.2013.04.007},
  url       = {https://doi.org/10.1016/j.csl.2013.04.007},
  keywords  = {Cyberpedophilia, Emotion detection, Sentiment analysis},
  pubstate  = {published},
  tppubtype = {article}
}
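2019
Shafaei, Mahsa; Samghabadi, Niloofar Safi; Kar, Sudipta; Solorio, Thamar
Rating for Parents: Predicting Children Suitability Rating for Movies Based on Language of the Movies Online
arXiv, (Ed.): 2019.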
Abstract | Links | BibTeX | Tags: Abusive Language detection, Sentiment analysis, Text Classification
@online{Shafaei2019c,
  author     = {Shafaei, Mahsa and Samghabadi, Niloofar Safi and Kar, Sudipta and Solorio, Thamar},
  title      = {Rating for Parents: Predicting Children Suitability Rating for Movies Based on Language of the Movies},
  year       = {2019},
  date       = {2019-08-21},
  eprint     = {1908.07819},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/1908.07819},
  abstract   = {The film culture has grown tremendously in recent years. The large number of streaming services put films as one of the most convenient forms of entertainment in today's world. Films can help us learn and inspire societal change. But they can also negatively affect viewers. In this paper, our goal is to predict the suitability of the movie content for children and young adults based on scripts. The criterion that we use to measure suitability is the MPAA rating that is specifically designed for this purpose. We propose an RNN based architecture with attention that jointly models the genre and the emotions in the script to predict the MPAA rating. We achieve 78\% weighted F1-score for the classification model that outperforms the traditional machine learning method by 6\%.},
  keywords   = {Abusive Language detection, Sentiment analysis, Text Classification},
  pubstate   = {published},
  tppubtype  = {online}
}