2020
Kar, Sudipta; Aguilar, Gustavo; Lapata, Mirella; Solorio, Thamar
Multi-view Story Characterization from Movie Plot Synopses and Reviews Conference
EMNLP 2020, ACL 2020.
Links | BibTeX | Tags: Narrative Analysis, Text Classification
% Conference paper (EMNLP 2020). Page range uses the BibTeX en-dash form (--).
@conference{Kar2020,
  author       = {Kar, Sudipta and Aguilar, Gustavo and Lapata, Mirella and Solorio, Thamar},
  title        = {Multi-view Story Characterization from Movie Plot Synopses and Reviews},
  booktitle    = {EMNLP 2020},
  pages        = {5629--5646},
  organization = {ACL},
  year         = {2020},
  date         = {2020-11-16},
  url          = {https://www.aclweb.org/anthology/2020.emnlp-main.454.pdf},
  keywords     = {Narrative Analysis, Text Classification},
  pubstate     = {published},
  tppubtype    = {conference}
}
Shafaei, Mahsa; Samghabadi, Niloofar Safi; Kar, Sudipta; Solorio, Thamar
Age Suitability Rating: Predicting the MPAA Rating Based on Movie Dialogues Proceeding
LREC, 2020.
Abstract | Links | BibTeX | Tags: MPAA Rating, Story Analysis, Text Classification
% Paper published in the LREC 2020 proceedings. Typed as inproceedings because
% it is a single authored paper, not a whole proceedings volume; the venue is
% recorded in booktitle rather than publisher.
@inproceedings{Shafaei2020,
  author    = {Shafaei, Mahsa and Samghabadi, Niloofar Safi and Kar, Sudipta and Solorio, Thamar},
  title     = {Age Suitability Rating: Predicting the {MPAA} Rating Based on Movie Dialogues},
  booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference (LREC 2020)},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2020},
  date      = {2020-05-01},
  url       = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.166.pdf},
  abstract  = {Movies help us learn and inspire societal change. But they can also contain objectionable content that negatively affects viewers' behavior, especially children.
In this paper, our goal is to predict the suitability of movie content for children and young adults based on scripts. The criterion that we use to measure suitability is the MPAA rating that is specifically designed for this purpose. We create a corpus for movie MPAA ratings and propose an RNN-based architecture with attention that jointly models the genre and the emotions in the script to predict the MPAA rating. We achieve 81% weighted F1-score for the classification model that outperforms the traditional machine learning method by 7%.},
  keywords  = {MPAA Rating, Story Analysis, Text Classification},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
In this paper, our goal is to predict the suitability of movie content for children and young adults based on scripts. The criterion that we use to measure suitability is the MPAA rating that is specifically designed for this purpose. We create a corpus for movie MPAA ratings and propose an RNN-based architecture with attention that jointly models the genre and the emotions in the script to predict the MPAA rating. We achieve 81% weighted F1-score for the classification model that outperforms the traditional machine learning method by 7%.
2019
Shafaei, Mahsa; Samghabadi, Niloofar Safi; Kar, Sudipta; Solorio, Thamar
arXiv, (Ed.): 2019, visited: 21.08.2019.
Abstract | Links | BibTeX | Tags: Abusive Language detection, Sentiment analysis, Text Classification
% arXiv preprint. The repository is recorded through the eprint fields instead
% of being listed as an editor (arXiv is not a person).
@online{Shafaei2019cb,
  author     = {Shafaei, Mahsa and Samghabadi, Niloofar Safi and Kar, Sudipta and Solorio, Thamar},
  title      = {Rating for Parents: Predicting Children Suitability Rating for Movies Based on Language of the Movies},
  eprint     = {1908.07819},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/1908.07819},
  urldate    = {2019-08-21},
  year       = {2019},
  date       = {2019-08-21},
  abstract   = {The film culture has grown tremendously in recent years. The large number of streaming services put films as one of the most convenient forms of entertainment in today's world. Films can help us learn and inspire societal change. But they can also negatively affect viewers. In this paper, our goal is to predict the suitability of the movie content for children and young adults based on scripts. The criterion that we use to measure suitability is the MPAA rating that is specifically designed for this purpose. We propose an RNN based architecture with attention that jointly models the genre and the emotions in the script to predict the MPAA rating. We achieve 78% weighted F1-score for the classification model that outperforms the traditional machine learning method by 6%.},
  keywords   = {Abusive Language detection, Sentiment analysis, Text Classification},
  pubstate   = {published},
  tppubtype  = {online}
}
Shafaei, Mahsa; Lopez-Monroy, Adrian Pastor; Solorio, Thamar
Exploiting Textual, Visual and Product Features for Predicting the Likeability of Movies Conference
The 32nd International FLAIRS Conference, 2019.
Abstract | Links | BibTeX | Tags: Sentiment analysis, Text Classification
% Conference paper (FLAIRS-32). The conference name belongs in booktitle;
% NOTE(review): publisher inferred from the aaai.org URL -- confirm.
@conference{Shafaei2019,
  author    = {Shafaei, Mahsa and Lopez-Monroy, Adrian Pastor and Solorio, Thamar},
  title     = {Exploiting Textual, Visual and Product Features for Predicting the Likeability of Movies},
  booktitle = {The 32nd International FLAIRS Conference},
  publisher = {AAAI Press},
  year      = {2019},
  date      = {2019-05-01},
  url       = {https://www.aaai.org/ocs/index.php/FLAIRS/FLAIRS19/paper/view/18305},
  abstract  = {Watching movies is one of the most popular entertainments among people. Every year, a huge amount of money goes to the movie industry to release movies to the market. In this paper, we propose a multimodal model to predict the likability of movies using textual, visual and product features. With the help of these features, we capture different aspects of movies and feed them as inputs to binary and multi-class classification and regression models to predict IMDB rating of movies at early steps of production. We also propose our own dataset consisting of about 15000 movie subtitles along with their metadata and poster images. We achieve 76% and 63% weighted F1-score for binary and multiclass classification respectively, and 0.7 mean square error for the regression model. Using prediction methods and data analysis, this research helps the movie business to be more productive.},
  keywords  = {Sentiment analysis, Text Classification},
  pubstate  = {published},
  tppubtype = {conference}
}
2018
López-Monroy, A. Pastor; González, Fabio A.; Montes-y-Gómez, Manuel; Escalante, Hugo Jair; Solorio, Thamar
Early Text Classification using Multi-Resolution Concept Representations Conference
The 16th Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Association for Computational Linguistics, 2018.
Abstract | Links | BibTeX | Tags: Text Classification
% Conference paper (NAACL-HLT 2018). The association is the publisher only;
% it is not repeated as an editor.
@conference{pastor18,
  author    = {López-Monroy, A. Pastor and González, Fabio A. and Montes-y-Gómez, Manuel and Escalante, Hugo Jair and Solorio, Thamar},
  title     = {Early Text Classification using Multi-Resolution Concept Representations},
  booktitle = {The 16th Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  publisher = {Association for Computational Linguistics},
  year      = {2018},
  date      = {2018-06-04},
  url       = {http://www.aclweb.org/anthology/N18-1110},
  abstract  = {This paper proposes a novel document representation, called Multi-Resolution Representation (MulR), to improve the early detection of risks in social media sources. The goal is to effectively identify the potential risk using as little evidence as possible and with as much anticipation as possible. MulR allows us to generate multiple ``views'' of the text. These views capture different semantic meanings for words and documents at different levels of granularity, which is very useful in early scenarios to model the variable amounts of evidence. Our experimental evaluation shows that MuLR using low resolution is better suited for modeling short documents (very early stages), whereas large documents (medium/late stages) are better modeled with higher resolutions. We evaluate the proposed ideas in two different tasks where anticipation is critical: sexual predator detection and depression detection. The experimental evaluation for these early tasks revealed that the proposed approach outperforms previous methodologies by a considerable margin.},
  keywords  = {Text Classification},
  pubstate  = {published},
  tppubtype = {conference}
}
Kar, Sudipta; Maharjan, Suraj; López-Monroy, A. Pastor; Solorio, Thamar
MPST: A Corpus of Movie Plot Synopses with Tags Conference
Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018), European Language Resources Association (ELRA), 2018.
Abstract | Links | BibTeX | Tags: Information Extraction, Narrative Analysis, Sentiment analysis, Text Classification
% Conference paper (LREC 2018). The url field holds two "URL, label" lines and
% is kept verbatim; NOTE(review): this looks like a publication-plugin
% convention rather than a plain URL -- confirm before normalising.
@conference{Kar2018,
  author    = {Kar, Sudipta and Maharjan, Suraj and López-Monroy, A. Pastor and Solorio, Thamar},
  title     = {{MPST}: A Corpus of Movie Plot Synopses with Tags},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2018},
  date      = {2018-05-10},
  url       = {http://sudiptakar.info/wp-content/uploads/2018/05/322_LREC_2018.pdf, Slide
http://sudiptakar.info/wp-content/uploads/2018/02/mpst-corpus-movie-2.pdf, Paper},
  abstract  = {Social tagging of movies reveals a wide range of heterogeneous information about movies, like the genre, plot structure, soundtracks, metadata, visual and emotional experiences. Such information can be valuable in building automatic systems to create tags for movies. Automatic tagging systems can help recommendation engines to improve the retrieval of similar movies as well as help viewers to know what to expect from a movie in advance. In this paper, we set out to the task of collecting a corpus of movie plot synopses and tags. We describe a methodology that enabled us to build a fine-grained set of around 70 tags exposing heterogeneous characteristics of movie plots and the multi-label associations of these tags with some 14K movie plot synopses. We investigate how these tags correlate with movies and the flow of emotions throughout different types of movies. Finally, we use this corpus to explore the feasibility of inferring tags from plot synopses. We expect the corpus will be useful in other tasks where analysis of narratives is relevant.},
  keywords  = {Information Extraction, Narrative Analysis, Sentiment analysis, Text Classification},
  pubstate  = {published},
  tppubtype = {conference}
}
2016
Franco-Salvador, Marc; Kar, Sudipta; Solorio, Thamar; Rosso, Paolo
UH-PRHLT at SemEval-2016 Task 3: Combining Lexical and Semantic-based Features for Community Question Answering Inproceedings
In: Proceedings of SemEval-2016, pp. 814–821, Association for Computational Linguistics, San Diego, California, 2016.
Links | BibTeX | Tags: Question Answering, Text Classification
% SemEval-2016 system description paper. Page range uses the BibTeX en-dash
% form (--); acronyms in the title are brace-protected against style recasing.
@inproceedings{Franco-Salvador2016,
  author    = {Franco-Salvador, Marc and Kar, Sudipta and Solorio, Thamar and Rosso, Paolo},
  title     = {{UH-PRHLT} at {SemEval-2016} Task 3: Combining Lexical and Semantic-based Features for Community Question Answering},
  booktitle = {Proceedings of SemEval-2016},
  pages     = {814--821},
  publisher = {Association for Computational Linguistics},
  address   = {San Diego, California},
  year      = {2016},
  date      = {2016-06-16},
  url       = {https://aclweb.org/anthology/S/S16/S16-1126.pdf},
  keywords  = {Question Answering, Text Classification},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2012
Ramírez-de-la-Rosa, Gabriela; Montes-y-Gómez, Manuel; Solorio, Thamar; Villaseñor-Pineda, Luis
A document is known by the company it keeps: Neighborhood consensus for short text categorization Journal Article
In: Language Resources and Evaluation, no. 47, pp. 127–149, 2012.
BibTeX | Tags: Text Classification
% Journal article. NOTE(review): 47 is recorded as the journal volume rather
% than the issue number -- confirm against the publisher record.
@article{RamirezEtAl:12,
  author    = {Ramírez-de-la-Rosa, Gabriela and Montes-y-Gómez, Manuel and Solorio, Thamar and Villaseñor-Pineda, Luis},
  title     = {A document is known by the company it keeps: Neighborhood consensus for short text categorization},
  journal   = {Language Resources and Evaluation},
  volume    = {47},
  pages     = {127--149},
  year      = {2012},
  date      = {2012-01-01},
  keywords  = {Text Classification},
  pubstate  = {published},
  tppubtype = {article}
}
2011
Dey, Debangana; Solorio, Thamar; Montes-y-Gómez, Manuel; Escalante, Hugo
Instance Selection based on the Silhouette Coefficient Measure for Text Classification Inproceedings
In: 10th Mexican International Conference on Artificial Intelligence, pp. 357–369, Puebla, Mexico, 2011 (acceptance rate 27.5%).
BibTeX | Tags: Text Classification
% Conference paper. The former publisher value was a leaked, commented-out
% note; the acceptance rate it carried is kept in the non-printing annote field.
@inproceedings{DeyEtAl:11,
  author    = {Dey, Debangana and Solorio, Thamar and Montes-y-Gómez, Manuel and Escalante, Hugo},
  title     = {Instance Selection based on the Silhouette Coefficient Measure for Text Classification},
  booktitle = {10th Mexican International Conference on Artificial Intelligence},
  pages     = {357--369},
  address   = {Puebla, Mexico},
  year      = {2011},
  date      = {2011-11-01},
  annote    = {Acceptance rate 27.5\%},
  keywords  = {Text Classification},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
0000
Samghabadi, Niloofar S.; Maharjan, Suraj; Sprague, Alan; Sprague, Raquel D.; Solorio, Thamar
Detecting Nastiness in Social Media Inproceedings
In: 0000.
BibTeX | Tags: Abusive Language detection, Text Classification
% Workshop paper. Authors are separated with "and" so each name is parsed
% individually. NOTE(review): year is taken from the citation key and the
% booktitle from the published record of this paper -- confirm both.
@inproceedings{Safi2017,
  author    = {Samghabadi, Niloofar S. and Maharjan, Suraj and Sprague, Alan and Sprague, Raquel D. and Solorio, Thamar},
  title     = {Detecting Nastiness in Social Media},
  booktitle = {Proceedings of the First Workshop on Abusive Language Online},
  year      = {2017},
  keywords  = {Abusive Language detection, Text Classification},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Shafaei, Mahsa; Samghabadi, Niloofar Safi; Kar, Sudipta; Solorio, Thamar
arXiv, (Ed.): 0000.
Abstract | Links | BibTeX | Tags: Abusive Language detection, Text Classification
% arXiv preprint (arXiv:1908.07819). Authors are separated with "and"; the
% year and date match the arXiv identifier (1908 = August 2019).
@online{Shafaei2019b,
  author     = {Shafaei, Mahsa and Samghabadi, Niloofar Safi and Kar, Sudipta and Solorio, Thamar},
  title      = {Rating for Parents: Predicting Children Suitability Rating for Movies Based on Language of the Movies},
  eprint     = {1908.07819},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/1908.07819},
  year       = {2019},
  date       = {2019-08-21},
  abstract   = {The film culture has grown tremendously in recent years. The large number of streaming services put films as one of the most convenient forms of entertainment in today's world. Films can help us learn and inspire societal change. But they can also negatively affect viewers. In this paper, our goal is to predict the suitability of the movie content for children and young adults based on scripts. The criterion that we use to measure suitability is the MPAA rating that is specifically designed for this purpose. We propose an RNN based architecture with attention that jointly models the genre and the emotions in the script to predict the MPAA rating. We achieve 78% weighted F1-score for the classification model that outperforms the traditional machine learning method by 6%.},
  keywords   = {Abusive Language detection, Text Classification},
  pubstate   = {published},
  tppubtype  = {online}
}
Shafaei, Mahsa; Samghabadi, Niloofar Safi; Kar, Sudipta; Solorio, Thamar
arXiv, (Ed.): 0000.
Abstract | Links | BibTeX | Tags: Abusive Language detection, Sentiment analysis, Text Classification
% arXiv preprint (arXiv:1908.07819). Year and date are filled in to match the
% arXiv identifier (1908 = August 2019); the repository is recorded through
% the eprint fields instead of an editor.
@online{Shafaei2019c,
  author     = {Shafaei, Mahsa and Samghabadi, Niloofar Safi and Kar, Sudipta and Solorio, Thamar},
  title      = {Rating for Parents: Predicting Children Suitability Rating for Movies Based on Language of the Movies},
  eprint     = {1908.07819},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/1908.07819},
  year       = {2019},
  date       = {2019-08-21},
  abstract   = {The film culture has grown tremendously in recent years. The large number of streaming services put films as one of the most convenient forms of entertainment in today's world. Films can help us learn and inspire societal change. But they can also negatively affect viewers. In this paper, our goal is to predict the suitability of the movie content for children and young adults based on scripts. The criterion that we use to measure suitability is the MPAA rating that is specifically designed for this purpose. We propose an RNN based architecture with attention that jointly models the genre and the emotions in the script to predict the MPAA rating. We achieve 78% weighted F1-score for the classification model that outperforms the traditional machine learning method by 6%.},
  keywords   = {Abusive Language detection, Sentiment analysis, Text Classification},
  pubstate   = {published},
  tppubtype  = {online}
}