2019 |
Shafaei, Mahsa; Samghabadi, Niloofar Safi; Kar, Sudipta; Solorio, Thamar Rating for Parents: Predicting Children Suitability Rating for Movies Based on Language of the Movies Online arXiv:1908.07819, 2019, visited: 21.08.2019. Abstract | Links | BibTeX | Tags: Abusive Language detection, Sentiment analysis, Text Classification

Entry notes: arXiv preprint. The archive is recorded through the eprint and eprinttype fields (identifier taken from the URL) instead of being listed as an editor. Percent signs in the abstract field are escaped so the field is safe to typeset.

@online{Shafaei2019cb,
  author     = {Shafaei, Mahsa and Samghabadi, Niloofar Safi and Kar, Sudipta and Solorio, Thamar},
  title      = {Rating for Parents: Predicting Children Suitability Rating for Movies Based on Language of the Movies},
  year       = {2019},
  date       = {2019-08-21},
  eprint     = {1908.07819},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/1908.07819},
  urldate    = {2019-08-21},
  abstract   = {The film culture has grown tremendously in recent years. The large number of streaming services put films as one of the most convenient forms of entertainment in today's world. Films can help us learn and inspire societal change. But they can also negatively affect viewers. In this paper, our goal is to predict the suitability of the movie content for children and young adults based on scripts. The criterion that we use to measure suitability is the MPAA rating that is specifically designed for this purpose. We propose an RNN based architecture with attention that jointly models the genre and the emotions in the script to predict the MPAA rating. We achieve 78\% weighted F1-score for the classification model that outperforms the traditional machine learning method by 6\%.},
  keywords   = {Abusive Language detection, Sentiment analysis, Text Classification},
  pubstate   = {published},
  tppubtype  = {online}
}

The film culture has grown tremendously in recent years. The large number of streaming services put films as one of the most convenient forms of entertainment in today's world. Films can help us learn and inspire societal change. But they can also negatively affect viewers. In this paper, our goal is to predict the suitability of the movie content for children and young adults based on scripts. The criterion that we use to measure suitability is the MPAA rating that is specifically designed for this purpose. We propose an RNN based architecture with attention that jointly models the genre and the emotions in the script to predict the MPAA rating. We achieve 78% weighted F1-score for the classification model that outperforms the traditional machine learning method by 6%. |
Shafaei, Mahsa; Lopez-Monroy, Adrian Pastor; Solorio, Thamar Exploiting Textual, Visual and Product Features for Predicting the Likeability of Movies Conference The 32nd International FLAIRS Conference, 2019. Abstract | Links | BibTeX | Tags: Sentiment analysis, Text Classification

Entry notes: conference paper. The conference name is stored in booktitle (it was previously in the publisher field, which left the entry without the booktitle this entry type requires). Percent signs in the abstract field are escaped.

@inproceedings{Shafaei2019,
  author    = {Shafaei, Mahsa and Lopez-Monroy, Adrian Pastor and Solorio, Thamar},
  title     = {Exploiting Textual, Visual and Product Features for Predicting the Likeability of Movies},
  booktitle = {The 32nd International {FLAIRS} Conference},
  year      = {2019},
  date      = {2019-05-01},
  url       = {https://www.aaai.org/ocs/index.php/FLAIRS/FLAIRS19/paper/view/18305},
  abstract  = {Watching movies is one of the most popular entertainments among people. Every year, a huge amount of money goes to the movie industry to release movies to the market. In this paper, we propose a multimodal model to predict the likability of movies using textual, visual and product features. With the help of these features, we capture different aspects of movies and feed them as inputs to binary and multi-class classification and regression models to predict IMDB rating of movies at early steps of production. We also propose our own dataset consisting of about 15000 movie subtitles along with their metadata and poster images. We achieve 76\% and 63\% weighted F1-score for binary and multiclass classification respectively, and 0.7 mean square error for the regression model. Using prediction methods and data analysis, this research helps the movie business to be more productive.},
  keywords  = {Sentiment analysis, Text Classification},
  pubstate  = {published},
  tppubtype = {conference}
}

Watching movies is one of the most popular entertainments among people. Every year, a huge amount of money goes to the movie industry to release movies to the market. In this paper, we propose a multimodal model to predict the likability of movies using textual, visual and product features. With the help of these features, we capture different aspects of movies and feed them as inputs to binary and multi-class classification and regression models to predict IMDB rating of movies at early steps of production. We also propose our own dataset consisting of about 15000 movie subtitles along with their metadata and poster images. We achieve 76% and 63% weighted F1-score for binary and multiclass classification respectively, and 0.7 mean square error for the regression model. Using prediction methods and data analysis, this research helps the movie business to be more productive. |
2018 |
Kar, Sudipta; Maharjan, Suraj; Solorio, Thamar Folksonomication: Predicting Tags for Movies from Plot Synopses using Emotion Flow encoded Neural Network Conference Proceedings of the 27th International Conference on Computational Linguistics, 2018. Links | BibTeX | Tags: CNN, Narrative Analysis, Sentiment analysis

Entry notes: conference paper; no abstract is recorded for this entry.

@inproceedings{Kar2018b,
  author    = {Kar, Sudipta and Maharjan, Suraj and Solorio, Thamar},
  title     = {Folksonomication: Predicting Tags for Movies from Plot Synopses using Emotion Flow encoded Neural Network},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year      = {2018},
  date      = {2018-08-23},
  url       = {http://ritual.uh.edu/folksonomication-2018},
  keywords  = {CNN, Narrative Analysis, Sentiment analysis},
  pubstate  = {published},
  tppubtype = {conference}
}
|
Kar, Sudipta; Maharjan, Suraj; López-Monroy, Pastor A; Solorio, Thamar MPST: A Corpus of Movie Plot Synopses with Tags Conference Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018), European Language Resources Association (ELRA), 2018. Abstract | Links | BibTeX | Tags: Information Extraction, Narrative Analysis, Sentiment analysis, Text Classification

Entry notes: the url field previously held two "address, label" pairs fused into one value. The address labelled Paper is now the url; the address labelled Slide is kept in the custom slidesurl field, which standard styles ignore.
NOTE(review): the label-to-address pairing is read off the original string (first address labelled Slide, second labelled Paper) - confirm which PDF is actually the paper.

@inproceedings{Kar2018,
  author    = {Kar, Sudipta and Maharjan, Suraj and López-Monroy, A. Pastor and Solorio, Thamar},
  title     = {{MPST}: A Corpus of Movie Plot Synopses with Tags},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2018},
  date      = {2018-05-10},
  url       = {http://sudiptakar.info/wp-content/uploads/2018/02/mpst-corpus-movie-2.pdf},
  slidesurl = {http://sudiptakar.info/wp-content/uploads/2018/05/322_LREC_2018.pdf},
  abstract  = {Social tagging of movies reveals a wide range of heterogeneous information about movies, like the genre, plot structure, soundtracks, metadata, visual and emotional experiences. Such information can be valuable in building automatic systems to create tags for movies. Automatic tagging systems can help recommendation engines to improve the retrieval of similar movies as well as help viewers to know what to expect from a movie in advance. In this paper, we set out to the task of collecting a corpus of movie plot synopses and tags. We describe a methodology that enabled us to build a fine-grained set of around 70 tags exposing heterogeneous characteristics of movie plots and the multi-label associations of these tags with some 14K movie plot synopses. We investigate how these tags correlate with movies and the flow of emotions throughout different types of movies. Finally, we use this corpus to explore the feasibility of inferring tags from plot synopses. We expect the corpus will be useful in other tasks where analysis of narratives is relevant.},
  keywords  = {Information Extraction, Narrative Analysis, Sentiment analysis, Text Classification},
  pubstate  = {published},
  tppubtype = {conference}
}

Social tagging of movies reveals a wide range of heterogeneous information about movies, like the genre, plot structure, soundtracks, metadata, visual and emotional experiences. Such information can be valuable in building automatic systems to create tags for movies. Automatic tagging systems can help recommendation engines to improve the retrieval of similar movies as well as help viewers to know what to expect from a movie in advance. In this paper, we set out to the task of collecting a corpus of movie plot synopses and tags. We describe a methodology that enabled us to build a fine-grained set of around 70 tags exposing heterogeneous characteristics of movie plots and the multi-label associations of these tags with some 14K movie plot synopses. We investigate how these tags correlate with movies and the flow of emotions throughout different types of movies. Finally, we use this corpus to explore the feasibility of inferring tags from plot synopses. We expect the corpus will be useful in other tasks where analysis of narratives is relevant. |
2017 |
Kar, Sudipta; Maharjan, Suraj; Solorio, Thamar RiTUAL-UH at SemEval-2017 Task 5: Sentiment Analysis on Financial Data Using Neural Networks Inproceedings Proceedings of the 11th International Workshop on Semantic Evaluation (SemEval-2017), 2017, (Ranked 2nd for Subtask 2. With alternate scoring, ranked 1st in both subtasks.). Abstract | Links | BibTeX | Tags: CNN, Neural Networks, Sentiment analysis

Entry notes: workshop paper. The proceedings title is stored in booktitle (it was previously in the publisher field). Mixed-case names in the title are brace-protected so sentence-casing styles keep them intact, and percent signs in the abstract field are escaped.

@inproceedings{Kar2017,
  author    = {Kar, Sudipta and Maharjan, Suraj and Solorio, Thamar},
  title     = {{RiTUAL-UH} at {SemEval-2017} Task 5: Sentiment Analysis on Financial Data Using Neural Networks},
  booktitle = {Proceedings of the 11th International Workshop on Semantic Evaluation ({SemEval-2017})},
  year      = {2017},
  date      = {2017-08-03},
  url       = {http://www.aclweb.org/anthology/S17-2150},
  abstract  = {In this paper, we present our systems for the “SemEval-2017 Task-5 on Fine-Grained Sentiment Analysis on Financial Microblogs and News”. In our system, we combined hand-engineered lexical, sentiment and metadata features, the representations learned from Convolutional Neural Networks (CNN) and Bidirectional Gated Recurrent Unit (Bi-GRU) with Attention model applied on top. With this architecture, we obtained weighted cosine similarity scores of 72.34\% and 74.37\% for subtask-1 and subtask-2, respectively. Using the official scoring system, our system ranked the second place for subtask-2 and eighth place for the subtask-1. It ranked first for both of the subtasks by the scores achieved by an alternate scoring system.},
  note      = {Ranked 2nd for Subtask 2. With alternate scoring, ranked 1st in both subtasks.},
  keywords  = {CNN, Neural Networks, Sentiment analysis},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

In this paper, we present our systems for the “SemEval-2017 Task-5 on Fine-Grained Sentiment Analysis on Financial Microblogs and News”. In our system, we combined hand-engineered lexical, sentiment and metadata features, the representations learned from Convolutional Neural Networks (CNN) and Bidirectional Gated Recurrent Unit (Bi-GRU) with Attention model applied on top. With this architecture, we obtained weighted cosine similarity scores of 72.34% and 74.37% for subtask-1 and subtask-2, respectively. Using the official scoring system, our system ranked the second place for subtask-2 and eighth place for the subtask-1. It ranked first for both of the subtasks by the scores achieved by an alternate scoring system. |
2014 |
Bogdanova, Dasha; Rosso, Paolo; Solorio, Thamar Exploring High-level Features for Detecting Cyberpedophilia Journal Article Comput. Speech Lang., 28 (1), pp. 108–120, 2014, ISSN: 0885-2308. Links | BibTeX | Tags: Cyberpedophilia, Emotion detection, Sentiment analysis

Entry notes: journal article. The journal is stored under its full name so the style decides on abbreviation. The address no longer repeats the country. The work is identified by its bare DOI; the former url field only repeated that DOI through the legacy dx.doi.org resolver.

@article{BogdanovaEtAl:14,
  author    = {Bogdanova, Dasha and Rosso, Paolo and Solorio, Thamar},
  title     = {Exploring High-level Features for Detecting Cyberpedophilia},
  journal   = {Computer Speech \& Language},
  volume    = {28},
  number    = {1},
  pages     = {108--120},
  year      = {2014},
  date      = {2014-01-01},
  publisher = {Academic Press Ltd.},
  address   = {London, UK},
  doi       = {10.1016/j.csl.2013.04.007},
  issn      = {0885-2308},
  keywords  = {Cyberpedophilia, Emotion detection, Sentiment analysis},
  pubstate  = {published},
  tppubtype = {article}
}
|
0000 |
Shafaei, Mahsa; Samghabadi, Niloofar Safi; Kar, Sudipta; Solorio, Thamar Rating for Parents: Predicting Children Suitability Rating for Movies Based on Language of the Movies Online arXiv:1908.07819, 2019. Abstract | Links | BibTeX | Tags: Abusive Language detection, Sentiment analysis, Text Classification

Entry notes: arXiv preprint. The entry previously carried no year or date, which is why it was listed under 0000; the date now matches the arXiv identifier 1908.07819 and the date recorded for the same work under key Shafaei2019cb. The archive is recorded through the eprint and eprinttype fields instead of being listed as an editor.
NOTE(review): this is the same work as the entry keyed Shafaei2019cb listed under 2019 - consider merging the two and keeping one key as an alias.

@online{Shafaei2019c,
  author     = {Shafaei, Mahsa and Samghabadi, Niloofar Safi and Kar, Sudipta and Solorio, Thamar},
  title      = {Rating for Parents: Predicting Children Suitability Rating for Movies Based on Language of the Movies},
  year       = {2019},
  date       = {2019-08-21},
  eprint     = {1908.07819},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/1908.07819},
  abstract   = {The film culture has grown tremendously in recent years. The large number of streaming services put films as one of the most convenient forms of entertainment in today's world. Films can help us learn and inspire societal change. But they can also negatively affect viewers. In this paper, our goal is to predict the suitability of the movie content for children and young adults based on scripts. The criterion that we use to measure suitability is the MPAA rating that is specifically designed for this purpose. We propose an RNN based architecture with attention that jointly models the genre and the emotions in the script to predict the MPAA rating. We achieve 78\% weighted F1-score for the classification model that outperforms the traditional machine learning method by 6\%.},
  keywords   = {Abusive Language detection, Sentiment analysis, Text Classification},
  pubstate   = {published},
  tppubtype  = {online}
}

The film culture has grown tremendously in recent years. The large number of streaming services put films as one of the most convenient forms of entertainment in today's world. Films can help us learn and inspire societal change. But they can also negatively affect viewers. In this paper, our goal is to predict the suitability of the movie content for children and young adults based on scripts. The criterion that we use to measure suitability is the MPAA rating that is specifically designed for this purpose. We propose an RNN based architecture with attention that jointly models the genre and the emotions in the script to predict the MPAA rating. We achieve 78% weighted F1-score for the classification model that outperforms the traditional machine learning method by 6%. |