spaCy’s Approach to Sentiment Analysis
spaCy is a modern, industrial-strength NLP library with strong multilingual capabilities that can be extended for sentiment analysis. Unlike NLTK, spaCy is designed from the ground up for production use and provides pretrained pipelines for multiple languages behind a consistent API.
spaCy doesn’t include built-in sentiment analysis in its core library, but it integrates cleanly with sentiment-analysis extensions and models. The most common approaches are training spaCy’s own text-classification component (textcat), plugging in transformer models through spacy-transformers, or using an extension such as spacytextblob for simpler sentiment tasks. Keep in mind that spacytextblob delegates scoring to TextBlob, whose default analyzer is trained on English, so polarity scores for other languages should be treated as rough indicators.
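Before the full example below, here is a minimal sketch of the spacytextblob pattern it builds on (assuming spacytextblob and the en_core_web_sm pipeline are installed):

import spacy
from spacytextblob.spacytextblob import SpacyTextBlob  # importing registers the "spacytextblob" factory

nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("spacytextblob")  # append the sentiment component to the pipeline

doc = nlp("The service was wonderful.")
print(doc._.blob.polarity, doc._.blob.subjectivity)  # polarity in [-1, 1], subjectivity in [0, 1]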
Multilingual Support in spaCy
English has the most comprehensive support, with pretrained pipelines (en_core_web_sm/md/lg) that provide rich text-processing capabilities and are easily extended with sentiment analysis.
French is well supported with dedicated pipelines (fr_core_news_sm/md/lg) that handle French tokenization, morphology, and syntax effectively and combine well with a sentiment component.
Arabic has basic language support (tokenization and right-to-left text handling), but spaCy currently ships no official pretrained Arabic pipeline, so Arabic sentiment analysis relies on third-party or custom-trained models and resources remain more limited than for English and French. The short check below shows one way to verify which pipelines are actually installed.
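A minimal sketch using spacy.util.is_package, assuming only that spaCy itself is installed; the model names match those used in the larger example that follows:

import spacy

# Check which of the pipelines used below are installed locally.
for model_name in ("en_core_web_sm", "fr_core_news_sm", "ar_core_news_sm"):
    if spacy.util.is_package(model_name):
        print(f"{model_name} is installed")
    else:
        # For Arabic this will normally be the case, since no official pipeline is published.
        print(f"{model_name} is missing; try: python -m spacy download {model_name}")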
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob  # importing registers the "spacytextblob" pipeline factory
import pandas as pd
from collections import defaultdict

# Note: Install required packages
# pip install spacy spacytextblob textblob
# python -m spacy download en_core_web_sm
# python -m spacy download fr_core_news_sm
# python -m spacy download ar_core_news_sm  # may fail: no official Arabic pipeline is published
class MultilingualSentimentAnalyzer:
    def __init__(self):
        """Initialize spaCy models for different languages"""
        self.models = {}
        self.model_names = {
            'english': 'en_core_web_sm',
            'french': 'fr_core_news_sm',
            'arabic': 'ar_core_news_sm'  # no official Arabic pipeline exists, so this is usually skipped
        }
        # Load available models
        for lang, model_name in self.model_names.items():
            try:
                nlp = spacy.load(model_name)
                # Add TextBlob extension for sentiment analysis
                if not nlp.has_pipe('spacytextblob'):
                    nlp.add_pipe('spacytextblob')
                self.models[lang] = nlp
                print(f"✓ Loaded {lang} model: {model_name}")
            except OSError:
                print(f"✗ Could not load {lang} model: {model_name}")
                print(f"  Install with: python -m spacy download {model_name}")
    def basic_sentiment_analysis(self, text, language='english'):
        """Basic sentiment analysis using TextBlob integration"""
        if language not in self.models:
            print(f"Model for {language} not available")
            return None
        nlp = self.models[language]
        doc = nlp(text)
        print(f"\n--- {language.upper()} SENTIMENT ANALYSIS ---")
        print(f"Text: {text}")
        print(f"Polarity: {doc._.blob.polarity:.3f} (Range: -1 to 1)")
        print(f"Subjectivity: {doc._.blob.subjectivity:.3f} (Range: 0 to 1)")
        # Interpret sentiment
        if doc._.blob.polarity > 0.1:
            sentiment_label = "Positive"
        elif doc._.blob.polarity < -0.1:
            sentiment_label = "Negative"
        else:
            sentiment_label = "Neutral"
        print(f"Sentiment: {sentiment_label}")
        return {
            'text': text,
            'language': language,
            'polarity': doc._.blob.polarity,
            'subjectivity': doc._.blob.subjectivity,
            'sentiment': sentiment_label
        }
    def sentence_level_sentiment(self, text, language='english'):
        """Analyze sentiment at sentence level"""
        if language not in self.models:
            print(f"Model for {language} not available")
            return None
        nlp = self.models[language]
        doc = nlp(text)
        print(f"\n--- {language.upper()} SENTENCE-LEVEL SENTIMENT ---")
        sentence_sentiments = []
        for sent in doc.sents:
            sent_doc = nlp(sent.text)
            sentiment_data = {
                'sentence': sent.text.strip(),
                'polarity': sent_doc._.blob.polarity,
                'subjectivity': sent_doc._.blob.subjectivity
            }
            sentence_sentiments.append(sentiment_data)
            print(f"Sentence: '{sent.text.strip()}'")
            print(f"  Polarity: {sent_doc._.blob.polarity:.3f}")
            print(f"  Subjectivity: {sent_doc._.blob.subjectivity:.3f}")
            print()
        return sentence_sentiments
    def entity_sentiment_analysis(self, text, language='english'):
        """Analyze sentiment in relation to named entities"""
        if language not in self.models:
            print(f"Model for {language} not available")
            return None
        nlp = self.models[language]
        doc = nlp(text)
        print(f"\n--- {language.upper()} ENTITY-BASED SENTIMENT ---")
        entities_sentiment = []
        # Get overall document sentiment
        overall_sentiment = doc._.blob.polarity
        # Find entities and their context
        for ent in doc.ents:
            # Get sentence containing the entity
            entity_sent = None
            for sent in doc.sents:
                if ent.start >= sent.start and ent.end <= sent.end:
                    entity_sent = sent
                    break
            if entity_sent:
                sent_doc = nlp(entity_sent.text)
                entity_data = {
                    'entity': ent.text,
                    'label': ent.label_,
                    'sentence': entity_sent.text.strip(),
                    'sentiment': sent_doc._.blob.polarity
                }
                entities_sentiment.append(entity_data)
                print(f"Entity: {ent.text} ({ent.label_})")
                print(f"Context: '{entity_sent.text.strip()}'")
                print(f"Sentiment: {sent_doc._.blob.polarity:.3f}")
                print()
        return entities_sentiment
    def comparative_sentiment_analysis(self, texts_dict):
        """Compare sentiment across multiple languages"""
        print(f"\n--- COMPARATIVE MULTILINGUAL SENTIMENT ANALYSIS ---")
        results = []
        for language, text in texts_dict.items():
            if language in self.models:
                sentiment_result = self.basic_sentiment_analysis(text, language)
                if sentiment_result:
                    results.append(sentiment_result)
        # Create comparison DataFrame
        if results:
            df = pd.DataFrame(results)
            print(f"\nComparative Results:")
            print(df[['language', 'polarity', 'subjectivity', 'sentiment']])
            return df
        return None
    def advanced_sentiment_features(self, text, language='english'):
        """Extract advanced linguistic features that affect sentiment"""
        if language not in self.models:
            print(f"Model for {language} not available")
            return None
        nlp = self.models[language]
        doc = nlp(text)
        print(f"\n--- {language.upper()} ADVANCED SENTIMENT FEATURES ---")
        features = {
            'negations': [],
            'intensifiers': [],
            'sentiment_words': [],
            'pos_distribution': defaultdict(int)
        }
        # Common intensifiers (language-specific lists would be better)
        intensifiers = {
            'english': ['very', 'extremely', 'really', 'quite', 'rather', 'pretty', 'absolutely'],
            'french': ['très', 'extrêmement', 'vraiment', 'assez', 'plutôt', 'absolument'],
            'arabic': ['جداً', 'جدا', 'كثيراً', 'كثيرا', 'للغاية', 'تماماً']
        }
        # Analyze tokens
        for token in doc:
            # POS distribution
            features['pos_distribution'][token.pos_] += 1
            # Find negations ('neg' is the label used by the English pipelines;
            # UD-style pipelines such as the French models may label negation differently)
            if token.dep_ == 'neg':
                features['negations'].append(token.text)
            # Find intensifiers
            if language in intensifiers and token.lemma_.lower() in intensifiers[language]:
                features['intensifiers'].append(token.text)
        # Get sentiment-bearing words (simplified approach: score each candidate word on its own)
        sentiment_doc = nlp(text)
        if abs(sentiment_doc._.blob.polarity) > 0.1:
            for token in doc:
                if token.pos_ in ['ADJ', 'VERB', 'NOUN'] and not token.is_stop:
                    token_sent = nlp(token.text)
                    if abs(token_sent._.blob.polarity) > 0.2:
                        features['sentiment_words'].append({
                            'word': token.text,
                            'pos': token.pos_,
                            'sentiment': token_sent._.blob.polarity
                        })
        print(f"Negations found: {features['negations']}")
        print(f"Intensifiers found: {features['intensifiers']}")
        print(f"Key sentiment words: {[w['word'] for w in features['sentiment_words'][:5]]}")
        print(f"POS distribution: {dict(features['pos_distribution'])}")
        return features
# Initialize the analyzer
analyzer = MultilingualSentimentAnalyzer()
# Example texts for different languages with varying sentiments
example_texts = {
    'english': {
        'positive': "I absolutely love this amazing product! It exceeded all my expectations and made my life so much better.",
        'negative': "This terrible service completely ruined my day. I'm extremely disappointed and frustrated.",
        'neutral': "The weather today is cloudy with a chance of rain. The temperature is around 20 degrees.",
        'mixed': "The hotel room was beautiful and clean, but the staff was quite rude and unhelpful."
    },
    'french': {
        'positive': "J'adore absolument ce produit incroyable! Il a dépassé toutes mes attentes et a rendu ma vie tellement meilleure.",
        'negative': "Ce service terrible a complètement gâché ma journée. Je suis extrêmement déçu et frustré.",
        'neutral': "Le temps aujourd'hui est nuageux avec une possibilité de pluie. La température est d'environ 20 degrés.",
        'mixed': "La chambre d'hôtel était belle et propre, mais le personnel était assez impoli et inutile."
    },
    'arabic': {
        'positive': "أحب هذا المنتج الرائع جداً! لقد فاق كل توقعاتي وجعل حياتي أفضل بكثير.",
        'negative': "هذه الخدمة السيئة دمرت يومي تماماً. أنا محبط ومخيب الأمل للغاية.",
        'neutral': "الطقس اليوم غائم مع احتمال هطول أمطار. درجة الحرارة حوالي 20 درجة.",
        'mixed': "غرفة الفندق كانت جميلة ونظيفة، لكن الموظفين كانوا وقحين وغير مفيدين."
    }
}
# Run analysis for each language and sentiment type
print("="*80)
print("MULTILINGUAL SENTIMENT ANALYSIS WITH SPACY")
print("="*80)
all_results = []
for language in ['english', 'french', 'arabic']:
    if language in analyzer.models:
        print(f"\n{'#'*60}")
        print(f"ANALYZING {language.upper()} TEXTS")
        print(f"{'#'*60}")
        for sentiment_type, text in example_texts[language].items():
            print(f"\n--- {sentiment_type.upper()} EXAMPLE ---")
            result = analyzer.basic_sentiment_analysis(text, language)
            if result:
                result['expected_sentiment'] = sentiment_type
                all_results.append(result)
# Comparative analysis
print(f"\n{'#'*60}")
print("COMPARATIVE ANALYSIS")
print(f"{'#'*60}")
if all_results:
    df = pd.DataFrame(all_results)
    # Group by language
    print("\nAverage sentiment by language:")
    lang_summary = df.groupby('language').agg({
        'polarity': 'mean',
        'subjectivity': 'mean'
    }).round(3)
    print(lang_summary)
    # Group by expected sentiment
    print("\nAverage sentiment by expected category:")
    sentiment_summary = df.groupby('expected_sentiment').agg({
        'polarity': 'mean',
        'subjectivity': 'mean'
    }).round(3)
    print(sentiment_summary)
# Detailed analysis examples
detailed_examples = {
    'english': "The new restaurant has excellent food and amazing service, but the prices are quite expensive and the atmosphere is rather noisy.",
    'french': "Le nouveau restaurant a une excellente cuisine et un service formidable, mais les prix sont assez chers et l'atmosphère est plutôt bruyante.",
    'arabic': "المطعم الجديد لديه طعام ممتاز وخدمة رائعة، لكن الأسعار مرتفعة جداً والجو صاخب نوعاً ما."
}
for language, text in detailed_examples.items():
    if language in analyzer.models:
        print(f"\n{'='*80}")
        print(f"DETAILED ANALYSIS - {language.upper()}")
        print(f"{'='*80}")
        # Sentence-level analysis
        analyzer.sentence_level_sentiment(text, language)
        # Entity-based analysis
        analyzer.entity_sentiment_analysis(text, language)
        # Advanced features
        analyzer.advanced_sentiment_features(text, language)
# Batch processing example
print(f"\n{'='*80}")
print("BATCH PROCESSING EXAMPLE")
print(f"{'='*80}")
batch_texts = [
    ("english", "Love the new update!"),
    ("english", "Hate waiting in long queues."),
    ("french", "J'aime beaucoup ce film."),
    ("french", "Je déteste les embouteillages."),
    ("arabic", "أحب هذا الكتاب كثيراً."),
    ("arabic", "لا أحب هذا الطعام.")
]
batch_results = []
for language, text in batch_texts:
    if language in analyzer.models:
        result = analyzer.basic_sentiment_analysis(text, language)
        if result:
            batch_results.append(result)
if batch_results:
    batch_df = pd.DataFrame(batch_results)
    print(f"\nBatch Processing Results:")
    print(batch_df[['language', 'text', 'polarity', 'sentiment']])
print(f"\n{'='*80}")
print("ANALYSIS COMPLETE")
print(f"{'='*80}")