spaCy’s Approach to Sentiment Analysis
spaCy is a modern, industrial-strength NLP library with strong multilingual capabilities that can be extended for sentiment analysis. Unlike NLTK, spaCy is designed from the ground up for production use and provides pretrained pipelines for multiple languages behind a consistent API.
spaCy doesn’t include built-in sentiment analysis in its core library, but it integrates cleanly with sentiment-analysis extensions and models. The most common approaches are training spaCy’s own text-classification component (textcat), plugging in transformer models through spacy-transformers, or using an extension such as spacytextblob for simpler sentiment tasks. Keep in mind that spacytextblob delegates scoring to TextBlob, whose default analyzer is trained on English, so polarity scores for other languages should be treated as rough indicators.
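Before the full example below, here is a minimal sketch of the spacytextblob pattern it builds on (assuming spacytextblob and the en_core_web_sm pipeline are installed):

import spacy
from spacytextblob.spacytextblob import SpacyTextBlob  # importing registers the "spacytextblob" factory

nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("spacytextblob")  # append the sentiment component to the pipeline

doc = nlp("The service was wonderful.")
print(doc._.blob.polarity, doc._.blob.subjectivity)  # polarity in [-1, 1], subjectivity in [0, 1]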
Multilingual Support in spaCy
English has the most comprehensive support, with pretrained pipelines (en_core_web_sm/md/lg) that provide rich text-processing capabilities and are easily extended with sentiment analysis.
French is well supported with dedicated pipelines (fr_core_news_sm/md/lg) that handle French tokenization, morphology, and syntax effectively and combine well with a sentiment component.
Arabic has basic language support (tokenization and right-to-left text handling), but spaCy currently ships no official pretrained Arabic pipeline, so Arabic sentiment analysis relies on third-party or custom-trained models and resources remain more limited than for English and French. The short check below shows one way to verify which pipelines are actually installed.
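A minimal sketch using spacy.util.is_package, assuming only that spaCy itself is installed; the model names match those used in the larger example that follows:

import spacy

# Check which of the pipelines used below are installed locally.
for model_name in ("en_core_web_sm", "fr_core_news_sm", "ar_core_news_sm"):
    if spacy.util.is_package(model_name):
        print(f"{model_name} is installed")
    else:
        # For Arabic this will normally be the case, since no official pipeline is published.
        print(f"{model_name} is missing; try: python -m spacy download {model_name}")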
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob  # importing registers the "spacytextblob" pipeline factory
import pandas as pd
from collections import defaultdict

# Note: Install required packages
# pip install spacy spacytextblob textblob
# python -m spacy download en_core_web_sm
# python -m spacy download fr_core_news_sm
# python -m spacy download ar_core_news_sm  # may fail: no official Arabic pipeline is published
class MultilingualSentimentAnalyzer:
    def __init__(self):
        """Initialize spaCy models for different languages"""
        self.models = {}
        self.model_names = {
            'english': 'en_core_web_sm',
            'french': 'fr_core_news_sm',
            'arabic': 'ar_core_news_sm'  # no official Arabic pipeline exists, so this is usually skipped
        }
        # Load available models
        for lang, model_name in self.model_names.items():
            try:
                nlp = spacy.load(model_name)
                # Add TextBlob extension for sentiment analysis
                if not nlp.has_pipe('spacytextblob'):
                    nlp.add_pipe('spacytextblob')
                self.models[lang] = nlp
                print(f"✓ Loaded {lang} model: {model_name}")
            except OSError:
                print(f"✗ Could not load {lang} model: {model_name}")
                print(f"  Install with: python -m spacy download {model_name}")
    def basic_sentiment_analysis(self, text, language='english'):
        """Basic sentiment analysis using TextBlob integration"""
        if language not in self.models:
            print(f"Model for {language} not available")
            return None
        nlp = self.models[language]
        doc = nlp(text)
        print(f"\n--- {language.upper()} SENTIMENT ANALYSIS ---")
        print(f"Text: {text}")
        print(f"Polarity: {doc._.blob.polarity:.3f} (Range: -1 to 1)")
        print(f"Subjectivity: {doc._.blob.subjectivity:.3f} (Range: 0 to 1)")
        # Interpret sentiment
        if doc._.blob.polarity > 0.1:
            sentiment_label = "Positive"
        elif doc._.blob.polarity < -0.1:
            sentiment_label = "Negative"
        else:
            sentiment_label = "Neutral"
        print(f"Sentiment: {sentiment_label}")
        return {
            'text': text,
            'language': language,
            'polarity': doc._.blob.polarity,
            'subjectivity': doc._.blob.subjectivity,
            'sentiment': sentiment_label
        }
    def sentence_level_sentiment(self, text, language='english'):
        """Analyze sentiment at sentence level"""
        if language not in self.models:
            print(f"Model for {language} not available")
            return None
        nlp = self.models[language]
        doc = nlp(text)
        print(f"\n--- {language.upper()} SENTENCE-LEVEL SENTIMENT ---")
        sentence_sentiments = []
        for sent in doc.sents:
            sent_doc = nlp(sent.text)
            sentiment_data = {
                'sentence': sent.text.strip(),
                'polarity': sent_doc._.blob.polarity,
                'subjectivity': sent_doc._.blob.subjectivity
            }
            sentence_sentiments.append(sentiment_data)
            print(f"Sentence: '{sent.text.strip()}'")
            print(f"  Polarity: {sent_doc._.blob.polarity:.3f}")
            print(f"  Subjectivity: {sent_doc._.blob.subjectivity:.3f}")
            print()
        return sentence_sentiments
    def entity_sentiment_analysis(self, text, language='english'):
        """Analyze sentiment in relation to named entities"""
        if language not in self.models:
            print(f"Model for {language} not available")
            return None
        nlp = self.models[language]
        doc = nlp(text)
        print(f"\n--- {language.upper()} ENTITY-BASED SENTIMENT ---")
        entities_sentiment = []
        # Get overall document sentiment
        overall_sentiment = doc._.blob.polarity
        # Find entities and their context
        for ent in doc.ents:
            # Get sentence containing the entity
            entity_sent = None
            for sent in doc.sents:
                if ent.start >= sent.start and ent.end <= sent.end:
                    entity_sent = sent
                    break
            if entity_sent:
                sent_doc = nlp(entity_sent.text)
                entity_data = {
                    'entity': ent.text,
                    'label': ent.label_,
                    'sentence': entity_sent.text.strip(),
                    'sentiment': sent_doc._.blob.polarity
                }
                entities_sentiment.append(entity_data)
                print(f"Entity: {ent.text} ({ent.label_})")
                print(f"Context: '{entity_sent.text.strip()}'")
                print(f"Sentiment: {sent_doc._.blob.polarity:.3f}")
                print()
        return entities_sentiment
    def comparative_sentiment_analysis(self, texts_dict):
        """Compare sentiment across multiple languages"""
        print(f"\n--- COMPARATIVE MULTILINGUAL SENTIMENT ANALYSIS ---")
        results = []
        for language, text in texts_dict.items():
            if language in self.models:
                sentiment_result = self.basic_sentiment_analysis(text, language)
                if sentiment_result:
                    results.append(sentiment_result)
        # Create comparison DataFrame
        if results:
            df = pd.DataFrame(results)
            print(f"\nComparative Results:")
            print(df[['language', 'polarity', 'subjectivity', 'sentiment']])
            return df
        return None
    def advanced_sentiment_features(self, text, language='english'):
        """Extract advanced linguistic features that affect sentiment"""
        if language not in self.models:
            print(f"Model for {language} not available")
            return None
        nlp = self.models[language]
        doc = nlp(text)
        print(f"\n--- {language.upper()} ADVANCED SENTIMENT FEATURES ---")
        features = {
            'negations': [],
            'intensifiers': [],
            'sentiment_words': [],
            'pos_distribution': defaultdict(int)
        }
        # Common intensifiers (language-specific lists would be better)
        intensifiers = {
            'english': ['very', 'extremely', 'really', 'quite', 'rather', 'pretty', 'absolutely'],
            'french': ['très', 'extrêmement', 'vraiment', 'assez', 'plutôt', 'absolument'],
            'arabic': ['جداً', 'جدا', 'كثيراً', 'كثيرا', 'للغاية', 'تماماً']
        }
        # Analyze tokens
        for token in doc:
            # POS distribution
            features['pos_distribution'][token.pos_] += 1
            # Find negations ('neg' is the label used by the English pipelines;
            # UD-style pipelines such as the French models may label negation differently)
            if token.dep_ == 'neg':
                features['negations'].append(token.text)
            # Find intensifiers
            if language in intensifiers and token.lemma_.lower() in intensifiers[language]:
                features['intensifiers'].append(token.text)
        # Get sentiment-bearing words (simplified approach: score each candidate word on its own)
        sentiment_doc = nlp(text)
        if abs(sentiment_doc._.blob.polarity) > 0.1:
            for token in doc:
                if token.pos_ in ['ADJ', 'VERB', 'NOUN'] and not token.is_stop:
                    token_sent = nlp(token.text)
                    if abs(token_sent._.blob.polarity) > 0.2:
                        features['sentiment_words'].append({
                            'word': token.text,
                            'pos': token.pos_,
                            'sentiment': token_sent._.blob.polarity
                        })
        print(f"Negations found: {features['negations']}")
        print(f"Intensifiers found: {features['intensifiers']}")
        print(f"Key sentiment words: {[w['word'] for w in features['sentiment_words'][:5]]}")
        print(f"POS distribution: {dict(features['pos_distribution'])}")
        return features
# Initialize the analyzer
analyzer = MultilingualSentimentAnalyzer()
# Example texts for different languages with varying sentiments
example_texts = {
    'english': {
        'positive': "I absolutely love this amazing product! It exceeded all my expectations and made my life so much better.",
        'negative': "This terrible service completely ruined my day. I'm extremely disappointed and frustrated.",
        'neutral': "The weather today is cloudy with a chance of rain. The temperature is around 20 degrees.",
        'mixed': "The hotel room was beautiful and clean, but the staff was quite rude and unhelpful."
    },
    'french': {
        'positive': "J'adore absolument ce produit incroyable! Il a dépassé toutes mes attentes et a rendu ma vie tellement meilleure.",
        'negative': "Ce service terrible a complètement gâché ma journée. Je suis extrêmement déçu et frustré.",
        'neutral': "Le temps aujourd'hui est nuageux avec une possibilité de pluie. La température est d'environ 20 degrés.",
        'mixed': "La chambre d'hôtel était belle et propre, mais le personnel était assez impoli et inutile."
    },
    'arabic': {
        'positive': "أحب هذا المنتج الرائع جداً! لقد فاق كل توقعاتي وجعل حياتي أفضل بكثير.",
        'negative': "هذه الخدمة السيئة دمرت يومي تماماً. أنا محبط ومخيب الأمل للغاية.",
        'neutral': "الطقس اليوم غائم مع احتمال هطول أمطار. درجة الحرارة حوالي 20 درجة.",
        'mixed': "غرفة الفندق كانت جميلة ونظيفة، لكن الموظفين كانوا وقحين وغير مفيدين."
    }
}
# Run analysis for each language and sentiment type
print("="*80)
print("MULTILINGUAL SENTIMENT ANALYSIS WITH SPACY")
print("="*80)
all_results = []
for language in ['english', 'french', 'arabic']:
    if language in analyzer.models:
        print(f"\n{'#'*60}")
        print(f"ANALYZING {language.upper()} TEXTS")
        print(f"{'#'*60}")
        for sentiment_type, text in example_texts[language].items():
            print(f"\n--- {sentiment_type.upper()} EXAMPLE ---")
            result = analyzer.basic_sentiment_analysis(text, language)
            if result:
                result['expected_sentiment'] = sentiment_type
                all_results.append(result)
# Comparative analysis
print(f"\n{'#'*60}")
print("COMPARATIVE ANALYSIS")
print(f"{'#'*60}")
if all_results:
    df = pd.DataFrame(all_results)
    # Group by language
    print("\nAverage sentiment by language:")
    lang_summary = df.groupby('language').agg({
        'polarity': 'mean',
        'subjectivity': 'mean'
    }).round(3)
    print(lang_summary)
    # Group by expected sentiment
    print("\nAverage sentiment by expected category:")
    sentiment_summary = df.groupby('expected_sentiment').agg({
        'polarity': 'mean',
        'subjectivity': 'mean'
    }).round(3)
    print(sentiment_summary)
# Detailed analysis examples
detailed_examples = {
    'english': "The new restaurant has excellent food and amazing service, but the prices are quite expensive and the atmosphere is rather noisy.",
    'french': "Le nouveau restaurant a une excellente cuisine et un service formidable, mais les prix sont assez chers et l'atmosphère est plutôt bruyante.",
    'arabic': "المطعم الجديد لديه طعام ممتاز وخدمة رائعة، لكن الأسعار مرتفعة جداً والجو صاخب نوعاً ما."
}
for language, text in detailed_examples.items():
    if language in analyzer.models:
        print(f"\n{'='*80}")
        print(f"DETAILED ANALYSIS - {language.upper()}")
        print(f"{'='*80}")
        # Sentence-level analysis
        analyzer.sentence_level_sentiment(text, language)
        # Entity-based analysis
        analyzer.entity_sentiment_analysis(text, language)
        # Advanced features
        analyzer.advanced_sentiment_features(text, language)
# Batch processing example
print(f"\n{'='*80}")
print("BATCH PROCESSING EXAMPLE")
print(f"{'='*80}")
batch_texts = [
    ("english", "Love the new update!"),
    ("english", "Hate waiting in long queues."),
    ("french", "J'aime beaucoup ce film."),
    ("french", "Je déteste les embouteillages."),
    ("arabic", "أحب هذا الكتاب كثيراً."),
    ("arabic", "لا أحب هذا الطعام.")
]
batch_results = []
for language, text in batch_texts:
    if language in analyzer.models:
        result = analyzer.basic_sentiment_analysis(text, language)
        if result:
            batch_results.append(result)
if batch_results:
    batch_df = pd.DataFrame(batch_results)
    print(f"\nBatch Processing Results:")
    print(batch_df[['language', 'text', 'polarity', 'sentiment']])
print(f"\n{'='*80}")
print("ANALYSIS COMPLETE")
print(f"{'='*80}")