Plugin Best Practices

This guide covers best practices for developing high-quality Sifaka plugins.

General Principles

1. Follow the Interface

Always implement the complete interface:

# Good: Complete implementation
class MyCritic(CriticPlugin):
    """Minimal critic that implements the required ``critique`` coroutine."""

    async def critique(self, text: str, result: SifakaResult) -> CritiqueResult:
        """Critique ``text`` and return the outcome as a ``CritiqueResult``."""
        # Real analysis goes here; the constructor arguments are elided.
        outcome = CritiqueResult(...)
        return outcome

# Bad: Missing required method
class BadCritic(CriticPlugin):
    # Anti-pattern, kept on purpose: this class defines `analyze` instead of
    # the `critique` coroutine shown in the "Good" example, and `analyze` is
    # neither async nor does it accept the `result` argument.
    def analyze(self, text):  # Wrong method name
        pass

2. Handle Errors Gracefully

class RobustCritic(CriticPlugin):
    """Critic that still returns a usable result when its analysis fails."""

    async def critique(self, text: str, result: SifakaResult) -> CritiqueResult:
        """Analyze ``text``, falling back to a low-confidence result on error.

        Returns:
            The regular critique when analysis succeeds; otherwise a critique
            whose feedback carries the error message and whose confidence
            is 0.1.
        """
        try:
            # Your logic here
            report = await self._analyze(text)
            outcome = CritiqueResult(
                critic="robust_critic",
                feedback=report.feedback,
                suggestions=report.suggestions,
                needs_improvement=True,
                confidence=0.8,
            )
        except Exception as exc:
            # Degrade instead of propagating: the caller still gets feedback,
            # flagged with a low confidence so it can be weighted accordingly.
            outcome = CritiqueResult(
                critic="robust_critic",
                feedback=f"Analysis incomplete: {exc!s}",
                suggestions=["Please review the text manually"],
                needs_improvement=True,
                confidence=0.1,
            )
        return outcome

3. Be Async-First

# Good: Async implementation
class AsyncCritic(CriticPlugin):
    """Critic whose external I/O is awaited rather than run synchronously."""

    async def critique(self, text: str, result: SifakaResult) -> CritiqueResult:
        """Fetch external data with ``await`` and return a critique."""
        # Awaiting hands control back to the event loop while the fetch runs.
        payload = await fetch_external_data()
        return CritiqueResult(...)

# Bad: Blocking operations
class BlockingCritic(CriticPlugin):
    # Anti-pattern, kept on purpose: the method is declared `async` but both
    # calls below are synchronous, so nothing else can run while they execute.
    async def critique(self, text: str, result: SifakaResult) -> CritiqueResult:
        # This blocks the event loop!
        # `time.sleep` is not awaited; `await asyncio.sleep(5)` is the
        # non-blocking equivalent.
        time.sleep(5)
        # `requests` performs synchronous HTTP; use an async client instead.
        data = requests.get("https://api.example.com").json()
        return CritiqueResult(...)

Critic Plugin Best Practices

1. Provide Clear Feedback

class ClearFeedbackCritic(CriticPlugin):
    """Critic that reports specific, actionable style and length issues."""

    async def critique(self, text: str, result: SifakaResult) -> CritiqueResult:
        """Check ``text`` for minimum length and mixed contrast words.

        Args:
            text: The text to critique.
            result: The running result for this text (not consulted here).

        Returns:
            A critique whose ``suggestions`` lists each issue found;
            ``needs_improvement`` is True when at least one was found.
        """
        issues = []
        tokens = text.split()

        # Compare whole words with surrounding punctuation removed, so that
        # "button" or "attribute" is not mistaken for the word "but".
        words = {token.strip(".,;:!?\"'()[]").lower() for token in tokens}

        # Specific, actionable feedback
        if len(tokens) < 50:
            issues.append("Expand the text to at least 50 words for better context")

        if "however" in words and "but" in words:
            issues.append("Avoid using both 'however' and 'but' - choose one for consistency")

        return CritiqueResult(
            critic="clear_feedback",
            feedback="Style and length analysis complete",
            suggestions=issues,
            needs_improvement=bool(issues),
            confidence=0.6 if issues else 0.9,
        )

2. Use Confidence Appropriately

class ConfidenceCritic(CriticPlugin):
    """Critic that scales its confidence with the length of the input."""

    async def critique(self, text: str, result: SifakaResult) -> CritiqueResult:
        """Return a critique whose confidence depends on the word count."""
        n_words = len(text.split())

        # Confidence reflects how certain the analysis can be for this length.
        if n_words < 10:
            certainty = 0.3  # Very short text: little to go on
        elif n_words <= 1000:
            certainty = 0.9  # Typical text: high confidence
        else:
            certainty = 0.7  # Long text: medium confidence

        return CritiqueResult(
            critic="confidence_aware",
            feedback="Analysis complete",
            suggestions=[],
            needs_improvement=False,
            confidence=certainty,
        )

3. Consider Context

class ContextAwareCritic(CriticPlugin):
    """Critic that compares its current findings with earlier critiques."""

    async def critique(self, text: str, result: SifakaResult) -> CritiqueResult:
        """Critique ``text``, adjusting the feedback to reflect progress.

        Args:
            text: The text to critique.
            result: The running result; ``result.iteration`` and
                ``result.critiques`` supply the history.

        Returns:
            A critique listing the current issues, with feedback that says
            whether the number of open issues dropped since earlier rounds.
        """
        # Compute the current findings first so they can be compared with the
        # history below and reported in the returned critique.
        current_issues = self._find_issues(text)
        feedback = "Initial analysis complete"

        # Look at previous iterations
        if result.iteration > 1:
            # Collect every suggestion made by previous critiques
            prev_suggestions = [
                suggestion
                for critique in result.critiques
                for suggestion in critique.suggestions
            ]

            # Adjust feedback based on progress
            if len(prev_suggestions) > len(current_issues):
                feedback = "Good progress on addressing previous issues"
            else:
                feedback = "Some previous issues remain unaddressed"

        return CritiqueResult(
            critic="context_aware",
            feedback=feedback,
            suggestions=current_issues,
            needs_improvement=bool(current_issues),
            confidence=0.8,
        )

    def _find_issues(self, text: str) -> list[str]:
        """Return the issues found in ``text``; replace with real analysis."""
        issues = []
        if len(text.split()) < 50:
            issues.append("Expand the text to at least 50 words for better context")
        return issues

Validator Plugin Best Practices

1. Return Meaningful Scores

class ScoringValidator(ValidatorPlugin):
    """Validator that grades text length on a graduated scale."""

    async def validate(self, text: str) -> ValidationResult:
        """Score ``text`` by word count; a score of 0.5 or more passes."""
        n_words = len(text.split())

        # Graduated scoring, not just pass/fail: each band is
        # (exclusive upper word bound, score, details). Anything at or above
        # the last bound keeps the top score.
        bands = (
            (50, 0.3, "Too short - aim for 50+ words"),
            (100, 0.7, "Acceptable length, could be expanded"),
        )
        score, details = 1.0, "Excellent length"
        for upper_bound, band_score, band_details in bands:
            if n_words < upper_bound:
                score, details = band_score, band_details
                break

        return ValidationResult(
            validator="length_scorer",
            passed=score >= 0.5,
            score=score,
            details=details,
        )

2. Provide Actionable Details

class DetailedValidator(ValidatorPlugin):
    """Validator that reports each failed check as an actionable detail."""

    async def validate(self, text: str) -> ValidationResult:
        """Check capitalization, end punctuation and sentence length.

        Args:
            text: The text to validate.

        Returns:
            A result that passes only when no issue was found. Each issue
            deducts 0.2 from the score; ``details`` joins the issues with
            "; ". Empty or whitespace-only text fails with a score of 0.0.
        """
        # Ignore surrounding whitespace so a trailing newline is not mistaken
        # for missing punctuation.
        stripped = text.strip()
        if not stripped:
            # Indexing an empty string would raise IndexError below.
            return ValidationResult(
                validator="grammar_checker",
                passed=False,
                score=0.0,
                details="Text is empty",
            )

        issues = []

        # Check multiple criteria
        if not stripped[0].isupper():
            issues.append("Start with a capital letter")

        if stripped[-1] not in ".!?":
            issues.append("End with proper punctuation")

        # Split on every accepted sentence terminator, not only on ".".
        sentences = stripped.replace("!", ".").replace("?", ".").split(".")
        if any(len(sentence.split()) > 30 for sentence in sentences):
            issues.append("Break up long sentences (30+ words)")

        return ValidationResult(
            validator="grammar_checker",
            passed=not issues,
            score=max(0.0, 1.0 - len(issues) * 0.2),  # Deduct 20% per issue
            details="; ".join(issues) if issues else "All checks passed",
        )

Storage Plugin Best Practices

1. Handle Concurrent Access

class ThreadSafeStorage(StoragePlugin):
    """In-memory storage whose reads and writes are serialized by a lock.

    The lock is an ``asyncio.Lock``: it serializes coroutines running on the
    same event loop. It is not a ``threading`` lock.
    """

    def __init__(self):
        self._store = {}
        self._mutex = asyncio.Lock()

    async def save(self, result: SifakaResult) -> str:
        """Store ``result`` under its id and return that id."""
        async with self._mutex:
            self._store[result.id] = result
            saved_id = result.id
        return saved_id

    async def load(self, result_id: str) -> SifakaResult | None:
        """Return the stored result for ``result_id``, or None if unknown."""
        async with self._mutex:
            found = self._store.get(result_id)
        return found

2. Implement Cleanup

class ManagedStorage(StoragePlugin):
    """Storage that can purge results older than a configurable age.

    Args:
        max_age_hours: Age in hours beyond which ``cleanup`` deletes a result.
    """

    def __init__(self, max_age_hours: int = 24):
        self.max_age_hours = max_age_hours

    async def cleanup(self):
        """Delete every stored result created before the age cutoff."""
        retention = timedelta(hours=self.max_age_hours)
        oldest_allowed = datetime.now() - retention

        for stored_id in await self.list_results():
            stored = await self.load(stored_id)
            if not stored:
                # Nothing could be loaded for this id; skip it.
                continue
            if stored.created_at < oldest_allowed:
                await self.delete(stored_id)

3. Handle Large Data

class EfficientStorage(StoragePlugin):
    """Storage that keeps very large text bodies out of the main record."""

    # Texts longer than this many characters are stored separately.
    LARGE_TEXT_THRESHOLD = 10000

    async def save(self, result: SifakaResult) -> str:
        """Persist ``result``, storing oversized ``final_text`` separately.

        Args:
            result: The result to persist.

        Returns:
            Whatever ``_save_result`` returns for the serialized record.
        """
        # Serialize first so the record exists for small and large texts
        # alike; building it only inside the branch left it undefined for
        # texts at or below the threshold.
        result_data = result.model_dump()

        # Save large fields separately if needed
        if len(result.final_text) > self.LARGE_TEXT_THRESHOLD:
            # Store text separately
            text_id = await self._save_large_text(result.final_text)
            # Save reference in main result
            result_data["final_text"] = f"ref:{text_id}"

        return await self._save_result(result_data)

Performance Best Practices

1. Cache Expensive Operations

class CachedCritic(CriticPlugin):
    """Critic that memoizes its expensive analysis per distinct text.

    The cache is an unbounded in-memory dict; add an eviction policy before
    using this pattern with an open-ended set of inputs.
    """

    def __init__(self):
        self._cache = {}

    async def critique(self, text: str, result: SifakaResult) -> CritiqueResult:
        """Return a critique for ``text``, reusing a cached one if present.

        Args:
            text: The text to critique.
            result: The running result for this text (not consulted here).

        Returns:
            A fresh critique on a cache miss; on a hit, the cached critique
            with its confidence scaled by 0.9.
        """
        # Cache key based on text hash
        cache_key = hashlib.md5(text.encode()).hexdigest()

        cached = self._cache.get(cache_key)
        if cached is not None:
            # Override confidence inside the mapping: passing it as a separate
            # keyword next to **cached would supply the argument twice.
            return CritiqueResult(
                **{**cached, "confidence": cached["confidence"] * 0.9}  # Slightly lower
            )

        # Perform expensive analysis (named so it does not shadow `result`)
        fresh = await self._expensive_analysis(text)
        self._cache[cache_key] = fresh.model_dump()

        return fresh

2. Batch Operations

class BatchValidator(ValidatorPlugin):
    """Validator that groups texts and processes them in batches.

    Args:
        batch_size: Queue length that triggers an immediate batch run.
        max_wait: Seconds a text may wait before a partial batch is flushed.

    Raises:
        ValueError: If ``batch_size`` is less than 1 or ``max_wait`` is
            negative.
    """

    def __init__(self, batch_size: int = 10, max_wait: float = 1.0):
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        if max_wait < 0:
            raise ValueError("max_wait must not be negative")
        self._batch_size = batch_size
        self._max_wait = max_wait
        self._queue = []
        self._results = {}

    async def validate(self, text: str) -> ValidationResult:
        """Queue ``text`` and return its result once a batch has handled it."""
        # Add to queue
        text_id = str(uuid4())
        self._queue.append((text_id, text))

        # Batch process when queue is full
        # NOTE(review): assumes `_process_batch` (defined elsewhere) drains
        # `self._queue` and stores results in `self._results` - confirm.
        if len(self._queue) >= self._batch_size:
            await self._process_batch()

        # Wait for result. Without a deadline, a queue that never reaches
        # `batch_size` would leave this coroutine waiting forever.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + self._max_wait
        while text_id not in self._results:
            if self._queue and loop.time() >= deadline:
                # Flush the partial batch rather than wait for more texts.
                await self._process_batch()
                if text_id in self._results:
                    break
            await asyncio.sleep(0.1)

        return self._results.pop(text_id)

Testing Best Practices

1. Test Edge Cases

import pytest

class TestMyCritic:
    """Edge-case tests for ``MyCritic``: empty and very long input."""

    @pytest.mark.asyncio
    async def test_empty_text(self):
        """An empty text should produce a low-confidence critique."""
        outcome = await MyCritic().critique("", SifakaResult(...))
        assert outcome.confidence < 0.5

    @pytest.mark.asyncio
    async def test_very_long_text(self):
        """A 10,000-word text should still yield non-empty feedback."""
        subject = MyCritic()
        oversized = "word " * 10_000
        outcome = await subject.critique(oversized, SifakaResult(...))
        assert outcome.feedback  # Should handle gracefully

2. Mock External Dependencies

class TestExternalCritic:
    """Failure-path tests for a critic that depends on an external API."""

    @pytest.mark.asyncio
    async def test_api_failure(self, mocker):
        """A failing HTTP call should degrade to a low-confidence critique."""
        # Make every GET on the HTTP session raise instead of reaching the API.
        api_failure = Exception("API Error")
        mocker.patch("aiohttp.ClientSession.get", side_effect=api_failure)

        subject = ExternalAPICritic()
        outcome = await subject.critique("test", SifakaResult(...))

        # Should handle failure gracefully
        assert outcome.confidence < 0.5
        assert "error" in outcome.feedback.lower()

Documentation Best Practices

1. Include Examples

class WellDocumentedCritic(CriticPlugin):
    """
    Checks text for technical accuracy.

    Example:
        >>> critic = WellDocumentedCritic()
        >>> result = await critic.critique(
        ...     "Python uses tabs for indentation",
        ...     SifakaResult(...)
        ... )
        >>> print(result.suggestions)
        ['Python typically uses 4 spaces, not tabs']
    """

    async def critique(self, text: str, result: SifakaResult) -> CritiqueResult:
        # Implementation
        # Placeholder body: the analysis itself is omitted from this example.
        pass

2. Document Parameters

class ConfigurableCritic(CriticPlugin):
    """
    A critic that checks for specific patterns.

    Args:
        patterns: List of regex patterns to check
        severity: How strictly to evaluate (0.0-1.0)

    Raises:
        ValueError: If severity is not between 0 and 1
    """

    def __init__(self, patterns: list[str], severity: float = 0.5):
        # Reject out-of-range values up front, before any state is stored.
        in_range = 0 <= severity <= 1
        if not in_range:
            raise ValueError("Severity must be between 0 and 1")
        self.severity = severity
        self.patterns = patterns

Security Best Practices

1. Validate Input

class SecureCritic(CriticPlugin):
    """Critic that validates and sanitizes its input before analyzing it."""

    async def critique(self, text: str, result: SifakaResult) -> CritiqueResult:
        """Validate ``text``, sanitize it, then run the analysis.

        Raises:
            TypeError: If ``text`` is not a string.
            ValueError: If ``text`` is longer than 1,000,000 characters.
        """
        # Validate input
        if not isinstance(text, str):
            raise TypeError("Text must be a string")

        # Size cap; len() counts characters, not encoded bytes.
        too_large = len(text) > 1_000_000
        if too_large:
            raise ValueError("Text too large")

        # Sanitize if needed, then continue with analysis
        return await self._analyze(self._sanitize(text))

2. Don't Expose Sensitive Data

class PrivacyCritic(CriticPlugin):
    """Critic that flags sensitive text without echoing it back."""

    async def critique(self, text: str, result: SifakaResult) -> CritiqueResult:
        """Return a fixed warning for sensitive text, else a normal critique."""
        # Regular analysis for safe text
        if not self._contains_pii(text):
            return await self._normal_critique(text, result)

        # Don't include sensitive data in feedback: every string below is a
        # fixed message, never a quotation from `text`.
        return CritiqueResult(
            critic="privacy_critic",
            feedback="Text contains sensitive information",
            suggestions=["Remove personal identifiable information"],
            needs_improvement=True,
            confidence=1.0,
        )

Summary

Key takeaways for plugin development:

Be Async: Use async/await properly
Handle Errors: Always return valid results
Provide Value: Clear, actionable feedback
Test Thoroughly: Edge cases and failures
Document Well: Examples and parameter docs
Think Performance: Cache and batch when appropriate
Stay Secure: Validate input and protect data