diff --git a/.cursor/.DS_Store b/.cursor/.DS_Store new file mode 100644 index 0000000000..231b48d052 Binary files /dev/null and b/.cursor/.DS_Store differ diff --git a/.cursor/rules/backend-technical-stack.mdc b/.cursor/rules/backend-technical-stack.mdc new file mode 100644 index 0000000000..be546ad575 --- /dev/null +++ b/.cursor/rules/backend-technical-stack.mdc @@ -0,0 +1,230 @@ +--- +description: Technical Stack Specification for the /backend. +globs: backend/* +alwaysApply: false +--- +## 1. Technology Stack Overview + +| Component | Technology | Version | Purpose | +|-----------|------------|---------|---------| +| Framework | FastAPI | 0.114.2+ | Web API framework | +| ORM | SQLModel | 0.0.21+ | Database ORM | +| Primary Database | PostgreSQL | 13+ | Relational database | +| Document Database | MongoDB | 6.0+ | Social media content storage | +| In-memory Database | Redis | 7.0+ | Caching and real-time operations | +| Vector Database | Pinecone | Latest | Semantic content analysis | +| Authentication | JWT | 2.8.0+ | User authentication | +| Password Hashing | Passlib + Bcrypt | 1.7.4+ | Secure password storage | +| Dependency Management | uv | 0.5.11+ | Package management | +| Migrations | Alembic | 1.12.1+ | Database schema migrations | +| API Documentation | OpenAPI/Swagger | Built-in | API documentation | +| Error Tracking | Sentry | 1.40.6+ | Error reporting | +| Email Delivery | emails | 0.6+ | Email notifications | +| Testing | pytest | 7.4.3+ | Unit and integration testing | +| Linting | ruff | 0.2.2+ | Code quality | +| Type Checking | mypy | 1.8.0+ | Static type checking | +| Task Queue | Celery | 5.3.0+ | Asynchronous task processing | +| Message Broker | RabbitMQ | 3.12+ | Task distribution | +| Stream Processing | Apache Kafka | 3.4+ | Real-time data streaming | +| NLP Processing | spaCy + Transformers | 3.6+ / 4.28+ | Content analysis | + +## 2. 
Architecture + +### 2.1 Architectural Pattern + +The application follows a clean architecture pattern with clear separation of concerns: + +``` +Client Request → API Layer → Service Layer → Repository Layer → Database + ↑ + Schema Layer +``` + +### 2.2 Key Components + +- **API Layer**: Handles HTTP requests/responses, input validation, and routing +- **Service Layer**: Contains business logic and orchestrates repositories +- **Repository Layer**: Provides data access patterns and database operations +- **Schema Layer**: Defines data contracts for API input/output +- **Model Layer**: Defines database models and relationships + +### 2.3 Directory Structure + +``` +/app +├── api/ # API endpoints and routing +│ ├── api_v1/ # API version 1 +│ │ ├── endpoints/ # Resource endpoints +│ │ └── api.py # Router configuration +│ ├── deps.py # Dependency injection +│ └── main.py # Main router +├── core/ # Core components +│ ├── config.py # Environment configuration +│ ├── security.py # Authentication +│ └── errors.py # Error handling +├── db/ # Database configuration +│ ├── models/ # SQLModel definitions +│ └── session.py # Database session management +├── schemas/ # Pydantic models for API +├── services/ # Business logic +│ └── repositories/ # Data access layer +├── tasks/ # Celery tasks for background processing +│ ├── scraping/ # Social media scraping tasks +│ ├── analysis/ # Content analysis tasks +│ └── notifications/ # Alert and notification tasks +├── processing/ # Data processing components +│ ├── models/ # ML model wrappers +│ ├── streams/ # Kafka stream processors +│ └── embeddings/ # Vector embedding generators +├── worker.py # Celery worker configuration +└── main.py # Application entry point +``` + +## 3. Database Design + +### 3.1 Hybrid Database Technology + +The application employs a hybrid database architecture to address the diverse data requirements of political social media analysis: + +| Component | Technology | Version | Purpose | +|-----------|------------|---------|---------| +| Relational Database | PostgreSQL | 13+ | Entity data and relationships | +| Document Database | MongoDB | 6.0+ | Social media content and engagement | +| In-memory Database | Redis | 7.0+ | Caching and real-time operations | +| Vector Database | Pinecone | Latest | Semantic similarity analysis | + +Refer to `database-architecture.mdc` for detailed implementation specifications. + +### 3.2 Primary Domain Models + +- **User**: Authentication and user profile data +- **PoliticalEntity**: Political figures, organizations, and campaigns +- **SocialMediaAccount**: Platform account connections +- **EntityRelationship**: Relationships between political entities + +### 3.3 Key Design Decisions + +- **UUID Primary Keys**: All entities use UUID primary keys for security and distributed system compatibility +- **Cross-database References**: Consistent reference patterns between databases +- **Relationship Management**: Proper foreign key constraints with cascade delete +- **String Field Constraints**: Appropriate length limits on all VARCHAR fields +- **Migration Strategy**: Alembic for version-controlled schema changes + +### 3.4 Additional Dependencies + +| Dependency | Version | Purpose | +|------------|---------|---------| +| motor | 3.2.0+ | Async MongoDB driver | +| redis | 4.6.0+ | Redis client | +| pinecone-client | 2.2.1+ | Pinecone Vector DB client | +| pymongo | 4.5.0+ | MongoDB client | + +Refer to `data-processing-architecture.mdc` for details on processing pipelines and analysis components. + +## 4. 
API Design + +### 4.1 API Structure + +- RESTful API design principles +- Resource-based URL structure +- Version prefixing (`/api/v1/...`) +- OpenAPI documentation + +### 4.2 Response Format + +All API responses follow a standardized format: + +```json +{ + "data": { ... }, // Actual response payload + "success": true, // Success indicator + "message": "Description" // Human-readable message +} +``` + +Error responses: + +```json +{ + "success": false, + "error": "Error message", + "details": { ... } // Additional error details +} +``` + +### 4.3 Authentication + +- JWT token-based authentication +- OAuth2 password flow with Bearer token +- Role-based access control (standard user vs superuser) + +## 5. Performance Considerations + +### 5.1 Database Optimization + +- Efficient query patterns via repositories +- Appropriate indexing of frequently queried fields +- Pagination for list endpoints + +### 5.2 API Performance + +- Async/await support for I/O bound operations +- Connection pooling for database operations +- Request validation at the edge + +## 6. Security Features + +### 6.1 Authentication & Authorization + +- Password hashing with bcrypt +- JWT with appropriate expiration +- Principle of least privilege in endpoint permissions + +### 6.2 Data Protection + +- Input validation using Pydantic +- CORS protection +- Email-based password recovery flow + +## 7. Development Workflow + +### 7.1 Environment Setup + +- Containerized development with Docker +- Environment-specific configuration +- Virtual environment management with uv + +### 7.2 Quality Assurance + +- Type checking with mypy +- Linting with ruff +- Automated testing with pytest +- Pre-commit hooks + +## 8. Deployment Strategy + +### 8.1 Containerization + +- Docker-based deployment +- Multi-stage build for optimized image size +- Environment variable configuration + +### 8.2 Monitoring + +- Sentry integration for error tracking +- Health check endpoints +- Structured logging + +## 9. Scalability Considerations + +### 9.1 Horizontal Scaling + +- Stateless API design +- Database connection pooling +- External state management + +### 9.2 Future Extensibility + +- Modular service architecture +- Clear separation of concerns +- Version-prefixed API endpoints \ No newline at end of file diff --git a/.cursor/rules/data-processing-architecture.mdc b/.cursor/rules/data-processing-architecture.mdc new file mode 100644 index 0000000000..1100dec26c --- /dev/null +++ b/.cursor/rules/data-processing-architecture.mdc @@ -0,0 +1,333 @@ +--- +description: Data Processing Architecture Specification for the Political Social Media Analysis Platform. +globs: backend/tasks/*, backend/processing/* +alwaysApply: false +--- +# Data Processing Architecture + +## 1. Technology Stack Overview + +| Component | Technology | Version | Purpose | +|-----------|------------|---------|---------| +| Task Queue | Celery | 5.3.0+ | Asynchronous task processing | +| Message Broker | RabbitMQ | 3.12+ | Task distribution and messaging | +| Stream Processing | Apache Kafka | 3.4+ | Real-time event streaming | +| Text Processing | spaCy | 3.6+ | NLP and entity recognition | +| Sentiment Analysis | Transformers | 4.28+ | Content sentiment detection | +| Vector Embeddings | sentence-transformers | 2.2.2+ | Text embedding generation | +| Machine Learning | scikit-learn | 1.2+ | Classification and regression | +| Full-Text Search | MongoDB Atlas Search | N/A | Content search capabilities | + +## 2. 
Processing Pipeline Components + +### 2.1 Data Collection Components + +- **Platform-specific scrapers**: Modular adapters for each social media platform +- **Rate limiters**: Respects platform API constraints +- **Scheduled collection**: Configurable intervals for data collection +- **Content processors**: Standardizes data from different platforms + +### 2.2 Analysis Components + +- **Sentiment analyzer**: Determines content sentiment +- **Topic modeler**: Identifies content themes and categories +- **Entity recognizer**: Detects mentions of political entities +- **Vector embedder**: Generates semantic representations +- **Relationship mapper**: Builds entity relationship graphs + +### 2.3 Real-time Components + +- **Stream processors**: Kafka consumers for real-time analysis +- **Alert generators**: Triggers based on configurable thresholds +- **Metric calculators**: Real-time engagement statistics +- **Notification services**: Delivery of critical alerts + +## 3. Task Distribution + +### 3.1 Task Queue Design + +Celery task queues with priority-based routing: + +| Queue Name | Priority | Purpose | +|------------|----------|---------| +| scraping | High | Content collection from social platforms | +| analysis | Medium | Content processing and analysis | +| embeddings | Low | Vector embedding generation | +| alerts | Critical | Real-time notification processing | +| reports | Low | Scheduled report generation | + +### 3.2 Task Implementation Pattern + +```python +@app.task(queue="analysis", rate_limit="100/m") +def analyze_sentiment(post_id: str, text: str): + """ + Analyze the sentiment of a social media post. + + Args: + post_id: The MongoDB ID of the post + text: The text content to analyze + + Returns: + Dict containing sentiment scores and emotional classification + """ + # Sentiment analysis implementation + sentiment_score = sentiment_model.predict(text) + + # Update the post in MongoDB with sentiment results + mongodb.posts.update_one( + {"_id": ObjectId(post_id)}, + {"$set": {"analysis.sentiment_score": sentiment_score}} + ) + + # Return result for potential chaining + return { + "post_id": post_id, + "sentiment_score": sentiment_score + } +``` + +## 4. Stream Processing Design + +### 4.1 Kafka Topic Design + +| Topic | Purpose | Retention | Partitioning | +|-------|---------|-----------|--------------| +| social-media-raw | Raw content from platforms | 7 days | By platform and entity | +| entity-mentions | Mentions of tracked entities | 30 days | By mentioned entity | +| sentiment-changes | Significant sentiment shifts | 30 days | By entity | +| engagement-metrics | Real-time engagement updates | 2 days | By entity | + +### 4.2 Stream Processing Pattern + +```python +def process_sentiment_stream(): + """ + Process the sentiment stream to detect significant changes. + """ + consumer = KafkaConsumer( + 'social-media-raw', + bootstrap_servers='kafka:9092', + group_id='sentiment-analyzer', + auto_offset_reset='latest' + ) + + for message in consumer: + # Decode message + post = json.loads(message.value) + + # Calculate sentiment + sentiment = analyze_content(post['content']['text']) + + # Check for significant changes + if is_significant_change(post, sentiment): + # Publish to sentiment-changes topic + publish_sentiment_change(post, sentiment) + + # Generate alert if needed + if requires_alert(post, sentiment): + generate_alert(post, sentiment) +``` + +## 5. 
Machine Learning Implementation + +### 5.1 Model Management + +- **Model Registry**: Central repository for trained models +- **Versioning**: Tracking model versions and performance +- **A/B Testing**: Framework for evaluating model improvements +- **Automated Retraining**: Scheduled model updates + +### 5.2 Core Models + +| Model | Purpose | Architecture | Training Data | +|-------|---------|--------------|--------------| +| Sentiment Analyzer | Content sentiment scoring | Fine-tuned transformer | Labeled political content | +| Topic Classifier | Content categorization | Multi-label classification | Domain-specific corpus | +| Entity Relationship | Relationship scoring | Graph neural network | Historical interaction data | +| Audience Segmenter | User clustering | Unsupervised model | Engagement patterns | +| Performance Predictor | Engagement prediction | Gradient boosting | Historical post performance | + +### 5.3 Vector Embedding Process + +```python +@app.task(queue="embeddings") +def generate_embedding(content_id: str, content_type: str, text: str): + """ + Generate vector embedding for text content. + + Args: + content_id: MongoDB ID of the content + content_type: Type of content (post, comment) + text: Text to embed + + Returns: + ID of the created vector entry + """ + # Generate embedding + embedding = embedding_model.encode(text) + + # Get metadata from MongoDB + if content_type == "post": + content = mongodb.posts.find_one({"_id": ObjectId(content_id)}) + else: + content = mongodb.comments.find_one({"_id": ObjectId(content_id)}) + + # Create metadata for vector DB + metadata = { + "content_type": content_type, + "source_id": str(content["_id"]), + "entity_id": content.get("account_id"), + "platform": content["platform"], + "created_at": content["metadata"]["created_at"], + "topics": content.get("analysis", {}).get("topics", []), + "sentiment_score": content.get("analysis", {}).get("sentiment_score") + } + + # Store in vector database + vector_id = vector_client.upsert( + vectors=[embedding.tolist()], + metadata=metadata, + namespace="social_content" + ) + + # Update reference in MongoDB + collection = mongodb.posts if content_type == "post" else mongodb.comments + collection.update_one( + {"_id": ObjectId(content_id)}, + {"$set": {"vector_id": vector_id}} + ) + + return vector_id +``` + +## 6. Search Implementation + +### 6.1 MongoDB Atlas Search Configuration + +```javascript +// Search index configuration +{ + "mappings": { + "dynamic": false, + "fields": { + "content.text": { + "type": "string", + "analyzer": "lucene.standard", + "searchAnalyzer": "lucene.standard" + }, + "metadata.location.country": { + "type": "string" + }, + "metadata.language": { + "type": "string" + }, + "analysis.topics": { + "type": "string" + }, + "analysis.entities_mentioned": { + "type": "string" + } + } + } +} +``` + +### 6.2 Search Implementation + +```python +async def search_content(query: str, filters: dict = None): + """ + Search social media content using MongoDB Atlas Search. + + Args: + query: Text query to search for + filters: Optional filters to apply (topics, entities, etc.) 
+ + Returns: + List of matching documents + """ + search_pipeline = [ + { + "$search": { + "index": "social_content", + "text": { + "query": query, + "path": "content.text" + } + } + } + ] + + # Add filters if provided + if filters: + search_pipeline.append({"$match": filters}) + + # Add projection to limit fields returned + search_pipeline.append({ + "$project": { + "_id": 1, + "content": 1, + "metadata": 1, + "analysis": 1, + "score": {"$meta": "searchScore"} + } + }) + + # Execute search + results = await mongodb.posts.aggregate(search_pipeline).to_list(length=50) + return results +``` + +## 7. Performance Considerations + +### 7.1 Resource Allocation + +| Component | CPU Allocation | Memory Allocation | Scaling Trigger | +|-----------|---------------|-------------------|-----------------| +| Scraping Workers | Medium | Low | Queue depth > 1000 tasks | +| Analysis Workers | High | High | Queue depth > 500 tasks | +| Vector Workers | High | Medium | Queue depth > 200 tasks | +| Stream Processors | Medium | High | Consumer lag > 1000 messages | + +### 7.2 Processing Patterns + +- **Real-time processing**: Critical alerts, high-priority entity updates +- **Near real-time processing**: Sentiment analysis, engagement metrics +- **Batch processing**: Vector embedding, relationship analysis, historical trends + +### 7.3 Rate Limiting + +- Platform-specific API rate limits +- Resource-based rate limits for compute-intensive tasks +- Prioritization of critical entity monitoring + +## 8. Monitoring and Observability + +### 8.1 Key Metrics + +- Task processing rates and success/failure ratios +- Model inference latency and throughput +- Stream processing lag and throughput +- Database operation latency +- Queue depths and processing backlogs + +### 8.2 Implementation Strategy + +- Structured logging with correlation IDs +- Error tracking with Sentry integration +- Performance monitoring with Prometheus +- Worker monitoring with Flower for Celery +- Custom health check endpoints for services + +## 9. Additional Dependencies + +| Dependency | Version | Purpose | +|------------|---------|---------| +| celery | 5.3.0+ | Task queue library | +| kafka-python | 2.0.2+ | Kafka client | +| spacy | 3.6.0+ | Natural language processing | +| transformers | 4.28.0+ | Machine learning models | +| scikit-learn | 1.2.0+ | Classical machine learning tools | +| torch | 2.0.0+ | Deep learning framework | +| sentence-transformers | 2.2.2+ | Text embedding generation | \ No newline at end of file diff --git a/.cursor/rules/database-architecture.mdc b/.cursor/rules/database-architecture.mdc new file mode 100644 index 0000000000..2d719956ee --- /dev/null +++ b/.cursor/rules/database-architecture.mdc @@ -0,0 +1,205 @@ +--- +description: Hybrid Database Architecture Specification for the Political Social Media Analysis Platform. +globs: backend/db/* +alwaysApply: false +--- +# Hybrid Database Architecture + +## 1. Database Technologies + +| Component | Technology | Version | Purpose | +|-----------|------------|---------|---------| +| Relational Database | PostgreSQL | 13+ | Entity data and relationships | +| Document Database | MongoDB | 6.0+ | Social media content and engagement | +| In-memory Database | Redis | 7.0+ | Caching and real-time operations | +| Vector Database | Pinecone | Latest | Semantic similarity analysis | + +## 2. 
Relational Database Design + +### 2.1 Primary Technology + +PostgreSQL with SQLModel ORM integration + +### 2.2 Key Design Decisions + +- **UUID Primary Keys**: All entities use UUID primary keys for security and distributed system compatibility +- **Relationship Management**: Proper foreign key constraints with cascade delete +- **String Field Constraints**: Appropriate length limits on all VARCHAR fields +- **Migration Strategy**: Alembic for version-controlled schema changes + +### 2.3 Domain Models + +```python +class PoliticalEntity(SQLModel, table=True): + id: UUID = Field(default_factory=uuid4, primary_key=True) + name: str = Field(index=True) + entity_type: str # politician, party, organization + platforms: List["SocialMediaAccount"] = Relationship(back_populates="entity") + relationships: List["EntityRelationship"] = Relationship(back_populates="source_entity") + +class SocialMediaAccount(SQLModel, table=True): + id: UUID = Field(default_factory=uuid4, primary_key=True) + platform: str # twitter, facebook, instagram, etc. + platform_id: str = Field(index=True) # platform-specific identifier + handle: str + entity_id: UUID = Field(foreign_key="politicalentity.id") + entity: PoliticalEntity = Relationship(back_populates="platforms") + +class EntityRelationship(SQLModel, table=True): + id: UUID = Field(default_factory=uuid4, primary_key=True) + source_entity_id: UUID = Field(foreign_key="politicalentity.id") + target_entity_id: UUID = Field(foreign_key="politicalentity.id") + relationship_type: str # ally, opponent, neutral + strength: float # normalized relationship strength + last_updated: datetime = Field(default_factory=datetime.utcnow) + source_entity: PoliticalEntity = Relationship(back_populates="relationships") +``` + +## 3. Document Database Design + +### 3.1 Primary Technology + +MongoDB for flexible document storage and querying + +### 3.2 Key Collections + +- **posts**: Social media posts from tracked accounts +- **comments**: User comments on tracked posts +- **metrics**: Aggregated engagement statistics +- **topics**: Topic analysis results and trends + +### 3.3 Schema Patterns + +**Post Document Example:** +```javascript +{ + "_id": ObjectId, + "platform_id": String, // Original ID from the platform + "platform": String, // twitter, facebook, etc. + "account_id": String, // Reference to PostgreSQL SocialMediaAccount.id + "content_type": String, // post, story, video, etc. + "content": { + "text": String, + "media": Array, // URLs to media content + "links": Array // External links + }, + "metadata": { + "created_at": Date, + "location": Object, + "language": String, + "client": String + }, + "engagement": { + "likes": Number, + "shares": Number, + "comments": Number, + "engagement_rate": Number + }, + "analysis": { + "sentiment_score": Number, + "topics": Array, + "entities_mentioned": Array, + "key_phrases": Array, + "emotional_tone": String + }, + "vector_id": String // Reference to vector database entry +} +``` + +### 3.4 Indexing Strategy + +- Compound index on `platform` and `account_id` +- Compound index on `metadata.created_at` and `account_id` +- Text index on `content.text` for content search +- Single field indexes on `engagement` metrics + +## 4. 
In-memory Database Design + +### 4.1 Primary Technology + +Redis for caching, real-time metrics and messaging + +### 4.2 Key Data Structures + +- **Hash maps**: Entity and post metrics (`entity:{id}:metrics`) +- **Sorted sets**: Trending topics and influencers (`trending:topics:{timeframe}`) +- **Lists**: Recent activity streams (`activity:entity:{id}`) +- **Pub/Sub channels**: Real-time alerts and notifications + +### 4.3 Caching Strategy + +- Time-based expiration for volatile metrics +- LRU eviction policy for cached data +- Write-through cache for critical metrics + +## 5. Vector Database Design + +### 5.1 Primary Technology + +Pinecone or similar vector database for semantic similarity analysis + +### 5.2 Embedding Strategy + +- Text embeddings using sentence-transformers +- 1536-dimension vectors for high-fidelity similarity +- Namespaces separated by content type +- Metadata filtering for efficient queries + +### 5.3 Vector Schema + +```javascript +{ + "id": String, // Unique identifier + "values": Array, // Embedding vector + "metadata": { + "content_type": String, // post, comment + "source_id": String, // MongoDB ID of source content + "entity_id": String, // PostgreSQL ID of political entity + "platform": String, + "created_at": Date, + "topics": Array, + "sentiment_score": Number + } +} +``` + +## 6. Cross-Database Integration + +### 6.1 Reference Patterns + +- PostgreSQL → MongoDB: UUID references stored as strings +- MongoDB → Vector DB: Document IDs linked to vector entries +- All DBs → Redis: Consistent key format for entity references + +### 6.2 Synchronization Strategy + +- PostgreSQL as the source of truth for entity data +- MongoDB change streams for data propagation +- Redis as intermediary for real-time updates +- Periodic reconciliation for data consistency + +### 6.3 Transaction Management + +- Two-phase commit for critical cross-database operations +- Eventual consistency model for non-critical updates +- Compensating transactions for error recovery + +## 7. Performance Optimization + +### 7.1 Query Optimization + +- Materialized views for frequent analytical queries +- Denormalization of frequently accessed data +- Query result caching with Redis + +### 7.2 Sharding Strategy + +- MongoDB sharded by entity and time period +- Vector database partitioned by content domains +- Redis cluster for horizontal scaling + +### 7.3 Connection Pooling + +- Optimized connection pools for each database +- Connection reuse across related operations +- Graceful handling of connection failures \ No newline at end of file diff --git a/.cursor/rules/frontend-technical-stack.mdc b/.cursor/rules/frontend-technical-stack.mdc new file mode 100644 index 0000000000..ce97676c81 --- /dev/null +++ b/.cursor/rules/frontend-technical-stack.mdc @@ -0,0 +1,123 @@ +--- +description: Technical Stack Specification for the frontend. +globs: frontend/* +alwaysApply: false +--- +# Technical Stack Specification - Frontend + +## 1. Core Technologies + +| Technology | Version | Purpose | +|------------|---------|---------| +| React | 18.2.0 | UI library for building component-based user interfaces | +| TypeScript | 5.2.2 | Static type checking for JavaScript | +| Vite | 5.4.14 | Modern frontend build tool and development server | + +## 2. 
UI Framework and Styling + +| Technology | Version | Purpose | +|------------|---------|---------| +| Chakra UI | 3.8.0 | Component library providing accessible UI components | +| @emotion/react | 11.14.0 | CSS-in-JS library used by Chakra UI | +| react-icons | 5.4.0 | Icon library with support for multiple icon sets | + +## 3. Data Fetching and State Management + +| Technology | Version | Purpose | +|------------|---------|---------| +| TanStack Query | 5.28.14 | Data fetching, caching, and state management | +| TanStack Query DevTools | 5.28.14 | Development tools for React Query | +| Axios | 1.7.4 | HTTP client for API requests | + +## 4. Routing and Navigation + +| Technology | Version | Purpose | +|------------|---------|---------| +| TanStack Router | 1.19.1 | Type-safe routing library | +| TanStack Router DevTools | 1.19.1 | Development tools for TanStack Router | + +## 5. Form Handling + +| Technology | Version | Purpose | +|------------|---------|---------| +| React Hook Form | 7.49.3 | Form state management and validation | + +## 6. API Integration + +| Technology | Version | Purpose | +|------------|---------|---------| +| @hey-api/openapi-ts | 0.57.0 | OpenAPI client generator | +| form-data | 4.0.0 | Library to create form data for API requests | + +## 7. Testing + +| Technology | Version | Purpose | +|------------|---------|---------| +| Playwright | 1.45.2 | End-to-end testing framework | + +## 8. Code Quality and Development Tools + +| Technology | Version | Purpose | +|------------|---------|---------| +| Biome | 1.6.1 | JavaScript linter and formatter | +| next-themes | 0.4.4 | Theme management utility | +| react-error-boundary | 4.0.13 | Error handling for React components | +| dotenv | 16.4.5 | Environment variable management | + +## 9. Build and Deployment + +| Technology | Version | Purpose | +|------------|---------|---------| +| Docker | Latest | Containerization for consistent environments | +| Nginx | 1.x | Web server for serving static assets in production | + +## 10. Development Environment + +| Requirement | Details | +|-------------|---------| +| Node.js Version | 20.x (specified in .nvmrc) | +| Package Manager | npm | +| Environment Variables | VITE_API_URL for API endpoint configuration | + +## 11. Project Structure + +``` +frontend/ +├── src/ +│ ├── assets/ # Static assets +│ ├── client/ # Auto-generated OpenAPI client +│ ├── components/ # Reusable React components +│ ├── hooks/ # Custom React hooks +│ ├── routes/ # Application routes and pages +├── public/ # Static files served directly +├── tests/ # End-to-end tests with Playwright +``` + +## 12. API Integration Workflow + +1. Backend API is defined using FastAPI and OpenAPI +2. OpenAPI schema is exported from the backend as openapi.json +3. Frontend client is generated using @hey-api/openapi-ts +4. Generated client provides type-safe interfaces for API calls +5. TanStack Query is used to manage API call state and caching + +## 13. Development Workflow + +1. Local development uses Vite's development server +2. API requests are directed to the backend server (configurable via VITE_API_URL) +3. Code quality is enforced using Biome +4. End-to-end testing is performed with Playwright + +## 14. Deployment Strategy + +1. Production builds are created using Vite's build process +2. Docker multi-stage build optimizes the container size +3. Nginx serves the static files in production +4. Configuration can be modified via environment variables + +## 15. Testing Strategy + +1. End-to-end testing with Playwright +2. 
Test files are located in the /tests directory +3. Authentication state is preserved between tests +4. Tests can be run in UI mode or headless mode \ No newline at end of file diff --git a/.cursor/rules/next-implementations.mdc b/.cursor/rules/next-implementations.mdc new file mode 100644 index 0000000000..71ddeb1f6f --- /dev/null +++ b/.cursor/rules/next-implementations.mdc @@ -0,0 +1,111 @@ +--- +description: Next Implementations +globs: +alwaysApply: false +--- +# Concise Implementation Plan for Hybrid Database Architecture + +## Phase 1: Environment and Dependency Setup + +1. **Update requirements.txt** + - Add MongoDB drivers: `motor`, `pymongo` + - Add Redis client: `redis` + - Add Pinecone: `pinecone-client` + - Add task processing: `celery`, `kafka-python` + - Add ML/NLP: `spacy`, `transformers`, `sentence-transformers` + +2. **Update Docker environment** + - Modify Dockerfile to install dependencies + - Add services to docker-compose: + - MongoDB, Redis, RabbitMQ, Kafka + - Worker service for Celery tasks + +3. **Create configuration** + - Update `config.py` with connection settings for all databases + - Create connection utilities for MongoDB and Redis + +## Phase 2: Database Models Implementation + +1. **PostgreSQL Models** + - Create `PoliticalEntity`, `SocialMediaAccount`, `EntityRelationship` + - Set up UUID primary keys and relationships + - Create Pydantic schemas for API + +2. **MongoDB Documents** + - Define schemas for social media posts and comments + - Set up indexing strategy + - Create initialization script + +3. **Redis Data Structures** + - Define key patterns for metrics, trending data, and activity streams + - Create cache invalidation strategy + +## Phase 3: Repository Layer Implementation + +1. **Implement PostgreSQL Repositories** + - `PoliticalEntityRepository` with CRUD operations + - `EntityRelationshipRepository` for relationship management + +2. **Implement MongoDB Repositories** + - `PostRepository` for social media content + - `CommentRepository` for user comments + - Implement cross-database reference patterns + +3. **Implement Redis Service** + - Methods for caching frequent data + - Real-time counters and metrics + - Activity streams and trending topic tracking + +4. **Implement Vector Database Service** + - Connect to Pinecone + - Vector embedding generation + - Similarity search functionality + +## Phase 4: Task Processing Implementation + +1. **Set up Celery** + - Configure worker and task queues + - Set up task routing based on priority + +2. **Create Core Tasks** + - Data collection tasks (platform-specific scrapers) + - Content analysis tasks (sentiment, topic modeling) + - Embedding generation tasks + - Alert generation tasks + +3. **Implement Kafka Stream Processors** + - Stream processors for real-time monitoring + - Topic configuration for entity mentions, sentiment changes + - Consumer implementation for alerts + +## Phase 5: Integration and Testing + +1. **Build Cross-Database Services** + - Social media monitoring service + - Entity relationship analysis service + - Content similarity service + +2. **Create Test Data and Fixtures** + - Test data for each database + - Integration test scenarios + - Performance benchmarks + +3. **Implement API Endpoints** + - Entity management endpoints + - Content search and analysis endpoints + - Analytics dashboard endpoints + +## Implementation Order + +1. Start with basic PostgreSQL models and repositories +2. Add MongoDB integration for posts and comments +3. 
Implement Redis for caching and real-time features +4. Add vector database functionality for semantic analysis +5. Set up Celery and Kafka for processing pipelines + +## Testing Strategy + +- Unit test each repository layer individually +- Integration tests for cross-database operations +- Load testing for performance-critical components +- End-to-end tests for complete workflows \ No newline at end of file diff --git a/.cursor/rules/prd.mdc b/.cursor/rules/prd.mdc new file mode 100644 index 0000000000..49807f9631 --- /dev/null +++ b/.cursor/rules/prd.mdc @@ -0,0 +1,316 @@ +--- +description: PRD for the actual repository. +globs: +alwaysApply: false +--- +# Product Requirements Document +# Political Social Media Analysis Platform + +## Document History +| Version | Date | Author | Description | +|---------|------|--------|-------------| +| 1.0 | March 12, 2025 | | Initial PRD | + +## Overview +The Political Social Media Analysis Platform is a comprehensive application designed to scrape, analyze, and derive insights from political figures' social media presence. The platform collects posts and audience engagement data across multiple social media platforms, analyzes sentiment and relationships between political entities, and provides actionable intelligence for strategic communication planning. + +## Business Objectives +- Provide comprehensive social media intelligence for political campaigns and figures +- Enable data-driven decision making for future content and messaging strategies +- Track and analyze relationships between political entities (opponents and allies) +- Identify audience sentiment patterns to optimize communication strategies +- Deliver actionable insights to improve engagement and messaging effectiveness + +## Target Users +- Political campaign managers +- Political communications directors +- Policy advisors +- Political analysts +- Public relations specialists + +## User Stories + +### As a Campaign Manager +- I want to track all social media activity of our political figure across platforms +- I want to understand audience sentiment towards specific policy messages +- I want to compare our engagement metrics against political opponents +- I need to identify trending topics our audience cares about + +### As a Communications Director +- I want to see which messaging themes resonate most with our audience +- I need to identify potential PR issues before they escalate +- I want to track the effectiveness of our response to opponent messaging +- I need insights on optimal posting times and content formats + +### As a Political Analyst +- I want to map relationships between political figures based on social interactions +- I need to track evolving narratives on specific policy issues +- I want to identify influential supporters and detractors +- I need to analyze regional variations in audience response + +## Core Features + +### 1. Multi-Platform Data Collection +#### Description +Automated scraping system to collect posts, comments, and engagement metrics from multiple social media platforms. 
+ +#### Requirements +- Support for Instagram, Facebook, TikTok, and Twitter/X +- Collection of posts, videos, comments, reactions, and shares +- Media content archiving (images, videos) +- Metadata extraction (posting time, location tags, mentioned accounts) +- Historical data backfilling capability + +#### Acceptance Criteria +- Successfully collects 99.5%+ of public posts from tracked accounts +- Captures all public comments on monitored posts +- Updates data at configurable intervals (minimum hourly) +- Maintains collection despite platform UI changes +- Properly handles rate limits and access restrictions + +### 2. Political Entity Relationship Mapping +#### Description +System to track, visualize, and analyze relationships between political figures based on mentions, interactions, and content similarity. + +#### Requirements +- Define relationship types (ally, opponent, neutral, evolving) +- Track direct mentions and indirect references +- Quantify relationship strength through interaction frequency +- Visualize network graphs of political relationships +- Track relationship changes over time + +#### Acceptance Criteria +- Accurately identifies relationships between tracked entities +- Updates relationship status based on new interactions +- Provides filterable visualization of relationship networks +- Generates alerts for significant relationship changes +- Supports manual relationship tagging to supplement automated analysis + +### 3. Sentiment Analysis Engine +#### Description +Advanced NLP system to analyze audience sentiment in comments and reactions to political content. + +#### Requirements +- Comment-level sentiment scoring (positive, negative, neutral) +- Emotion classification (anger, support, confusion, etc.) +- Aggregated sentiment metrics by post, topic, and time period +- Automated detection of sentiment shifts +- Topic-specific sentiment breakdowns + +#### Acceptance Criteria +- Sentiment classification with 85%+ accuracy compared to human analysts +- Real-time processing of new comments +- Identification of sentiment trends and anomalies +- Language support for Spanish as primary language, with English as secondary +- Ability to filter toxic or irrelevant comments + +### 4. Topic Modeling & Issue Tracking +#### Description +System to identify, categorize, and track discussion topics across social media content. + +#### Requirements +- Automatic topic extraction from posts and comments +- Classification of content by policy areas +- Tracking topic evolution over time +- Identification of emerging issues +- Comparison of topic engagement across platforms + +#### Acceptance Criteria +- Correctly categorizes 90%+ of content into relevant topics +- Identifies trending topics within 1 hour of emergence +- Tracks topic sentiment independently +- Correlates topics across different political entities +- Supports manual topic tagging and categorization + +### 5. Analysis Dashboard & Reporting +#### Description +Comprehensive visualization interface providing actionable insights from collected data. 
+ +#### Requirements +- Overview dashboard with key performance metrics +- Entity-specific profile dashboards +- Comparative analysis tools +- Customizable report generation +- Data export functionality +- Alert configuration for critical metrics + +#### Acceptance Criteria +- Displays real-time and historical data +- Supports filtering by date range, platform, and entity +- Generates scheduled reports in PDF and Excel formats +- Allows bookmark saving of specific analysis views +- Maintains responsive performance with large datasets + +## Enhanced Features + +### 6. Real-time Monitoring +#### Description +Alert system for tracking sudden changes in sentiment or mentions by influential accounts. + +#### Requirements +- Configurable alert thresholds for sentiment changes +- Notification system for mentions by high-influence accounts +- Real-time monitoring dashboard +- Trend detection algorithms to identify viral potential +- Integration with external notification channels (email, SMS, app) + +#### Acceptance Criteria +- Detects significant sentiment shifts within 15 minutes +- Correctly identifies high-importance mentions with 95%+ accuracy +- Delivers alerts through configured channels within 5 minutes +- Provides context with each alert +- Supports alert customization by user role + +### 7. Campaign Effectiveness Metrics +#### Description +Advanced analytics to measure message resonance, audience growth, and predict content performance. + +#### Requirements +- Message resonance scoring across demographics +- Audience growth attribution models +- Content performance prediction +- A/B testing framework for message variations +- Conversion tracking (from awareness to engagement) + +#### Acceptance Criteria +- Provides quantifiable metrics for message effectiveness +- Tracks audience growth correlated with specific content strategies +- Predicts post performance with 80%+ accuracy +- Generates actionable recommendations for content optimization +- Supports campaign-level grouping and analysis + +### 8. Competitive Intelligence +#### Description +Tools to analyze and compare messaging strategies and effectiveness across political entities. + +#### Requirements +- Share of voice measurement across platforms +- Narrative comparison between entities +- Messaging gap analysis +- Audience overlap identification +- Response timing analysis + +#### Acceptance Criteria +- Accurately measures relative visibility of tracked entities +- Identifies messaging similarities and differences +- Highlights underserved topics with audience interest +- Tracks narrative evolution compared to competitors +- Provides actionable competitive positioning insights + +### 9. Historical Context +#### Description +Timeline views and analysis tools to track messaging evolution and effectiveness over time. + +#### Requirements +- Timeline visualization of messaging +- Crisis response effectiveness tracking +- Message consistency analysis +- Before/after analysis for major events +- Historical trend comparison + +#### Acceptance Criteria +- Displays comprehensive messaging history +- Allows comparison of multiple time periods +- Quantifies messaging consistency and evolution +- Identifies correlations between events and messaging changes +- Supports annotation of significant events + +### 10. Geographic Insights +#### Description +Tools to analyze regional variations in audience response and engagement. 
+ +#### Requirements +- Regional sentiment mapping +- Demographic response analysis +- Location-based messaging effectiveness +- Geographic hot spots for specific topics +- Regional influence tracking + +#### Acceptance Criteria +- Maps engagement and sentiment to geographic regions +- Identifies regional variations in message effectiveness +- Provides insights for location-targeted messaging +- Tracks regional influence of political entities +- Supports filtering and comparison by region + +## Technical Requirements + +### Infrastructure +- Cloud-based deployment with scalability for traffic spikes +- Containerized architecture for consistent deployment +- Fault-tolerant design with redundancy for critical components +- Automated backup and disaster recovery + +### Security +- End-to-end encryption for all data +- Role-based access control +- Audit logging for all system actions +- Secure API authentication +- Regular security assessments + +### Performance +- Dashboard loading time < 3 seconds +- Data collection processing capacity of 10,000+ posts/hour +- Analysis processing of 100,000+ comments/hour +- Support for 100+ concurrent users +- 99.9% system uptime + +### Integration +- API access for external system integration +- Export formats: CSV, Excel, JSON +- Webhook support for real-time data sharing +- Email integration for reports and alerts +- Calendar integration for scheduling + +## Implementation Timeline + +### Phase 1: Foundation (Months 1-3) +- Core data collection infrastructure +- Basic data storage and processing +- Initial entity and relationship models +- Simple dashboard with basic metrics + +### Phase 2: Analysis Capabilities (Months 4-6) +- Sentiment analysis engine +- Topic modeling implementation +- Enhanced dashboard visualizations +- Basic reporting functionality + +### Phase 3: Advanced Features (Months 7-9) +- Relationship mapping visualization +- Campaign effectiveness metrics +- Competitive intelligence tools +- Alert system implementation + +### Phase 4: Refinement (Months 10-12) +- Geographic insights +- Historical context tools +- Performance optimization +- Enhanced reporting + +## Success Metrics +- System consistently captures >99% of relevant social media activity +- Sentiment analysis achieves >85% accuracy against human review +- Users report >50% reduction in time spent on manual social media analysis +- Platform identifies emerging issues 24+ hours before traditional methods +- Strategic recommendations achieve measurable improvement in engagement metrics + +## Assumptions & Constraints +- Public APIs or scraping capabilities remain available for target platforms +- Legal compliance with platform terms of service is maintained +- Processing capacity scales with data volume growth +- User adoption requires minimal training (<2 hours) +- System maintains compliance with relevant data privacy regulations + +## Open Questions +- How will the system handle platform API changes or limitations? +- What is the strategy for platforms that actively prevent scraping? +- How will we validate sentiment analysis accuracy? +- What is the approach for expanding language support beyond Spanish and English? +- How will we determine relationship classifications initially? 
+ +## Appendix +- Glossary of Terms +- User Persona Details +- Competitive Analysis +- Technical Architecture Diagrams \ No newline at end of file diff --git a/.cursor/rules/server-architecture.mdc b/.cursor/rules/server-architecture.mdc new file mode 100644 index 0000000000..87ba4d828a --- /dev/null +++ b/.cursor/rules/server-architecture.mdc @@ -0,0 +1,350 @@ +--- +description: Server architectur of the project. +globs: +alwaysApply: false +--- +## 1. System Overview + +The Political Social Media Analysis Platform follows a modern, containerized microservices architecture designed for scalability, resilience, and maintainable development. This document outlines the overall system architecture, deployment strategy, and service interaction patterns. + +``` +┌───────────────────────┐ ┌───────────────────────┐ +│ │ │ │ +│ Frontend (React/TS) │◄────┤ Backend (FastAPI) │ +│ │ │ │ +└───────────────────────┘ └───────────┬───────────┘ + │ + ▼ +┌───────────────────────┐ ┌───────────────────────┐ +│ Database Layer │ │ Task Processing │ +│ │ │ │ +│ ┌─────────────────┐ │ │ ┌─────────────────┐ │ +│ │ PostgreSQL │ │ │ │ Celery Worker │ │ +│ │ (Relational) │ │ │ │ (Tasks) │ │ +│ └─────────────────┘ │ │ └─────────────────┘ │ +│ │ │ │ +│ ┌─────────────────┐ │ │ ┌─────────────────┐ │ +│ │ MongoDB │ │ │ │ Celery Beat │ │ +│ │ (Document) │ │ │ │ (Scheduling) │ │ +│ └─────────────────┘ │ │ └─────────────────┘ │ +│ │ │ │ +│ ┌─────────────────┐ │ │ ┌─────────────────┐ │ +│ │ Redis │ │ │ │ RabbitMQ │ │ +│ │ (Cache) │ │ │ │ (Message Queue)│ │ +│ └─────────────────┘ │ │ └─────────────────┘ │ +│ │ │ │ +│ ┌─────────────────┐ │ │ ┌─────────────────┐ │ +│ │ Pinecone │ │ │ │ Kafka │ │ +│ │ (Vector) │ │ │ │ (Streaming) │ │ +│ └─────────────────┘ │ │ └─────────────────┘ │ +│ │ │ │ +└───────────────────────┘ └───────────────────────┘ +``` + +## 2. Containerization Strategy + +### 2.1 Docker Compose Architecture + +The system uses Docker Compose for container orchestration with a dual-file approach: + +| File | Purpose | Usage | +|------|---------|-------| +| `docker-compose.yml` | Production-ready base configuration | Primary service definitions | +| `docker-compose.override.yml` | Development environment customizations | Automatically merged during development | + +### 2.2 Service Organization + +Services are organized into logical groups: + +1. **Frontend Services** + - React frontend application + +2. **Backend Services** + - FastAPI application + - Prestart initialization service + +3. **Database Services** + - PostgreSQL (relational data) + - MongoDB (document data) + - Redis (caching and real-time operations) + - Pinecone (vector embeddings) + +4. **Message Processing** + - RabbitMQ (message broker) + - Celery Worker (task execution) + - Celery Beat (task scheduling) + +5. **Stream Processing** + - Kafka (event streaming) + - Zookeeper (Kafka coordination) + +6. **Development Tools** + - Adminer (PostgreSQL management) + - MongoDB Express (MongoDB management) + - Traefik Proxy (API gateway) + - Mailcatcher (email testing) + - Celery Flower (task monitoring) + +### 2.3 Development vs. 
Production + +| Aspect | Development | Production | +|--------|------------|------------| +| Restart Policy | `restart: "no"` | `restart: always` | +| Port Exposure | Ports exposed to host | Only necessary ports exposed | +| Volume Mounts | Source code mounted | Built artifacts only | +| Network Configuration | Local networks | External Traefik network | +| Health Checks | Simple checks | Comprehensive checks with retries | +| Environment | Development settings | Production settings | +| Logging | Verbose logging | Production logging levels | + +## 3. Network Architecture + +### 3.1 Network Configuration + +``` +┌─────────────────────────────────────────────────────────────┐ +│ traefik-public │ +│ │ +│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ +│ │ Traefik │ │ Frontend│ │ Backend │ │ Adminer │ │ +│ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────┐ +│ default │ +│ │ +│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ +│ │PostgreSQL│ │ MongoDB │ │ Redis │ │RabbitMQ │ │ +│ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │ +│ │ +│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ +│ │ Kafka │ │Zookeeper│ │ Celery │ │ Celery │ │ +│ │ │ │ │ │ Worker │ │ Beat │ │ +│ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +### 3.2 Traefik Integration + +- **Production**: Uses external Traefik network with proper TLS termination +- **Development**: Includes local Traefik instance with insecure dashboard +- Routing follows pattern: `{service}.{domain}` → appropriate container + +### 3.3 HTTPS Configuration + +- Automatic TLS certificate issuance via Let's Encrypt +- HTTP to HTTPS redirection enforced +- Custom middleware for security headers + +## 4. Data Architecture + +### 4.1 Hybrid Database Strategy + +The system employs a polyglot persistence approach using specialized databases: + +| Database | Purpose | Data Types | +|----------|---------|------------| +| PostgreSQL | Relational data, user accounts, structured entities | Users, political entities, relationships, configuration | +| MongoDB | Document storage, social media content | Posts, comments, media items, engagement metrics | +| Redis | Caching, real-time operations, task management | Session data, counters, leaderboards, task queues | +| Pinecone | Vector embeddings for semantic search | Text embeddings, similarity models | + +### 4.2 Data Flow Patterns + +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ │ │ │ │ │ +│ API Request │────►│ FastAPI │────►│ PostgreSQL │ +│ │ │ Handler │ │ │ +└─────────────┘ └──────┬──────┘ └─────────────┘ + │ + ▼ +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ │ │ │ │ │ +│ Celery Task │◄────┤ Task Queue │◄────┤ RabbitMQ │ +│ │ │ │ │ │ +└──────┬──────┘ └─────────────┘ └─────────────┘ + │ + ▼ +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ │ │ │ │ │ +│ MongoDB │ │ Redis Cache │ │ Pinecone │ +│ Storage │ │ │ │ Vectors │ +│ │ │ │ │ │ +└─────────────┘ └─────────────┘ └─────────────┘ +``` + +### 4.3 Data Persistence + +- Volume mapping for all databases to ensure data persistence +- Standardized volume naming: `{service-name}_data` +- Consistent backup solutions for each database type + +## 5. 
Task Processing Architecture + +### 5.1 Celery Integration + +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ │ │ │ │ │ +│ FastAPI │────►│ RabbitMQ │────►│ Celery │ +│ Backend │ │ Broker │ │ Worker │ +│ │ │ │ │ │ +└─────────────┘ └─────────────┘ └─────────────┘ + ▲ │ + │ │ + │ ┌─────────────┐ │ + │ │ │ │ + └───────────┤ Redis │◄────────────┘ + │ Results │ + │ │ + └─────────────┘ + +┌─────────────┐ ┌─────────────┐ +│ │ │ │ +│ Celery Beat │────►│ Task │ +│ Scheduler │ │ Schedule │ +│ │ │ │ +└─────────────┘ └─────────────┘ +``` + +### 5.2 Task Types + +- **Data Collection Tasks**: Social media scraping, data acquisition +- **Analysis Tasks**: Content analysis, sentiment scoring, entity extraction +- **Reporting Tasks**: Report generation, alert/notification creation +- **Maintenance Tasks**: Database cleanup, analytics generation + +### 5.3 Kafka Stream Processing + +- Event-driven architecture for real-time data streams +- Topic-based segregation of event types +- Consumer groups for scalable processing + +## 6. Security Architecture + +### 6.1 Authentication and Authorization + +- JWT-based authentication with appropriate expiration +- Role-based access control (RBAC) +- OAuth2 password flow with secure password hashing + +### 6.2 Network Security + +- Traefik as edge gateway with TLS termination +- Internal network isolation +- Minimal port exposure + +### 6.3 Secret Management + +- Environment variable-based secret injection +- No hardcoded credentials +- Support for container secrets in production + +## 7. Deployment Strategy + +### 7.1 Development Workflow + +``` +Local Development → CI/CD Pipeline → Staging → Production +``` + +- **Local**: Docker Compose with override file +- **CI/CD**: Automated testing and container building +- **Staging**: Production-like environment for validation +- **Production**: Optimized for performance and security + +### 7.2 Scaling Strategy + +- Horizontal scaling of stateless services +- Vertical scaling of database services +- Load balancing through Traefik + +## 8. Monitoring and Observability + +### 8.1 Logging + +- Structured logging format +- Log aggregation across services +- Sentry integration for error tracking + +### 8.2 Metrics + +- Health check endpoints for all services +- Prometheus-compatible metrics endpoints +- Celery Flower for task monitoring + +## 9. Resilience Features + +### 9.1 Health Checks + +- Database connectivity checks +- API endpoint checks +- Appropriate retry policies + +### 9.2 Failover Strategy + +- Restart policies for critical services +- Connection retry logic +- Graceful degradation when components are unavailable + +## 10. Development Environment + +### 10.1 Local Setup + +- Simple startup with `docker-compose up` +- Hot-reloading for backend and frontend +- Development admin interfaces for all databases + +### 10.2 Testing + +- Environment-specific testing configuration +- Integration tests with in-memory databases +- E2E testing with Playwright + +## 11. 
Future Considerations + +### 11.1 Kubernetes Migration Path + +- Current Docker Compose structure designed for easy K8s migration +- Service definitions align with Kubernetes patterns +- Volume definitions compatible with persistent volume claims + +### 11.2 Service Mesh Integration + +- Prepared for Istio or Linkerd integration +- Service-to-service communication patterns established +- Observability foundations in place + +## Appendix A: Environment Variables + +| Variable | Purpose | Example | +|----------|---------|---------| +| `DOMAIN` | Base domain for all services | `example.com` | +| `POSTGRES_*` | PostgreSQL configuration | `POSTGRES_USER=postgres` | +| `MONGO_*` | MongoDB configuration | `MONGO_USER=mongo` | +| `RABBITMQ_*` | RabbitMQ configuration | `RABBITMQ_USER=guest` | +| `REDIS_*` | Redis configuration | `REDIS_PORT=6379` | +| `SECRET_KEY` | Application encryption key | `supersecretkey` | +| `SENTRY_DSN` | Sentry error tracking | `https://...` | + +## Appendix B: Network Ports + +| Service | Port | Purpose | +|---------|------|---------| +| Traefik | 80, 443 | HTTP/HTTPS | +| PostgreSQL | 5432 | Database access | +| MongoDB | 27017 | Database access | +| Redis | 6379 | Cache access | +| RabbitMQ | 5672, 15672 | AMQP and management | +| Kafka | 9092 | Stream processing | +| FastAPI | 8000 | API access | +| Frontend | 5173 | Web UI (development) | + +## Appendix C: Related Documentation + +- `backend-technical-stack.mdc` - Backend technology details +- `database-architecture.mdc` - Detailed database design +- `data-processing-architecture.mdc` - Data processing pipeline details \ No newline at end of file diff --git a/.env b/.env index 1d44286e25..cea0cf42bd 100644 --- a/.env +++ b/.env @@ -13,14 +13,14 @@ FRONTEND_HOST=http://localhost:5173 # Environment: local, staging, production ENVIRONMENT=local -PROJECT_NAME="Full Stack FastAPI Project" -STACK_NAME=full-stack-fastapi-project +PROJECT_NAME="Political Social Media Analysis Platform" +STACK_NAME=political-analysis-local # Backend BACKEND_CORS_ORIGINS="http://localhost,http://localhost:5173,https://localhost,https://localhost:5173,http://localhost.tiangolo.com" -SECRET_KEY=changethis +SECRET_KEY="SII-BEQmcN8arjGWpOdpHhz0kz8PIqONaWRhekIqFDc" FIRST_SUPERUSER=admin@example.com -FIRST_SUPERUSER_PASSWORD=changethis +FIRST_SUPERUSER_PASSWORD=password # Emails SMTP_HOST= @@ -38,6 +38,25 @@ POSTGRES_DB=app POSTGRES_USER=postgres POSTGRES_PASSWORD=changethis +# MongoDB +MONGO_USER=mongouser +MONGO_PASSWORD=mongopassword +MONGO_DB=socialmediadb + +# RabbitMQ +RABBITMQ_USER=rabbitmquser +RABBITMQ_PASSWORD=rabbitmqpassword + +# Celery +CELERY_BROKER=amqp://rabbitmquser:rabbitmqpassword@rabbitmq:5672// + +# Add for local development when not using Docker +# These will be automatically set in Docker based on service names +# MONGO_SERVER=localhost +# REDIS_SERVER=localhost +# RABBITMQ_SERVER=localhost +# KAFKA_BOOTSTRAP_SERVERS=localhost:9092 + SENTRY_DSN= # Configure these with your own Docker registry images diff --git a/.env.example b/.env.example new file mode 100644 index 0000000000..e5ea7a4a63 --- /dev/null +++ b/.env.example @@ -0,0 +1,56 @@ +# Project settings +PROJECT_NAME=Political Social Media Analysis Platform +DOMAIN=localhost +STACK_NAME=political-analysis-local +ENVIRONMENT=local + +# Security +SECRET_KEY=changethis + +# PostgreSQL +POSTGRES_SERVER=db +POSTGRES_PORT=5432 +POSTGRES_USER=postgres +POSTGRES_PASSWORD=changethis +POSTGRES_DB=app + +# MongoDB +MONGODB_SERVER=mongodb +MONGODB_PORT=27017 +MONGO_USER=mongo 
+MONGO_PASSWORD=changethis +MONGODB_DATABASE=political_analysis + +# Redis +REDIS_SERVER=redis +REDIS_PORT=6379 +REDIS_PASSWORD= +REDIS_DB=0 + +# Frontend +FRONTEND_HOST=http://localhost:5173 + +# CORS +BACKEND_CORS_ORIGINS=["http://localhost:5173", "http://localhost:3000"] + +# First superuser +FIRST_SUPERUSER=admin@example.com +FIRST_SUPERUSER_PASSWORD=changethis + +# Email +SMTP_TLS=True +SMTP_SSL=False +SMTP_PORT=587 +SMTP_HOST= +SMTP_USER= +SMTP_PASSWORD= +EMAILS_FROM_EMAIL=info@example.com +EMAILS_FROM_NAME=Political Analysis Platform + +# Sentry +SENTRY_DSN= + +# Docker +DOCKER_IMAGE_BACKEND=political-analysis-backend +DOCKER_IMAGE_FRONTEND=political-analysis-frontend +TAG=latest \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000..0e2895e9b9 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,155 @@ +name: CI + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + backend-lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + cache: 'pip' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ruff black mypy + pip install -r backend/requirements.txt + - name: Lint with ruff + run: | + cd backend + ruff check . + - name: Format with black + run: | + cd backend + black --check . + - name: Type check with mypy + run: | + cd backend + mypy . + + backend-test: + runs-on: ubuntu-latest + services: + postgres: + image: postgres:15 + env: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: test_db + ports: + - 5432:5432 + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + mongodb: + image: mongo:6 + env: + MONGO_INITDB_ROOT_USERNAME: mongo + MONGO_INITDB_ROOT_PASSWORD: mongo + ports: + - 27017:27017 + redis: + image: redis:7-alpine + ports: + - 6379:6379 + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + cache: 'pip' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest pytest-cov + pip install -r backend/requirements.txt + - name: Test with pytest + run: | + cd backend + pytest --cov=app + env: + DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_db + MONGODB_URL: mongodb://mongo:mongo@localhost:27017/ + REDIS_URL: redis://localhost:6379/0 + + frontend-lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Node.js + uses: actions/setup-node@v3 + with: + node-version: '18' + cache: 'npm' + cache-dependency-path: frontend/package-lock.json + - name: Install dependencies + run: | + cd frontend + npm ci + - name: Lint with ESLint + run: | + cd frontend + npm run lint + - name: Check formatting with Prettier + run: | + cd frontend + npx prettier --check "src/**/*.{ts,tsx}" + - name: Type check with TypeScript + run: | + cd frontend + npx tsc --noEmit + + frontend-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Node.js + uses: actions/setup-node@v3 + with: + node-version: '18' + cache: 'npm' + cache-dependency-path: frontend/package-lock.json + - name: Install dependencies + run: | + cd frontend + npm ci + - name: Run tests + run: | + cd frontend + npm test + + build: + runs-on: ubuntu-latest + needs: [backend-lint, backend-test, frontend-lint, frontend-test] + 
steps: + - uses: actions/checkout@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - name: Build backend + uses: docker/build-push-action@v4 + with: + context: ./backend + push: false + tags: political-analysis-backend:latest + target: production + cache-from: type=gha + cache-to: type=gha,mode=max + - name: Build frontend + uses: docker/build-push-action@v4 + with: + context: ./frontend + push: false + tags: political-analysis-frontend:latest + target: production + cache-from: type=gha + cache-to: type=gha,mode=max \ No newline at end of file diff --git a/.gitignore b/.gitignore index a6dd346572..5997da43b0 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ node_modules/ /playwright-report/ /blob-report/ /playwright/.cache/ +.env \ No newline at end of file diff --git a/app/models.py b/app/models.py new file mode 100644 index 0000000000..4436d6dad7 --- /dev/null +++ b/app/models.py @@ -0,0 +1,11 @@ +from app.alembic.env import SQLModel + +# Import all models here so Alembic can discover them +from app.db.models.user import User +from app.db.models.item import Item + +# Import MongoDB schemas - TO BE REMOVED +# from app.db.schemas.political_entity import PoliticalEntity +# from app.db.schemas.social_post import SocialPost + +__all__ = ["User", "Item", "SQLModel"] \ No newline at end of file diff --git a/backend/Dockerfile b/backend/Dockerfile index 44c53f0365..4b56184159 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -40,4 +40,7 @@ COPY ./app /app/app RUN --mount=type=cache,target=/root/.cache/uv \ uv sync +# Install all required dependencies directly +RUN pip install celery==5.3.0 flower==1.2.0 redis==4.6.0 pydantic>=2.0 pydantic-settings>=2.0 fastapi>=0.114.2 pymongo>=4.5.0 motor>=3.2.0 + CMD ["fastapi", "run", "--workers", "4", "app/main.py"] diff --git a/backend/Dockerfile.celery b/backend/Dockerfile.celery new file mode 100644 index 0000000000..a89036c7f8 --- /dev/null +++ b/backend/Dockerfile.celery @@ -0,0 +1,14 @@ +FROM python:3.10 + +ENV PYTHONUNBUFFERED=1 +ENV PYTHONPATH=/app + +WORKDIR /app/ + +COPY ./pyproject.toml /app/ +COPY ./app /app/app + +# Install all required dependencies directly +RUN pip install celery==5.3.0 flower==1.2.0 redis==4.6.0 pydantic>=2.0 pydantic-settings>=2.0 fastapi>=0.114.2 pymongo>=4.5.0 motor>=3.2.0 pika>=1.3.2 email-validator>=2.1.0 python-multipart>=0.0.7 passlib[bcrypt]>=1.7.4 bcrypt==4.0.1 pyjwt>=2.8.0 sqlmodel>=0.0.21 psycopg[binary]>=3.1.13 alembic>=1.12.1 tenacity>=8.2.3 + +CMD ["celery", "-A", "app.tasks.celery_app", "worker", "--loglevel=info"] \ No newline at end of file diff --git a/backend/README.md b/backend/README.md index 17210a2f2c..c6d5593939 100644 --- a/backend/README.md +++ b/backend/README.md @@ -1,172 +1,129 @@ -# FastAPI Project - Backend +# FastAPI Backend Template + +This is a modern backend template built with FastAPI, SQLModel, PostgreSQL, and JWT authentication. 
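The Celery worker Dockerfile added above starts the worker with `-A app.tasks.celery_app`, but that module does not appear anywhere in this diff. A minimal, hypothetical sketch of what it might contain, assuming the `CELERY_BROKER`, `celery_result_backend_uri`, and `CELERY_TASK_ROUTES` settings added to `app/core/config.py` later in this diff, and task packages matching the `app/tasks/` layout from the architecture rules:

```python
# Hypothetical sketch of app/tasks/celery_app.py; the real module is not included in this diff.
from celery import Celery

from app.core.config import settings

celery_app = Celery(
    "political_analysis",
    broker=settings.CELERY_BROKER,
    backend=settings.celery_result_backend_uri,
    # Assumed task modules, mirroring the app/tasks/ packages named in the architecture rules.
    include=[
        "app.tasks.scraping",
        "app.tasks.analysis",
        "app.tasks.notifications",
    ],
)

celery_app.conf.update(
    task_serializer=settings.CELERY_TASK_SERIALIZER,
    result_serializer=settings.CELERY_RESULT_SERIALIZER,
    accept_content=settings.CELERY_ACCEPT_CONTENT,
    timezone=settings.CELERY_TIMEZONE,
    # Route scraping/analysis/notification tasks to their own queues.
    task_routes=settings.CELERY_TASK_ROUTES,
)
```

With a module along these lines, the `celery -A app.tasks.celery_app worker --loglevel=info` command in the Dockerfile and a Flower dashboard would both resolve the same app instance.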
+ +## Project Structure + +``` +backend/ +├── app/ +│ ├── api/ # API routes +│ │ ├── api_v1/ # API v1 routes +│ │ │ ├── endpoints/ # API endpoints +│ │ │ └── api.py # API router +│ │ └── deps.py # Dependency injection +│ ├── core/ # Core functionality +│ │ ├── config.py # Application configuration +│ │ ├── security.py # Authentication and security +│ │ └── errors.py # Error handling +│ ├── db/ # Database models and connections +│ │ ├── models/ # SQLModel models +│ │ │ ├── user.py +│ │ │ └── item.py +│ │ └── session.py # Database session management +│ ├── schemas/ # API schemas (Pydantic models) +│ │ ├── user.py +│ │ ├── item.py +│ │ └── common.py # Shared schema definitions +│ ├── services/ # Business logic services +│ │ └── user.py # User service +│ └── main.py # FastAPI application entry point +├── tests/ # Tests +├── Dockerfile # Docker configuration +├── pyproject.toml # Python project configuration +└── README.md # This file +``` + +## Features + +- **FastAPI**: Modern, fast web framework for building APIs +- **SQLModel**: SQL databases in Python, designed for simplicity, compatibility, and robustness +- **JWT Authentication**: Secure authentication with JWT tokens +- **Dependency Injection**: Clean, modular code with FastAPI's dependency injection +- **Environment Variables**: Configuration via environment variables +- **Docker**: Containerized development and deployment +- **UUID Primary Keys**: All database models use UUIDs as primary keys + +## Database Structure + +The application uses **PostgreSQL** with SQLModel ORM for: +- User accounts and authentication +- Application data storage +- Relationship management + +### UUID Migration Notes + +As of July 2024, the project has migrated from using integer IDs to UUIDs for all database models. This change improves: +- Security (IDs are not sequential or predictable) +- Distributed system compatibility +- Data privacy + +## Getting Started + +### Prerequisites + +- Docker and Docker Compose +- Python 3.10+ +- uv (for dependency management) + +### Development Setup + +1. Clone the repository +2. Create a `.env` file from the example: + ```bash + cp .env.example .env + ``` +3. Install dependencies using uv: + ```bash + uv venv + source .venv/bin/activate + uv pip sync + ``` +4. Start the development server: + ```bash + uvicorn app.main:app --reload + ``` + +### Running with Docker -## Requirements - -* [Docker](https://www.docker.com/). -* [uv](https://docs.astral.sh/uv/) for Python package and environment management. - -## Docker Compose - -Start the local development environment with Docker Compose following the guide in [../development.md](../development.md). - -## General Workflow - -By default, the dependencies are managed with [uv](https://docs.astral.sh/uv/), go there and install it. - -From `./backend/` you can install all the dependencies with: - -```console -$ uv sync -``` - -Then you can activate the virtual environment with: - -```console -$ source .venv/bin/activate -``` - -Make sure your editor is using the correct Python virtual environment, with the interpreter at `backend/.venv/bin/python`. - -Modify or add SQLModel models for data and SQL tables in `./backend/app/models.py`, API endpoints in `./backend/app/api/`, CRUD (Create, Read, Update, Delete) utils in `./backend/app/crud.py`. - -## VS Code - -There are already configurations in place to run the backend through the VS Code debugger, so that you can use breakpoints, pause and explore variables, etc. 
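For reference, a short, hypothetical client example (not part of the repository) of the JWT login flow described in the setup instructions above. It assumes `API_V1_STR` is `/api/v1`, the dev server is running on `http://localhost:8000`, and the first superuser from your `.env` exists; responses use the `StandardResponse` envelope introduced by the endpoint changes later in this diff, so payloads sit under the `data` key:

```python
# Hypothetical usage example; endpoint paths assume API_V1_STR = "/api/v1".
import httpx

BASE = "http://localhost:8000/api/v1"

# OAuth2 password flow: credentials are sent as form fields, not JSON.
resp = httpx.post(
    f"{BASE}/login/access-token",
    data={"username": "admin@example.com", "password": "changethis"},  # use your FIRST_SUPERUSER_* values
)
resp.raise_for_status()
token = resp.json()["data"]["access_token"]  # payload is wrapped in the StandardResponse envelope

# Call a protected endpoint with the bearer token.
me = httpx.get(f"{BASE}/users/me", headers={"Authorization": f"Bearer {token}"})
print(me.json()["data"]["email"])
```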
- -The setup is also already configured so you can run the tests through the VS Code Python tests tab. - -## Docker Compose Override - -During development, you can change Docker Compose settings that will only affect the local development environment in the file `docker-compose.override.yml`. - -The changes to that file only affect the local development environment, not the production environment. So, you can add "temporary" changes that help the development workflow. - -For example, the directory with the backend code is synchronized in the Docker container, copying the code you change live to the directory inside the container. That allows you to test your changes right away, without having to build the Docker image again. It should only be done during development, for production, you should build the Docker image with a recent version of the backend code. But during development, it allows you to iterate very fast. - -There is also a command override that runs `fastapi run --reload` instead of the default `fastapi run`. It starts a single server process (instead of multiple, as would be for production) and reloads the process whenever the code changes. Have in mind that if you have a syntax error and save the Python file, it will break and exit, and the container will stop. After that, you can restart the container by fixing the error and running again: - -```console -$ docker compose watch -``` - -There is also a commented out `command` override, you can uncomment it and comment the default one. It makes the backend container run a process that does "nothing", but keeps the container alive. That allows you to get inside your running container and execute commands inside, for example a Python interpreter to test installed dependencies, or start the development server that reloads when it detects changes. - -To get inside the container with a `bash` session you can start the stack with: - -```console -$ docker compose watch -``` - -and then in another terminal, `exec` inside the running container: - -```console -$ docker compose exec backend bash -``` - -You should see an output like: - -```console -root@7f2607af31c3:/app# -``` - -that means that you are in a `bash` session inside your container, as a `root` user, under the `/app` directory, this directory has another directory called "app" inside, that's where your code lives inside the container: `/app/app`. - -There you can use the `fastapi run --reload` command to run the debug live reloading server. - -```console -$ fastapi run --reload app/main.py -``` - -...it will look like: - -```console -root@7f2607af31c3:/app# fastapi run --reload app/main.py +```bash +docker-compose up -d ``` -and then hit enter. That runs the live reloading server that auto reloads when it detects code changes. +### API Documentation -Nevertheless, if it doesn't detect a change but a syntax error, it will just stop with an error. But as the container is still alive and you are in a Bash session, you can quickly restart it after fixing the error, running the same command ("up arrow" and "Enter"). +Once the application is running, you can access the API documentation at: -...this previous detail is what makes it useful to have the container alive doing nothing and then, in a Bash session, make it run the live reload server. 
- -## Backend tests - -To test the backend run: - -```console -$ bash ./scripts/test.sh -``` +- Swagger UI: http://localhost:8000/docs +- ReDoc: http://localhost:8000/redoc -The tests run with Pytest, modify and add tests to `./backend/app/tests/`. +## Development -If you use GitHub Actions the tests will run automatically. +### Adding New Models -### Test running stack +1. Create a new model in `app/db/models/` +2. Create corresponding API schemas in `app/schemas/` +3. Create service functions in `app/services/` +4. Create API endpoints in `app/api/api_v1/endpoints/` -If your stack is already up and you just want to run the tests, you can use: +### Running Tests ```bash -docker compose exec backend bash scripts/tests-start.sh +pytest ``` -That `/app/scripts/tests-start.sh` script just calls `pytest` after making sure that the rest of the stack is running. If you need to pass extra arguments to `pytest`, you can pass them to that command and they will be forwarded. - -For example, to stop on first error: +### Code Formatting and Linting ```bash -docker compose exec backend bash scripts/tests-start.sh -x -``` - -### Test Coverage - -When the tests are run, a file `htmlcov/index.html` is generated, you can open it in your browser to see the coverage of the tests. - -## Migrations - -As during local development your app directory is mounted as a volume inside the container, you can also run the migrations with `alembic` commands inside the container and the migration code will be in your app directory (instead of being only inside the container). So you can add it to your git repository. - -Make sure you create a "revision" of your models and that you "upgrade" your database with that revision every time you change them. As this is what will update the tables in your database. Otherwise, your application will have errors. - -* Start an interactive session in the backend container: - -```console -$ docker compose exec backend bash -``` - -* Alembic is already configured to import your SQLModel models from `./backend/app/models.py`. - -* After changing a model (for example, adding a column), inside the container, create a revision, e.g.: - -```console -$ alembic revision --autogenerate -m "Add column last_name to User model" -``` - -* Commit to the git repository the files generated in the alembic directory. - -* After creating the revision, run the migration in the database (this is what will actually change the database): - -```console -$ alembic upgrade head +ruff check . +ruff format . +mypy . ``` -If you don't want to use migrations at all, uncomment the lines in the file at `./backend/app/core/db.py` that end in: - -```python -SQLModel.metadata.create_all(engine) -``` - -and comment the line in the file `scripts/prestart.sh` that contains: - -```console -$ alembic upgrade head -``` - -If you don't want to start with the default models and want to remove them / modify them, from the beginning, without having any previous revision, you can remove the revision files (`.py` Python files) under `./backend/app/alembic/versions/`. And then create a first migration as described above. - -## Email Templates - -The email templates are in `./backend/app/email-templates/`. Here, there are two directories: `build` and `src`. The `src` directory contains the source files that are used to build the final email templates. The `build` directory contains the final email templates that are used by the application. 
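The rewritten endpoint modules below (items, login, users, utils) wrap every payload in a `StandardResponse` generic imported from `app.schemas`, and the new exception handlers in `app/core/errors.py` use an `ErrorResponse` from `app.schemas.common`, yet neither model is defined anywhere in this diff. A minimal sketch of what they might look like, assuming Pydantic v2 generics and field names inferred from how the endpoints construct them:

```python
# Hypothetical sketch of app/schemas/common.py; the real definitions are not part of this diff.
from typing import Any, Generic, Optional, TypeVar

from pydantic import BaseModel

T = TypeVar("T")


class StandardResponse(BaseModel, Generic[T]):
    """Success envelope: endpoints pass `data` and a human-readable `message`."""

    success: bool = True  # assumed field, mirroring ErrorResponse(success=False)
    data: Optional[T] = None
    message: str = ""


class ErrorResponse(BaseModel):
    """Error envelope returned by the handlers in app/core/errors.py."""

    success: bool = False
    error: str
    details: Optional[Any] = None
```

FastAPI accepts the parametrized form (for example `StandardResponse[UserPublic]`) directly as a `response_model`, which is how the routes below declare it.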
+## Dependency Management with uv -Before continuing, ensure you have the [MJML extension](https://marketplace.visualstudio.com/items?itemName=attilabuti.vscode-mjml) installed in your VS Code. +This project uses [uv](https://github.com/astral-sh/uv) for dependency management: -Once you have the MJML extension installed, you can create a new email template in the `src` directory. After creating the new email template and with the `.mjml` file open in your editor, open the command palette with `Ctrl+Shift+P` and search for `MJML: Export to HTML`. This will convert the `.mjml` file to a `.html` file and now you can save it in the build directory. +- `uv venv` - Create a virtual environment +- `uv pip sync` - Install dependencies from uv.lock +- `uv pip add ` - Add a new dependency diff --git a/backend/app/alembic/env.py b/backend/app/alembic/env.py index 7f29c04680..c01978911d 100755 --- a/backend/app/alembic/env.py +++ b/backend/app/alembic/env.py @@ -18,7 +18,9 @@ # target_metadata = mymodel.Base.metadata # target_metadata = None -from app.models import SQLModel # noqa +# Updated import for new architecture +from sqlmodel import SQLModel # noqa +from app.db.models import user, item # Import all models to register them from app.core.config import settings # noqa target_metadata = SQLModel.metadata diff --git a/backend/app/api/api_v1/api.py b/backend/app/api/api_v1/api.py new file mode 100644 index 0000000000..7a84ed36de --- /dev/null +++ b/backend/app/api/api_v1/api.py @@ -0,0 +1,13 @@ +from fastapi import APIRouter + +from app.api.api_v1.endpoints import items, login, private, users, utils +from app.core.config import settings + +api_router = APIRouter() +api_router.include_router(login.router) +api_router.include_router(users.router) +api_router.include_router(utils.router) +api_router.include_router(items.router) + +if settings.ENVIRONMENT == "local": + api_router.include_router(private.router) \ No newline at end of file diff --git a/backend/app/api/api_v1/endpoints/__init__.py b/backend/app/api/api_v1/endpoints/__init__.py new file mode 100644 index 0000000000..d5c1390d5b --- /dev/null +++ b/backend/app/api/api_v1/endpoints/__init__.py @@ -0,0 +1 @@ +# API endpoints package \ No newline at end of file diff --git a/backend/app/api/api_v1/endpoints/items.py b/backend/app/api/api_v1/endpoints/items.py new file mode 100644 index 0000000000..ba6e81d452 --- /dev/null +++ b/backend/app/api/api_v1/endpoints/items.py @@ -0,0 +1,115 @@ +import uuid +from typing import Any + +from fastapi import APIRouter, HTTPException +from sqlmodel import func, select + +from app.api.deps import CurrentUser, SessionDep +from app.schemas import ItemCreate, ItemPublic, ItemsPublic, ItemUpdate, Message, StandardResponse +from app.services import item as item_service + +router = APIRouter(prefix="/items", tags=["items"]) + + +@router.get("/", response_model=StandardResponse[ItemsPublic]) +def read_items( + session: SessionDep, current_user: CurrentUser, skip: int = 0, limit: int = 100 +) -> Any: + """ + Retrieve items. 
+ """ + if current_user.is_superuser: + count = item_service.count_items(session=session) + items = item_service.get_items(session=session, skip=skip, limit=limit) + else: + count = item_service.count_items_by_owner(session=session, owner_id=current_user.id) + items = item_service.get_items_by_owner( + session=session, owner_id=current_user.id, skip=skip, limit=limit + ) + + return StandardResponse( + data=ItemsPublic(data=items, count=count), + message="Items retrieved successfully" + ) + + +@router.get("/{id}", response_model=StandardResponse[ItemPublic]) +def read_item(session: SessionDep, current_user: CurrentUser, id: uuid.UUID) -> Any: + """ + Get item by ID. + """ + item = item_service.get_item(session=session, item_id=id) + if not item: + raise HTTPException(status_code=404, detail="Item not found") + if not current_user.is_superuser and (item.owner_id != current_user.id): + raise HTTPException(status_code=403, detail="Not enough permissions") + + return StandardResponse( + data=item, + message="Item retrieved successfully" + ) + + +@router.post("/", response_model=StandardResponse[ItemPublic]) +def create_item( + *, session: SessionDep, current_user: CurrentUser, item_in: ItemCreate +) -> Any: + """ + Create new item. + """ + item = item_service.create_item( + session=session, item_create=item_in, owner_id=current_user.id + ) + + return StandardResponse( + data=item, + message="Item created successfully" + ) + + +@router.put("/{id}", response_model=StandardResponse[ItemPublic]) +def update_item( + *, + session: SessionDep, + current_user: CurrentUser, + id: uuid.UUID, + item_in: ItemUpdate, +) -> Any: + """ + Update an item. + """ + item = item_service.get_item(session=session, item_id=id) + if not item: + raise HTTPException(status_code=404, detail="Item not found") + if not current_user.is_superuser and (item.owner_id != current_user.id): + raise HTTPException(status_code=403, detail="Not enough permissions") + + updated_item = item_service.update_item( + session=session, db_item=item, item_update=item_in + ) + + return StandardResponse( + data=updated_item, + message="Item updated successfully" + ) + + +@router.delete("/{id}", response_model=StandardResponse[Message]) +def delete_item( + session: SessionDep, current_user: CurrentUser, id: uuid.UUID +) -> Any: + """ + Delete an item. 
+ """ + item = item_service.get_item(session=session, item_id=id) + if not item: + raise HTTPException(status_code=404, detail="Item not found") + if not current_user.is_superuser and (item.owner_id != current_user.id): + raise HTTPException(status_code=403, detail="Not enough permissions") + + item_service.delete_item(session=session, item_id=id) + + return StandardResponse( + data=Message(message="Item deleted successfully"), + message="Item deleted successfully" + ) \ No newline at end of file diff --git a/backend/app/api/routes/login.py b/backend/app/api/api_v1/endpoints/login.py similarity index 70% rename from backend/app/api/routes/login.py rename to backend/app/api/api_v1/endpoints/login.py index 980c66f86f..7592a7ef5b 100644 --- a/backend/app/api/routes/login.py +++ b/backend/app/api/api_v1/endpoints/login.py @@ -5,12 +5,12 @@ from fastapi.responses import HTMLResponse from fastapi.security import OAuth2PasswordRequestForm -from app import crud +from app.services import user as user_service from app.api.deps import CurrentUser, SessionDep, get_current_active_superuser from app.core import security from app.core.config import settings from app.core.security import get_password_hash -from app.models import Message, NewPassword, Token, UserPublic +from app.schemas import Message, NewPassword, StandardResponse, Token, UserPublic from app.utils import ( generate_password_reset_token, generate_reset_password_email, @@ -21,14 +21,14 @@ router = APIRouter(tags=["login"]) -@router.post("/login/access-token") +@router.post("/login/access-token", response_model=StandardResponse[Token]) def login_access_token( session: SessionDep, form_data: Annotated[OAuth2PasswordRequestForm, Depends()] -) -> Token: +) -> Any: """ OAuth2 compatible token login, get an access token for future requests """ - user = crud.authenticate( + user = user_service.authenticate( session=session, email=form_data.username, password=form_data.password ) if not user: @@ -36,27 +36,34 @@ def login_access_token( elif not user.is_active: raise HTTPException(status_code=400, detail="Inactive user") access_token_expires = timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES) - return Token( + token = Token( access_token=security.create_access_token( user.id, expires_delta=access_token_expires ) ) + return StandardResponse( + data=token, + message="Authentication successful" + ) -@router.post("/login/test-token", response_model=UserPublic) +@router.post("/login/test-token", response_model=StandardResponse[UserPublic]) def test_token(current_user: CurrentUser) -> Any: """ Test access token """ - return current_user + return StandardResponse( + data=current_user, + message="Token is valid" + ) -@router.post("/password-recovery/{email}") -def recover_password(email: str, session: SessionDep) -> Message: +@router.post("/password-recovery/{email}", response_model=StandardResponse[Message]) +def recover_password(email: str, session: SessionDep) -> Any: """ Password Recovery """ - user = crud.get_user_by_email(session=session, email=email) + user = user_service.get_user_by_email(session=session, email=email) if not user: raise HTTPException( @@ -72,18 +79,21 @@ def recover_password(email: str, session: SessionDep) -> Message: subject=email_data.subject, html_content=email_data.html_content, ) - return Message(message="Password recovery email sent") + return StandardResponse( + data=Message(message="Password recovery email sent"), + message="Password recovery email sent" + ) -@router.post("/reset-password/") -def 
reset_password(session: SessionDep, body: NewPassword) -> Message: +@router.post("/reset-password/", response_model=StandardResponse[Message]) +def reset_password(session: SessionDep, body: NewPassword) -> Any: """ Reset password """ email = verify_password_reset_token(token=body.token) if not email: raise HTTPException(status_code=400, detail="Invalid token") - user = crud.get_user_by_email(session=session, email=email) + user = user_service.get_user_by_email(session=session, email=email) if not user: raise HTTPException( status_code=404, @@ -95,7 +105,10 @@ def reset_password(session: SessionDep, body: NewPassword) -> Message: user.hashed_password = hashed_password session.add(user) session.commit() - return Message(message="Password updated successfully") + return StandardResponse( + data=Message(message="Password updated successfully"), + message="Password has been reset successfully" + ) @router.post( @@ -107,7 +120,7 @@ def recover_password_html_content(email: str, session: SessionDep) -> Any: """ HTML Content for Password Recovery """ - user = crud.get_user_by_email(session=session, email=email) + user = user_service.get_user_by_email(session=session, email=email) if not user: raise HTTPException( @@ -121,4 +134,4 @@ def recover_password_html_content(email: str, session: SessionDep) -> Any: return HTMLResponse( content=email_data.html_content, headers={"subject:": email_data.subject} - ) + ) \ No newline at end of file diff --git a/backend/app/api/routes/private.py b/backend/app/api/api_v1/endpoints/private.py similarity index 84% rename from backend/app/api/routes/private.py rename to backend/app/api/api_v1/endpoints/private.py index 9f33ef1900..80a38f37cf 100644 --- a/backend/app/api/routes/private.py +++ b/backend/app/api/api_v1/endpoints/private.py @@ -5,10 +5,7 @@ from app.api.deps import SessionDep from app.core.security import get_password_hash -from app.models import ( - User, - UserPublic, -) +from app.db.models.user import User router = APIRouter(tags=["private"], prefix="/private") @@ -20,7 +17,7 @@ class PrivateUserCreate(BaseModel): is_verified: bool = False -@router.post("/users/", response_model=UserPublic) +@router.post("/users/", response_model=User) def create_user(user_in: PrivateUserCreate, session: SessionDep) -> Any: """ Create a new user. 
@@ -35,4 +32,4 @@ def create_user(user_in: PrivateUserCreate, session: SessionDep) -> Any: session.add(user) session.commit() - return user + return user \ No newline at end of file diff --git a/backend/app/api/routes/users.py b/backend/app/api/api_v1/endpoints/users.py similarity index 64% rename from backend/app/api/routes/users.py rename to backend/app/api/api_v1/endpoints/users.py index 6429818458..9a8533b66a 100644 --- a/backend/app/api/routes/users.py +++ b/backend/app/api/api_v1/endpoints/users.py @@ -4,7 +4,7 @@ from fastapi import APIRouter, Depends, HTTPException from sqlmodel import col, delete, func, select -from app import crud +from app.services import user as user_service from app.api.deps import ( CurrentUser, SessionDep, @@ -12,11 +12,12 @@ ) from app.core.config import settings from app.core.security import get_password_hash, verify_password -from app.models import ( - Item, +from app.db.models.user import User +from app.db.models.item import Item +from app.schemas import ( Message, + StandardResponse, UpdatePassword, - User, UserCreate, UserPublic, UserRegister, @@ -26,13 +27,14 @@ ) from app.utils import generate_new_account_email, send_email + router = APIRouter(prefix="/users", tags=["users"]) @router.get( "/", dependencies=[Depends(get_current_active_superuser)], - response_model=UsersPublic, + response_model=StandardResponse[UsersPublic], ) def read_users(session: SessionDep, skip: int = 0, limit: int = 100) -> Any: """ @@ -45,24 +47,29 @@ def read_users(session: SessionDep, skip: int = 0, limit: int = 100) -> Any: statement = select(User).offset(skip).limit(limit) users = session.exec(statement).all() - return UsersPublic(data=users, count=count) + return StandardResponse( + data=UsersPublic(data=users, count=count), + message="Users retrieved successfully" + ) @router.post( - "/", dependencies=[Depends(get_current_active_superuser)], response_model=UserPublic + "/", + dependencies=[Depends(get_current_active_superuser)], + response_model=StandardResponse[UserPublic] ) def create_user(*, session: SessionDep, user_in: UserCreate) -> Any: """ Create new user. 
""" - user = crud.get_user_by_email(session=session, email=user_in.email) + user = user_service.get_user_by_email(session=session, email=user_in.email) if user: raise HTTPException( status_code=400, detail="The user with this email already exists in the system.", ) - user = crud.create_user(session=session, user_create=user_in) + user = user_service.create_user(session=session, user_create=user_in) if settings.emails_enabled and user_in.email: email_data = generate_new_account_email( email_to=user_in.email, username=user_in.email, password=user_in.password @@ -72,10 +79,13 @@ def create_user(*, session: SessionDep, user_in: UserCreate) -> Any: subject=email_data.subject, html_content=email_data.html_content, ) - return user + return StandardResponse( + data=user, + message="User created successfully" + ) -@router.patch("/me", response_model=UserPublic) +@router.patch("/me", response_model=StandardResponse[UserPublic]) def update_user_me( *, session: SessionDep, user_in: UserUpdateMe, current_user: CurrentUser ) -> Any: @@ -84,7 +94,7 @@ def update_user_me( """ if user_in.email: - existing_user = crud.get_user_by_email(session=session, email=user_in.email) + existing_user = user_service.get_user_by_email(session=session, email=user_in.email) if existing_user and existing_user.id != current_user.id: raise HTTPException( status_code=409, detail="User with this email already exists" @@ -94,10 +104,13 @@ def update_user_me( session.add(current_user) session.commit() session.refresh(current_user) - return current_user + return StandardResponse( + data=current_user, + message="User updated successfully" + ) -@router.patch("/me/password", response_model=Message) +@router.patch("/me/password", response_model=StandardResponse[Message]) def update_password_me( *, session: SessionDep, body: UpdatePassword, current_user: CurrentUser ) -> Any: @@ -114,18 +127,24 @@ def update_password_me( current_user.hashed_password = hashed_password session.add(current_user) session.commit() - return Message(message="Password updated successfully") + return StandardResponse( + data=Message(message="Password updated successfully"), + message="Password updated successfully" + ) -@router.get("/me", response_model=UserPublic) +@router.get("/me", response_model=StandardResponse[UserPublic]) def read_user_me(current_user: CurrentUser) -> Any: """ Get current user. """ - return current_user + return StandardResponse( + data=current_user, + message="User profile retrieved successfully" + ) -@router.delete("/me", response_model=Message) +@router.delete("/me", response_model=StandardResponse[Message]) def delete_user_me(session: SessionDep, current_user: CurrentUser) -> Any: """ Delete own user. @@ -136,26 +155,32 @@ def delete_user_me(session: SessionDep, current_user: CurrentUser) -> Any: ) session.delete(current_user) session.commit() - return Message(message="User deleted successfully") + return StandardResponse( + data=Message(message="User deleted successfully"), + message="User account has been deleted" + ) -@router.post("/signup", response_model=UserPublic) +@router.post("/signup", response_model=StandardResponse[UserPublic]) def register_user(session: SessionDep, user_in: UserRegister) -> Any: """ Create new user without the need to be logged in. 
""" - user = crud.get_user_by_email(session=session, email=user_in.email) + user = user_service.get_user_by_email(session=session, email=user_in.email) if user: raise HTTPException( status_code=400, detail="The user with this email already exists in the system", ) user_create = UserCreate.model_validate(user_in) - user = crud.create_user(session=session, user_create=user_create) - return user + user = user_service.create_user(session=session, user_create=user_create) + return StandardResponse( + data=user, + message="User registered successfully" + ) -@router.get("/{user_id}", response_model=UserPublic) +@router.get("/{user_id}", response_model=StandardResponse[UserPublic]) def read_user_by_id( user_id: uuid.UUID, session: SessionDep, current_user: CurrentUser ) -> Any: @@ -164,19 +189,25 @@ def read_user_by_id( """ user = session.get(User, user_id) if user == current_user: - return user + return StandardResponse( + data=user, + message="User retrieved successfully" + ) if not current_user.is_superuser: raise HTTPException( status_code=403, detail="The user doesn't have enough privileges", ) - return user + return StandardResponse( + data=user, + message="User retrieved successfully" + ) @router.patch( "/{user_id}", dependencies=[Depends(get_current_active_superuser)], - response_model=UserPublic, + response_model=StandardResponse[UserPublic], ) def update_user( *, @@ -195,20 +226,25 @@ def update_user( detail="The user with this id does not exist in the system", ) if user_in.email: - existing_user = crud.get_user_by_email(session=session, email=user_in.email) + existing_user = user_service.get_user_by_email(session=session, email=user_in.email) if existing_user and existing_user.id != user_id: raise HTTPException( status_code=409, detail="User with this email already exists" ) - db_user = crud.update_user(session=session, db_user=db_user, user_in=user_in) - return db_user + db_user = user_service.update_user(session=session, db_user=db_user, user_in=user_in) + return StandardResponse( + data=db_user, + message="User updated successfully" + ) -@router.delete("/{user_id}", dependencies=[Depends(get_current_active_superuser)]) +@router.delete("/{user_id}", + dependencies=[Depends(get_current_active_superuser)], + response_model=StandardResponse[Message]) def delete_user( session: SessionDep, current_user: CurrentUser, user_id: uuid.UUID -) -> Message: +) -> Any: """ Delete a user. 
""" @@ -223,4 +259,7 @@ def delete_user( session.exec(statement) # type: ignore session.delete(user) session.commit() - return Message(message="User deleted successfully") + return StandardResponse( + data=Message(message="User deleted successfully"), + message="User has been deleted" + ) \ No newline at end of file diff --git a/backend/app/api/routes/utils.py b/backend/app/api/api_v1/endpoints/utils.py similarity index 50% rename from backend/app/api/routes/utils.py rename to backend/app/api/api_v1/endpoints/utils.py index fc093419b3..4a1b7a21c0 100644 --- a/backend/app/api/routes/utils.py +++ b/backend/app/api/api_v1/endpoints/utils.py @@ -2,7 +2,7 @@ from pydantic.networks import EmailStr from app.api.deps import get_current_active_superuser -from app.models import Message +from app.schemas import Message, StandardResponse from app.utils import generate_test_email, send_email router = APIRouter(prefix="/utils", tags=["utils"]) @@ -12,8 +12,9 @@ "/test-email/", dependencies=[Depends(get_current_active_superuser)], status_code=201, + response_model=StandardResponse[Message] ) -def test_email(email_to: EmailStr) -> Message: +def test_email(email_to: EmailStr) -> StandardResponse[Message]: """ Test emails. """ @@ -23,9 +24,18 @@ def test_email(email_to: EmailStr) -> Message: subject=email_data.subject, html_content=email_data.html_content, ) - return Message(message="Test email sent") + return StandardResponse( + data=Message(message="Test email sent"), + message="Test email has been sent successfully" + ) -@router.get("/health-check/") -async def health_check() -> bool: - return True +@router.get("/health-check/", response_model=StandardResponse[bool]) +async def health_check() -> StandardResponse[bool]: + """ + Endpoint for health checks and monitoring + """ + return StandardResponse( + data=True, + message="API is healthy" + ) \ No newline at end of file diff --git a/backend/app/api/deps.py b/backend/app/api/deps.py index c2b83c841d..1cdba8dc50 100644 --- a/backend/app/api/deps.py +++ b/backend/app/api/deps.py @@ -10,20 +10,16 @@ from app.core import security from app.core.config import settings -from app.core.db import engine -from app.models import TokenPayload, User +from app.db.session import get_session +from app.db.models.user import User +from app.schemas import TokenPayload reusable_oauth2 = OAuth2PasswordBearer( tokenUrl=f"{settings.API_V1_STR}/login/access-token" ) -def get_db() -> Generator[Session, None, None]: - with Session(engine) as session: - yield session - - -SessionDep = Annotated[Session, Depends(get_db)] +SessionDep = Annotated[Session, Depends(get_session)] TokenDep = Annotated[str, Depends(reusable_oauth2)] diff --git a/backend/app/api/main.py b/backend/app/api/main.py index eac18c8e8f..f15856ea2d 100644 --- a/backend/app/api/main.py +++ b/backend/app/api/main.py @@ -1,14 +1,12 @@ from fastapi import APIRouter -from app.api.routes import items, login, private, users, utils +from app.api.api_v1.api import api_router as api_v1_router from app.core.config import settings +# Main API router that includes version-specific routers api_router = APIRouter() -api_router.include_router(login.router) -api_router.include_router(users.router) -api_router.include_router(utils.router) -api_router.include_router(items.router) +# Include the v1 API router without an additional prefix +# The /v1 prefix is handled in app.main.py with settings.API_V1_STR +api_router.include_router(api_v1_router) -if settings.ENVIRONMENT == "local": - api_router.include_router(private.router) diff 
--git a/backend/app/api/routes/__init__.py b/backend/app/api/routes/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/backend/app/api/routes/items.py b/backend/app/api/routes/items.py deleted file mode 100644 index 177dc1e476..0000000000 --- a/backend/app/api/routes/items.py +++ /dev/null @@ -1,109 +0,0 @@ -import uuid -from typing import Any - -from fastapi import APIRouter, HTTPException -from sqlmodel import func, select - -from app.api.deps import CurrentUser, SessionDep -from app.models import Item, ItemCreate, ItemPublic, ItemsPublic, ItemUpdate, Message - -router = APIRouter(prefix="/items", tags=["items"]) - - -@router.get("/", response_model=ItemsPublic) -def read_items( - session: SessionDep, current_user: CurrentUser, skip: int = 0, limit: int = 100 -) -> Any: - """ - Retrieve items. - """ - - if current_user.is_superuser: - count_statement = select(func.count()).select_from(Item) - count = session.exec(count_statement).one() - statement = select(Item).offset(skip).limit(limit) - items = session.exec(statement).all() - else: - count_statement = ( - select(func.count()) - .select_from(Item) - .where(Item.owner_id == current_user.id) - ) - count = session.exec(count_statement).one() - statement = ( - select(Item) - .where(Item.owner_id == current_user.id) - .offset(skip) - .limit(limit) - ) - items = session.exec(statement).all() - - return ItemsPublic(data=items, count=count) - - -@router.get("/{id}", response_model=ItemPublic) -def read_item(session: SessionDep, current_user: CurrentUser, id: uuid.UUID) -> Any: - """ - Get item by ID. - """ - item = session.get(Item, id) - if not item: - raise HTTPException(status_code=404, detail="Item not found") - if not current_user.is_superuser and (item.owner_id != current_user.id): - raise HTTPException(status_code=400, detail="Not enough permissions") - return item - - -@router.post("/", response_model=ItemPublic) -def create_item( - *, session: SessionDep, current_user: CurrentUser, item_in: ItemCreate -) -> Any: - """ - Create new item. - """ - item = Item.model_validate(item_in, update={"owner_id": current_user.id}) - session.add(item) - session.commit() - session.refresh(item) - return item - - -@router.put("/{id}", response_model=ItemPublic) -def update_item( - *, - session: SessionDep, - current_user: CurrentUser, - id: uuid.UUID, - item_in: ItemUpdate, -) -> Any: - """ - Update an item. - """ - item = session.get(Item, id) - if not item: - raise HTTPException(status_code=404, detail="Item not found") - if not current_user.is_superuser and (item.owner_id != current_user.id): - raise HTTPException(status_code=400, detail="Not enough permissions") - update_dict = item_in.model_dump(exclude_unset=True) - item.sqlmodel_update(update_dict) - session.add(item) - session.commit() - session.refresh(item) - return item - - -@router.delete("/{id}") -def delete_item( - session: SessionDep, current_user: CurrentUser, id: uuid.UUID -) -> Message: - """ - Delete an item. 
- """ - item = session.get(Item, id) - if not item: - raise HTTPException(status_code=404, detail="Item not found") - if not current_user.is_superuser and (item.owner_id != current_user.id): - raise HTTPException(status_code=400, detail="Not enough permissions") - session.delete(item) - session.commit() - return Message(message="Item deleted successfully") diff --git a/backend/app/core/config.py b/backend/app/core/config.py index d58e03c87d..1cbeb52ea9 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -1,11 +1,12 @@ import secrets import warnings -from typing import Annotated, Any, Literal +from typing import Annotated, Any, Dict, Literal, Optional from pydantic import ( AnyUrl, BeforeValidator, EmailStr, + Field, HttpUrl, PostgresDsn, computed_field, @@ -51,6 +52,8 @@ def all_cors_origins(self) -> list[str]: PROJECT_NAME: str SENTRY_DSN: HttpUrl | None = None + + # PostgreSQL settings POSTGRES_SERVER: str POSTGRES_PORT: int = 5432 POSTGRES_USER: str @@ -69,6 +72,73 @@ def SQLALCHEMY_DATABASE_URI(self) -> PostgresDsn: path=self.POSTGRES_DB, ) + # MongoDB settings + MONGODB_SERVER: str = "localhost" + MONGODB_PORT: int = 27017 + MONGODB_USER: str = "" + MONGODB_PASSWORD: str = "" + MONGODB_DB: str = "political_social_media" + MONGODB_AUTH_SOURCE: str = "admin" + + @computed_field # type: ignore[prop-decorator] + @property + def MONGODB_URI(self) -> str: + auth_part = "" + if self.MONGODB_USER and self.MONGODB_PASSWORD: + auth_part = f"{self.MONGODB_USER}:{self.MONGODB_PASSWORD}@" + + auth_source = f"?authSource={self.MONGODB_AUTH_SOURCE}" if auth_part else "" + return f"mongodb://{auth_part}{self.MONGODB_SERVER}:{self.MONGODB_PORT}/{self.MONGODB_DB}{auth_source}" + + # Redis settings + REDIS_SERVER: str = "localhost" + REDIS_PORT: int = 6379 + REDIS_PASSWORD: str = "" + REDIS_DB: int = 0 + + @computed_field # type: ignore[prop-decorator] + @property + def REDIS_URI(self) -> str: + auth_part = f":{self.REDIS_PASSWORD}@" if self.REDIS_PASSWORD else "" + return f"redis://{auth_part}{self.REDIS_SERVER}:{self.REDIS_PORT}/{self.REDIS_DB}" + + # Pinecone (Vector Database) settings + PINECONE_API_KEY: str = "" + PINECONE_ENVIRONMENT: str = "us-west1-gcp" + PINECONE_INDEX_NAME: str = "political-content" + + # Celery settings + CELERY_BROKER: str = "amqp://guest:guest@localhost:5672//" + CELERY_RESULT_BACKEND: str = "" # Will default to Redis URI if not set + CELERY_TASK_SERIALIZER: str = "json" + CELERY_RESULT_SERIALIZER: str = "json" + CELERY_ACCEPT_CONTENT: list[str] = ["json"] + CELERY_TIMEZONE: str = "UTC" + CELERY_TASK_ROUTES: Dict[str, Dict[str, str]] = { + "app.tasks.scraping.*": {"queue": "scraping"}, + "app.tasks.analysis.*": {"queue": "analysis"}, + "app.tasks.notifications.*": {"queue": "notifications"}, + } + + @computed_field # type: ignore[prop-decorator] + @property + def celery_result_backend_uri(self) -> str: + """Return Redis URI as the default Celery result backend if none specified.""" + return self.CELERY_RESULT_BACKEND or self.REDIS_URI + + # Kafka settings + KAFKA_BOOTSTRAP_SERVERS: str = "localhost:9092" + KAFKA_CONSUMER_GROUP_ID: str = "political-media-analysis" + KAFKA_TOPIC_SOCIAL_MEDIA_POSTS: str = "social-media-posts" + KAFKA_TOPIC_SENTIMENT_ANALYSIS: str = "sentiment-analysis" + KAFKA_TOPIC_ENTITY_RECOGNITION: str = "entity-recognition" + + # NLP model settings + SPACY_MODEL_NAME: str = "en_core_web_lg" + TRANSFORMER_MODEL_NAME: str = "distilbert-base-uncased" + SENTENCE_TRANSFORMER_MODEL_NAME: str = "all-MiniLM-L6-v2" + + # Email settings 
SMTP_TLS: bool = True SMTP_SSL: bool = False SMTP_PORT: int = 587 @@ -96,24 +166,20 @@ def emails_enabled(self) -> bool: FIRST_SUPERUSER_PASSWORD: str def _check_default_secret(self, var_name: str, value: str | None) -> None: - if value == "changethis": - message = ( - f'The value of {var_name} is "changethis", ' - "for security, please change it, at least for deployments." - ) - if self.ENVIRONMENT == "local": - warnings.warn(message, stacklevel=1) - else: - raise ValueError(message) + """Check if a secret is using a default value and warn the user.""" + if value is not None and value in {"changethis", "changeme", ""}: + message = f"The value of {var_name} is \"{value}\", for security, please change it, at least for deployments." + warnings.warn(message, stacklevel=1) @model_validator(mode="after") def _enforce_non_default_secrets(self) -> Self: - self._check_default_secret("SECRET_KEY", self.SECRET_KEY) + """Enforce that secrets don't use default values.""" self._check_default_secret("POSTGRES_PASSWORD", self.POSTGRES_PASSWORD) - self._check_default_secret( - "FIRST_SUPERUSER_PASSWORD", self.FIRST_SUPERUSER_PASSWORD - ) - + self._check_default_secret("MONGODB_PASSWORD", self.MONGODB_PASSWORD) + self._check_default_secret("REDIS_PASSWORD", self.REDIS_PASSWORD) + self._check_default_secret("PINECONE_API_KEY", self.PINECONE_API_KEY) + self._check_default_secret("SECRET_KEY", self.SECRET_KEY) + self._check_default_secret("FIRST_SUPERUSER_PASSWORD", self.FIRST_SUPERUSER_PASSWORD) return self diff --git a/backend/app/core/db.py b/backend/app/core/db.py index ba991fb36d..f8ded4d4aa 100644 --- a/backend/app/core/db.py +++ b/backend/app/core/db.py @@ -1,8 +1,11 @@ from sqlmodel import Session, create_engine, select -from app import crud +# Updated import to match new architecture +from app.services.repositories import user as user_repository from app.core.config import settings -from app.models import User, UserCreate +# Updated model imports +from app.db.models.user import User +from app.schemas.user import UserCreate engine = create_engine(str(settings.SQLALCHEMY_DATABASE_URI)) @@ -30,4 +33,5 @@ def init_db(session: Session) -> None: password=settings.FIRST_SUPERUSER_PASSWORD, is_superuser=True, ) - user = crud.create_user(session=session, user_create=user_in) + # Updated to use the new repository pattern + user = user_repository.create_user(session=session, user_create=user_in) diff --git a/backend/app/core/errors.py b/backend/app/core/errors.py new file mode 100644 index 0000000000..2be95ed70c --- /dev/null +++ b/backend/app/core/errors.py @@ -0,0 +1,48 @@ +from typing import Any, Dict, Optional + +from fastapi import FastAPI, Request, status +from fastapi.exceptions import RequestValidationError +from fastapi.responses import JSONResponse +from starlette.exceptions import HTTPException as StarletteHTTPException + +from app.schemas.common import ErrorResponse + + +def add_exception_handlers(app: FastAPI) -> None: + """Add exception handlers to the FastAPI app.""" + + @app.exception_handler(StarletteHTTPException) + async def http_exception_handler(request: Request, exc: StarletteHTTPException) -> JSONResponse: + """Handle HTTP exceptions and return standardized error response.""" + return JSONResponse( + status_code=exc.status_code, + content=ErrorResponse( + success=False, + error=str(exc.detail), + details=getattr(exc, "headers", None), + ).model_dump(), + ) + + @app.exception_handler(RequestValidationError) + async def validation_exception_handler(request: Request, exc: 
RequestValidationError) -> JSONResponse: + """Handle validation errors and return standardized error response with details.""" + return JSONResponse( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + content=ErrorResponse( + success=False, + error="Validation error", + details=exc.errors(), + ).model_dump(), + ) + + @app.exception_handler(Exception) + async def general_exception_handler(request: Request, exc: Exception) -> JSONResponse: + """Handle any unhandled exceptions and return standardized error response.""" + return JSONResponse( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + content=ErrorResponse( + success=False, + error="Internal server error", + details=str(exc) if app.debug else None, + ).model_dump(), + ) \ No newline at end of file diff --git a/backend/app/crud.py b/backend/app/crud.py deleted file mode 100644 index 905bf48724..0000000000 --- a/backend/app/crud.py +++ /dev/null @@ -1,54 +0,0 @@ -import uuid -from typing import Any - -from sqlmodel import Session, select - -from app.core.security import get_password_hash, verify_password -from app.models import Item, ItemCreate, User, UserCreate, UserUpdate - - -def create_user(*, session: Session, user_create: UserCreate) -> User: - db_obj = User.model_validate( - user_create, update={"hashed_password": get_password_hash(user_create.password)} - ) - session.add(db_obj) - session.commit() - session.refresh(db_obj) - return db_obj - - -def update_user(*, session: Session, db_user: User, user_in: UserUpdate) -> Any: - user_data = user_in.model_dump(exclude_unset=True) - extra_data = {} - if "password" in user_data: - password = user_data["password"] - hashed_password = get_password_hash(password) - extra_data["hashed_password"] = hashed_password - db_user.sqlmodel_update(user_data, update=extra_data) - session.add(db_user) - session.commit() - session.refresh(db_user) - return db_user - - -def get_user_by_email(*, session: Session, email: str) -> User | None: - statement = select(User).where(User.email == email) - session_user = session.exec(statement).first() - return session_user - - -def authenticate(*, session: Session, email: str, password: str) -> User | None: - db_user = get_user_by_email(session=session, email=email) - if not db_user: - return None - if not verify_password(password, db_user.hashed_password): - return None - return db_user - - -def create_item(*, session: Session, item_in: ItemCreate, owner_id: uuid.UUID) -> Item: - db_item = Item.model_validate(item_in, update={"owner_id": owner_id}) - session.add(db_item) - session.commit() - session.refresh(db_item) - return db_item diff --git a/backend/app/db/init_db.py b/backend/app/db/init_db.py new file mode 100644 index 0000000000..6d76ed26d0 --- /dev/null +++ b/backend/app/db/init_db.py @@ -0,0 +1,43 @@ +from sqlmodel import Session, select + +from app.services.repositories import user as user_repository +from app.core.config import settings +from app.db.models.user import User +from app.schemas.user import UserCreate + + +def init_db(session: Session) -> None: + """ + Initialize the database with initial data. + + This function is called when the application starts. + It creates a superuser if it doesn't exist yet. 
+ """ + # Tables should be created with Alembic migrations + # But if you don't want to use migrations, create + # the tables un-commenting the next lines + # from sqlmodel import SQLModel + # from app.db.session import engine + # SQLModel.metadata.create_all(engine) + + user = session.exec( + select(User).where(User.email == settings.FIRST_SUPERUSER) + ).first() + if not user: + user_in = UserCreate( + email=settings.FIRST_SUPERUSER, + password=settings.FIRST_SUPERUSER_PASSWORD, + is_superuser=True, + ) + user = user_repository.create_user(session=session, user_create=user_in) + + +async def init_mongodb() -> None: + """ + Initialize MongoDB with any required initial data or indexes. + + This function is called when the application starts. + It sets up indexes and any initial data required for MongoDB collections. + """ + # This will be implemented as needed when MongoDB collections are defined + pass \ No newline at end of file diff --git a/backend/app/db/models/__init__.py b/backend/app/db/models/__init__.py new file mode 100644 index 0000000000..a167563a79 --- /dev/null +++ b/backend/app/db/models/__init__.py @@ -0,0 +1,4 @@ +from app.db.models.item import Item +from app.db.models.user import User + +__all__ = ["User", "Item"] \ No newline at end of file diff --git a/backend/app/db/models/item.py b/backend/app/db/models/item.py new file mode 100644 index 0000000000..161af13087 --- /dev/null +++ b/backend/app/db/models/item.py @@ -0,0 +1,20 @@ +import uuid + +from sqlmodel import Field, Relationship, SQLModel + +from app.db.models.user import User + + +class Item(SQLModel, table=True): + """ + Item model for database storage. + + This model represents an item in the system and is stored in PostgreSQL. + """ + id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) + title: str = Field(max_length=255) + description: str | None = Field(default=None, max_length=255) + owner_id: uuid.UUID = Field( + foreign_key="user.id", nullable=False, ondelete="CASCADE" + ) + owner: User | None = Relationship(back_populates="items") \ No newline at end of file diff --git a/backend/app/db/models/user.py b/backend/app/db/models/user.py new file mode 100644 index 0000000000..edf604453b --- /dev/null +++ b/backend/app/db/models/user.py @@ -0,0 +1,19 @@ +import uuid + +from pydantic import EmailStr +from sqlmodel import Field, Relationship, SQLModel + + +class User(SQLModel, table=True): + """ + User model for database storage. + + This model represents a user in the system and is stored in PostgreSQL. 
+ """ + id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) + email: EmailStr = Field(unique=True, index=True, max_length=255) + hashed_password: str + is_active: bool = True + is_superuser: bool = False + full_name: str | None = Field(default=None, max_length=255) + items: list["Item"] = Relationship(back_populates="owner", cascade_delete=True) \ No newline at end of file diff --git a/backend/app/db/schemas/__init__.py b/backend/app/db/schemas/__init__.py new file mode 100644 index 0000000000..666d98aba9 --- /dev/null +++ b/backend/app/db/schemas/__init__.py @@ -0,0 +1,4 @@ +from app.db.schemas.political_entity import EntityAnalytics, PoliticalEntity +from app.db.schemas.social_post import SocialAnalytics, SocialPost + +__all__ = ["SocialPost", "SocialAnalytics", "PoliticalEntity", "EntityAnalytics"] \ No newline at end of file diff --git a/backend/app/db/schemas/political_entity.py b/backend/app/db/schemas/political_entity.py new file mode 100644 index 0000000000..85cf177c3a --- /dev/null +++ b/backend/app/db/schemas/political_entity.py @@ -0,0 +1,53 @@ +from datetime import datetime +from typing import List, Optional, Any + +from bson import ObjectId +from pydantic import BaseModel, Field + +from app.db.schemas.social_post import PyObjectId + + +class PoliticalEntity(BaseModel): + """ + Schema for political entities stored in MongoDB. + + This model represents a political entity such as a politician, political party, or organization. + """ + id: PyObjectId = Field(default_factory=lambda: str(ObjectId()), alias="_id") + name: str + entity_type: str # "politician", "party", "organization", etc. + description: Optional[str] = None + country: str + social_accounts: List[dict[str, str]] = [] # List of {platform: string, username: string} + political_stance: Optional[str] = None + tags: List[str] = [] + related_entities: List[PyObjectId] = [] + metadata: Optional[dict[str, Any]] = None + created_at: datetime = Field(default_factory=datetime.utcnow) + updated_at: datetime = Field(default_factory=datetime.utcnow) + + class Config: + populate_by_name = True + arbitrary_types_allowed = True + json_encoders = {ObjectId: str} + + +class EntityAnalytics(BaseModel): + """ + Schema for entity analytics stored in MongoDB. + + This model represents analytics for political entities. 
+ """ + id: PyObjectId = Field(default_factory=lambda: str(ObjectId()), alias="_id") + entity_id: PyObjectId + total_mentions: int = 0 + sentiment_distribution: dict[str, float] = {} # e.g., {"positive": 0.3, "neutral": 0.5, "negative": 0.2} + engagement_metrics: dict[str, int] = {} # e.g., {"comments": 1000, "likes": 5000, "shares": 2000} + trending_topics: List[dict[str, Any]] = [] # List of {topic: string, count: number, sentiment: number} + time_period: str # e.g., "last_24h", "last_week", "last_month" + analysis_timestamp: datetime = Field(default_factory=datetime.utcnow) + + class Config: + populate_by_name = True + arbitrary_types_allowed = True + json_encoders = {ObjectId: str} \ No newline at end of file diff --git a/backend/app/db/schemas/social_post.py b/backend/app/db/schemas/social_post.py new file mode 100644 index 0000000000..7f15e0a6e9 --- /dev/null +++ b/backend/app/db/schemas/social_post.py @@ -0,0 +1,66 @@ +from datetime import datetime +from typing import List, Optional, Any + +from bson import ObjectId +from pydantic import BaseModel, Field + + +class PyObjectId(str): + """Custom type for handling MongoDB ObjectId.""" + @classmethod + def __get_validators__(cls): + yield cls.validate + + @classmethod + def validate(cls, v): + if not ObjectId.is_valid(v): + raise ValueError(f"Invalid ObjectId: {v}") + return str(v) + + +class SocialPost(BaseModel): + """ + Schema for social media posts stored in MongoDB. + + This model represents a social media post from various platforms. + """ + id: PyObjectId = Field(default_factory=lambda: str(ObjectId()), alias="_id") + platform: str + content: str + author: str + author_username: str + published_at: datetime + likes: int = 0 + shares: int = 0 + comments: int = 0 + url: Optional[str] = None + media_urls: List[str] = [] + hashtags: List[str] = [] + mentions: List[str] = [] + metadata: Optional[dict[str, Any]] = None + + class Config: + populate_by_name = True + arbitrary_types_allowed = True + json_encoders = {ObjectId: str} + + +class SocialAnalytics(BaseModel): + """ + Schema for social media analytics stored in MongoDB. + + This model represents analytics for social media data. 
+ """ + id: PyObjectId = Field(default_factory=lambda: str(ObjectId()), alias="_id") + post_id: PyObjectId + sentiment_score: float + topic_classification: List[str] = [] + engagement_rate: float + political_leaning: Optional[str] = None + key_entities: List[str] = [] + analysis_timestamp: datetime = Field(default_factory=datetime.utcnow) + + class Config: + populate_by_name = True + arbitrary_types_allowed = True + json_encoders = {ObjectId: str} \ No newline at end of file diff --git a/backend/app/db/session.py b/backend/app/db/session.py new file mode 100644 index 0000000000..4a69dc495d --- /dev/null +++ b/backend/app/db/session.py @@ -0,0 +1,28 @@ +from typing import Generator +import os +from pymongo import MongoClient + +from sqlmodel import Session, create_engine + +from app.core.config import settings + +# PostgreSQL Connection +engine = create_engine(str(settings.SQLALCHEMY_DATABASE_URI)) + + +def get_session() -> Generator[Session, None, None]: + """Get a SQLModel session for PostgreSQL database operations.""" + with Session(engine) as session: + yield session + +# MongoDB Connection +mongodb_server = os.environ.get("MONGODB_SERVER", "localhost") +mongodb_port = int(os.environ.get("MONGODB_PORT", "27017")) +mongodb_user = os.environ.get("MONGODB_USER", "mongo") +mongodb_password = os.environ.get("MONGODB_PASSWORD", "mongo") +mongodb_database = os.environ.get("MONGODB_DATABASE", "political_analysis") + +# Create MongoDB client +mongodb_uri = f"mongodb://{mongodb_user}:{mongodb_password}@{mongodb_server}:{mongodb_port}/{mongodb_database}" +mongodb_client = MongoClient(mongodb_uri) +mongodb = mongodb_client[mongodb_database] \ No newline at end of file diff --git a/backend/app/main.py b/backend/app/main.py index 9a95801e74..cdcba98605 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -3,12 +3,18 @@ from fastapi.routing import APIRoute from starlette.middleware.cors import CORSMiddleware -from app.api.main import api_router +from app.api.api_v1.api import api_router as api_v1_router from app.core.config import settings +from app.core.errors import add_exception_handlers +from app.schemas import StandardResponse def custom_generate_unique_id(route: APIRoute) -> str: - return f"{route.tags[0]}-{route.name}" + """Generate a unique ID for API routes to improve Swagger documentation.""" + if route.tags and len(route.tags) > 0: + return f"{route.tags[0]}-{route.name}" + else: + return f"root-{route.name}" if settings.SENTRY_DSN and settings.ENVIRONMENT != "local": @@ -16,10 +22,15 @@ def custom_generate_unique_id(route: APIRoute) -> str: app = FastAPI( title=settings.PROJECT_NAME, + description="FastAPI Backend Template", + version="1.0.0", openapi_url=f"{settings.API_V1_STR}/openapi.json", generate_unique_id_function=custom_generate_unique_id, ) +# Add exception handlers +add_exception_handlers(app) + # Set all CORS enabled origins if settings.all_cors_origins: app.add_middleware( @@ -30,4 +41,21 @@ def custom_generate_unique_id(route: APIRoute) -> str: allow_headers=["*"], ) -app.include_router(api_router, prefix=settings.API_V1_STR) +# Include the v1 API router directly with the version prefix +app.include_router(api_v1_router, prefix=settings.API_V1_STR) + +@app.get("/", response_model=StandardResponse[dict]) +async def root(): + """Root endpoint providing basic API information.""" + return StandardResponse( + data={"message": "Welcome to the FastAPI Backend Template"}, + message="API is running" + ) + +@app.get("/health", response_model=StandardResponse[dict]) +async def 
health_check(): + """Health check endpoint for monitoring.""" + return StandardResponse( + data={"status": "healthy"}, + message="API is healthy" + ) diff --git a/backend/app/models.py b/backend/app/models.py deleted file mode 100644 index 90ef5559e3..0000000000 --- a/backend/app/models.py +++ /dev/null @@ -1,114 +0,0 @@ -import uuid - -from pydantic import EmailStr -from sqlmodel import Field, Relationship, SQLModel - - -# Shared properties -class UserBase(SQLModel): - email: EmailStr = Field(unique=True, index=True, max_length=255) - is_active: bool = True - is_superuser: bool = False - full_name: str | None = Field(default=None, max_length=255) - - -# Properties to receive via API on creation -class UserCreate(UserBase): - password: str = Field(min_length=8, max_length=40) - - -class UserRegister(SQLModel): - email: EmailStr = Field(max_length=255) - password: str = Field(min_length=8, max_length=40) - full_name: str | None = Field(default=None, max_length=255) - - -# Properties to receive via API on update, all are optional -class UserUpdate(UserBase): - email: EmailStr | None = Field(default=None, max_length=255) # type: ignore - password: str | None = Field(default=None, min_length=8, max_length=40) - - -class UserUpdateMe(SQLModel): - full_name: str | None = Field(default=None, max_length=255) - email: EmailStr | None = Field(default=None, max_length=255) - - -class UpdatePassword(SQLModel): - current_password: str = Field(min_length=8, max_length=40) - new_password: str = Field(min_length=8, max_length=40) - - -# Database model, database table inferred from class name -class User(UserBase, table=True): - id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) - hashed_password: str - items: list["Item"] = Relationship(back_populates="owner", cascade_delete=True) - - -# Properties to return via API, id is always required -class UserPublic(UserBase): - id: uuid.UUID - - -class UsersPublic(SQLModel): - data: list[UserPublic] - count: int - - -# Shared properties -class ItemBase(SQLModel): - title: str = Field(min_length=1, max_length=255) - description: str | None = Field(default=None, max_length=255) - - -# Properties to receive on item creation -class ItemCreate(ItemBase): - pass - - -# Properties to receive on item update -class ItemUpdate(ItemBase): - title: str | None = Field(default=None, min_length=1, max_length=255) # type: ignore - - -# Database model, database table inferred from class name -class Item(ItemBase, table=True): - id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) - title: str = Field(max_length=255) - owner_id: uuid.UUID = Field( - foreign_key="user.id", nullable=False, ondelete="CASCADE" - ) - owner: User | None = Relationship(back_populates="items") - - -# Properties to return via API, id is always required -class ItemPublic(ItemBase): - id: uuid.UUID - owner_id: uuid.UUID - - -class ItemsPublic(SQLModel): - data: list[ItemPublic] - count: int - - -# Generic message -class Message(SQLModel): - message: str - - -# JSON payload containing access token -class Token(SQLModel): - access_token: str - token_type: str = "bearer" - - -# Contents of JWT token -class TokenPayload(SQLModel): - sub: str | None = None - - -class NewPassword(SQLModel): - token: str - new_password: str = Field(min_length=8, max_length=40) diff --git a/backend/app/schemas/__init__.py b/backend/app/schemas/__init__.py new file mode 100644 index 0000000000..ba171e853c --- /dev/null +++ b/backend/app/schemas/__init__.py @@ -0,0 +1,35 @@ +from app.schemas.common 
import ErrorResponse, Message, StandardResponse, Token, TokenPayload +from app.schemas.item import ItemBase, ItemCreate, ItemPublic, ItemsPublic, ItemUpdate +from app.schemas.user import ( + NewPassword, + UpdatePassword, + UserBase, + UserCreate, + UserPublic, + UserRegister, + UserUpdate, + UserUpdateMe, + UsersPublic, +) + +__all__ = [ + "ErrorResponse", + "Message", + "StandardResponse", + "Token", + "TokenPayload", + "ItemBase", + "ItemCreate", + "ItemPublic", + "ItemsPublic", + "ItemUpdate", + "NewPassword", + "UpdatePassword", + "UserBase", + "UserCreate", + "UserPublic", + "UserRegister", + "UserUpdate", + "UserUpdateMe", + "UsersPublic", +] \ No newline at end of file diff --git a/backend/app/schemas/common.py b/backend/app/schemas/common.py new file mode 100644 index 0000000000..ede5b9ebdc --- /dev/null +++ b/backend/app/schemas/common.py @@ -0,0 +1,29 @@ +from typing import Any, Generic, TypeVar + +from pydantic import BaseModel + +# Generic message +class Message(BaseModel): + message: str + +# JSON payload containing access token +class Token(BaseModel): + access_token: str + token_type: str = "bearer" + +# Contents of JWT token +class TokenPayload(BaseModel): + sub: str | None = None + +# Standard API response wrapper +T = TypeVar('T') +class StandardResponse(BaseModel, Generic[T]): + data: T + success: bool = True + message: str | None = None + +# Standard error response +class ErrorResponse(BaseModel): + success: bool = False + error: str + details: Any | None = None \ No newline at end of file diff --git a/backend/app/schemas/item.py b/backend/app/schemas/item.py new file mode 100644 index 0000000000..b3ae91b3c7 --- /dev/null +++ b/backend/app/schemas/item.py @@ -0,0 +1,35 @@ +import uuid + +from pydantic import BaseModel, Field + + +# Shared properties +class ItemBase(BaseModel): + """Base Item schema with common attributes.""" + title: str = Field(min_length=1, max_length=255) + description: str | None = Field(default=None, max_length=255) + + +# Properties to receive on item creation +class ItemCreate(ItemBase): + """Schema for creating a new item.""" + pass + + +# Properties to receive on item update +class ItemUpdate(ItemBase): + """Schema for updating an item.""" + title: str | None = Field(default=None, min_length=1, max_length=255) + + +# Properties to return via API, id is always required +class ItemPublic(ItemBase): + """Schema for public item information returned via API.""" + id: uuid.UUID + owner_id: uuid.UUID + + +class ItemsPublic(BaseModel): + """Schema for returning a list of items.""" + data: list[ItemPublic] + count: int \ No newline at end of file diff --git a/backend/app/schemas/user.py b/backend/app/schemas/user.py new file mode 100644 index 0000000000..b036ffd177 --- /dev/null +++ b/backend/app/schemas/user.py @@ -0,0 +1,62 @@ +import uuid + +from pydantic import BaseModel, EmailStr, Field + + +# Shared properties +class UserBase(BaseModel): + """Base User schema with common attributes.""" + email: EmailStr = Field(max_length=255) + is_active: bool = True + is_superuser: bool = False + full_name: str | None = Field(default=None, max_length=255) + + +# Properties to receive via API on creation +class UserCreate(UserBase): + """Schema for creating a new user.""" + password: str = Field(min_length=8, max_length=40) + + +class UserRegister(BaseModel): + """Schema for user registration.""" + email: EmailStr = Field(max_length=255) + password: str = Field(min_length=8, max_length=40) + full_name: str | None = Field(default=None, max_length=255) + + +# 
Properties to receive via API on update, all are optional +class UserUpdate(UserBase): + """Schema for updating a user.""" + email: EmailStr | None = Field(default=None, max_length=255) + password: str | None = Field(default=None, min_length=8, max_length=40) + + +class UserUpdateMe(BaseModel): + """Schema for updating own user information.""" + full_name: str | None = Field(default=None, max_length=255) + email: EmailStr | None = Field(default=None, max_length=255) + + +class UpdatePassword(BaseModel): + """Schema for updating password.""" + current_password: str = Field(min_length=8, max_length=40) + new_password: str = Field(min_length=8, max_length=40) + + +# Properties to return via API, id is always required +class UserPublic(UserBase): + """Schema for public user information returned via API.""" + id: uuid.UUID + + +class UsersPublic(BaseModel): + """Schema for returning a list of users.""" + data: list[UserPublic] + count: int + + +class NewPassword(BaseModel): + """Schema for setting a new password with a reset token.""" + token: str + new_password: str = Field(min_length=8, max_length=40) \ No newline at end of file diff --git a/backend/app/services/__init__.py b/backend/app/services/__init__.py new file mode 100644 index 0000000000..8a08efe795 --- /dev/null +++ b/backend/app/services/__init__.py @@ -0,0 +1,4 @@ +from app.services import item, user +from app.services.repositories import ItemRepository, UserRepository + +__all__ = ["item", "user", "ItemRepository", "UserRepository"] \ No newline at end of file diff --git a/backend/app/services/item.py b/backend/app/services/item.py new file mode 100644 index 0000000000..bde979b457 --- /dev/null +++ b/backend/app/services/item.py @@ -0,0 +1,60 @@ +import uuid +from typing import List, Optional + +from sqlmodel import Session, func, select + +from app.db.models.item import Item +from app.schemas.item import ItemCreate, ItemUpdate +from app.services.repositories.item import ItemRepository + + +# Create a singleton instance of the repository +item_repository = ItemRepository() + + +def get_item(session: Session, item_id: uuid.UUID) -> Optional[Item]: + """Get an item by ID.""" + return item_repository.get(session=session, id=item_id) + + +def get_items(session: Session, skip: int = 0, limit: int = 100) -> List[Item]: + """Get multiple items with pagination.""" + return item_repository.get_multi(session=session, skip=skip, limit=limit) + + +def get_items_by_owner( + session: Session, owner_id: uuid.UUID, skip: int = 0, limit: int = 100 +) -> List[Item]: + """Get multiple items by owner ID with pagination.""" + return item_repository.get_multi_by_owner( + session=session, owner_id=owner_id, skip=skip, limit=limit + ) + + +def create_item(session: Session, item_create: ItemCreate, owner_id: uuid.UUID) -> Item: + """Create a new item with owner ID.""" + return item_repository.create_with_owner( + session=session, obj_in=item_create, owner_id=owner_id + ) + + +def update_item(session: Session, db_item: Item, item_update: ItemUpdate) -> Item: + """Update an item.""" + return item_repository.update(session=session, db_obj=db_item, obj_in=item_update) + + +def delete_item(session: Session, item_id: uuid.UUID) -> Optional[Item]: + """Delete an item by ID.""" + return item_repository.remove(session=session, id=item_id) + + +def count_items(session: Session) -> int: + """Count all items.""" + return session.exec(select(func.count()).select_from(Item)).one() + + +def count_items_by_owner(session: Session, owner_id: uuid.UUID) -> int: + """Count 
items by owner ID."""
+    return session.exec(
+        select(func.count()).select_from(Item).where(Item.owner_id == owner_id)
+    ).one()
\ No newline at end of file
diff --git a/backend/app/services/repositories/__init__.py b/backend/app/services/repositories/__init__.py
new file mode 100644
index 0000000000..a007dc6c80
--- /dev/null
+++ b/backend/app/services/repositories/__init__.py
@@ -0,0 +1,4 @@
+from app.services.repositories.user import UserRepository
+from app.services.repositories.item import ItemRepository
+
+__all__ = ["UserRepository", "ItemRepository"]
\ No newline at end of file
diff --git a/backend/app/services/repositories/item.py b/backend/app/services/repositories/item.py
new file mode 100644
index 0000000000..de45e7c7ce
--- /dev/null
+++ b/backend/app/services/repositories/item.py
@@ -0,0 +1,38 @@
+import uuid
+from typing import List, Optional
+
+from sqlmodel import Session, select
+
+from app.db.models.item import Item
+from app.schemas.item import ItemCreate, ItemUpdate
+from app.services.repository import BaseRepository
+
+
+class ItemRepository(BaseRepository[Item, ItemCreate, ItemUpdate]):
+    """Repository for Item model operations."""
+
+    def __init__(self):
+        super().__init__(Item)
+
+    def get_multi_by_owner(
+        self, session: Session, *, owner_id: uuid.UUID, skip: int = 0, limit: int = 100
+    ) -> List[Item]:
+        """Get multiple items by owner ID with pagination."""
+        statement = (
+            select(Item)
+            .where(Item.owner_id == owner_id)
+            .offset(skip)
+            .limit(limit)
+        )
+        return session.exec(statement).all()
+
+    def create_with_owner(
+        self, session: Session, *, obj_in: ItemCreate, owner_id: uuid.UUID
+    ) -> Item:
+        """Create a new item with owner ID."""
+        obj_data = obj_in.model_dump()
+        db_obj = Item(**obj_data, owner_id=owner_id)
+        session.add(db_obj)
+        session.commit()
+        session.refresh(db_obj)
+        return db_obj
\ No newline at end of file
diff --git a/backend/app/services/repositories/user.py b/backend/app/services/repositories/user.py
new file mode 100644
index 0000000000..bfb98536ad
--- /dev/null
+++ b/backend/app/services/repositories/user.py
@@ -0,0 +1,52 @@
+from typing import Optional
+
+from sqlmodel import Session, select
+
+from app.core.security import get_password_hash, verify_password
+from app.db.models.user import User
+from app.schemas.user import UserCreate, UserUpdate
+from app.services.repository import BaseRepository
+
+
+class UserRepository(BaseRepository[User, UserCreate, UserUpdate]):
+    """Repository for User model operations."""
+
+    def __init__(self):
+        super().__init__(User)
+
+    def get_by_email(self, session: Session, *, email: str) -> Optional[User]:
+        """Get a user by email."""
+        statement = select(User).where(User.email == email)
+        return session.exec(statement).first()
+
+    def create(self, session: Session, *, obj_in: UserCreate) -> User:
+        """Create a new user with hashed password."""
+        db_obj = User(
+            email=obj_in.email,
+            hashed_password=get_password_hash(obj_in.password),
+            full_name=obj_in.full_name,
+            is_superuser=obj_in.is_superuser,
+            is_active=obj_in.is_active,
+        )
+        session.add(db_obj)
+        session.commit()
+        session.refresh(db_obj)
+        return db_obj
+
+    def update(self, session: Session, *, db_obj: User, obj_in: UserUpdate) -> User:
+        """Update a user, handling password hashing if needed."""
+        update_data = obj_in.model_dump(exclude_unset=True)
+        if "password" in update_data and update_data["password"]:
+            hashed_password = get_password_hash(update_data["password"])
+            del update_data["password"]
+            db_obj.hashed_password = 
hashed_password + return super().update(session, db_obj=db_obj, obj_in=UserUpdate(**update_data)) + + def authenticate(self, session: Session, *, email: str, password: str) -> Optional[User]: + """Authenticate a user by email and password.""" + user = self.get_by_email(session=session, email=email) + if not user: + return None + if not verify_password(password, user.hashed_password): + return None + return user \ No newline at end of file diff --git a/backend/app/services/repository.py b/backend/app/services/repository.py new file mode 100644 index 0000000000..f28a05a383 --- /dev/null +++ b/backend/app/services/repository.py @@ -0,0 +1,64 @@ +from typing import Any, Generic, List, Optional, Type, TypeVar + +from pydantic import BaseModel +from sqlmodel import Session, SQLModel, select + +# Define a type variable for the SQLModel +ModelType = TypeVar("ModelType", bound=SQLModel) +# Define a type variable for the Create schema +CreateSchemaType = TypeVar("CreateSchemaType", bound=BaseModel) +# Define a type variable for the Update schema +UpdateSchemaType = TypeVar("UpdateSchemaType", bound=BaseModel) + + +class BaseRepository(Generic[ModelType, CreateSchemaType, UpdateSchemaType]): + """ + Base repository class that provides standard CRUD operations. + + This class implements the repository pattern for database operations, + providing a clean separation between the database and business logic. + """ + + def __init__(self, model: Type[ModelType]): + """Initialize with the model class.""" + self.model = model + + def get(self, session: Session, id: Any) -> Optional[ModelType]: + """Get a single record by ID.""" + return session.get(self.model, id) + + def get_multi( + self, session: Session, *, skip: int = 0, limit: int = 100 + ) -> List[ModelType]: + """Get multiple records with pagination.""" + statement = select(self.model).offset(skip).limit(limit) + return session.exec(statement).all() + + def create(self, session: Session, *, obj_in: CreateSchemaType) -> ModelType: + """Create a new record.""" + obj_data = obj_in.model_dump() + db_obj = self.model(**obj_data) # type: ignore + session.add(db_obj) + session.commit() + session.refresh(db_obj) + return db_obj + + def update( + self, session: Session, *, db_obj: ModelType, obj_in: UpdateSchemaType + ) -> ModelType: + """Update an existing record.""" + obj_data = obj_in.model_dump(exclude_unset=True) + for key, value in obj_data.items(): + setattr(db_obj, key, value) + session.add(db_obj) + session.commit() + session.refresh(db_obj) + return db_obj + + def remove(self, session: Session, *, id: Any) -> Optional[ModelType]: + """Remove a record by ID.""" + obj = session.get(self.model, id) + if obj: + session.delete(obj) + session.commit() + return obj \ No newline at end of file diff --git a/backend/app/services/user.py b/backend/app/services/user.py new file mode 100644 index 0000000000..209395760a --- /dev/null +++ b/backend/app/services/user.py @@ -0,0 +1,110 @@ +import uuid +from typing import Any, Optional + +from sqlmodel import Session, select + +from app.core.security import get_password_hash, verify_password +from app.db.models.user import User +from app.schemas.user import UserCreate, UserUpdate +from app.services.repositories.user import UserRepository + + +# Create a singleton instance of the repository +user_repository = UserRepository() + + +def create_user(*, session: Session, user_create: UserCreate) -> User: + """ + Create a new user in the database. 
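A short sketch of how the repository layer above is meant to be exercised (not part of the diff). It assumes the first superuser has already been created by `init_db`, and opens a session directly on the engine rather than going through FastAPI's dependency injection.

```python
from sqlmodel import Session

from app.core.config import settings
from app.db.session import engine
from app.schemas import ItemCreate, ItemUpdate
from app.services.repositories import ItemRepository, UserRepository

item_repository = ItemRepository()
user_repository = UserRepository()

with Session(engine) as session:
    owner = user_repository.get_by_email(session=session, email=settings.FIRST_SUPERUSER)
    assert owner is not None  # created by init_db on startup

    item = item_repository.create_with_owner(
        session=session, obj_in=ItemCreate(title="Example item"), owner_id=owner.id
    )
    item = item_repository.update(
        session=session, db_obj=item, obj_in=ItemUpdate(title="Renamed item")
    )
    print(item_repository.get_multi_by_owner(session=session, owner_id=owner.id))
```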
+ + Args: + session: Database session + user_create: User create schema + + Returns: + Created user + """ + return user_repository.create(session=session, obj_in=user_create) + + +def update_user(*, session: Session, db_user: User, user_in: UserUpdate) -> User: + """ + Update a user in the database. + + Args: + session: Database session + db_user: Existing user object + user_in: User update schema + + Returns: + Updated user + """ + return user_repository.update(session=session, db_obj=db_user, obj_in=user_in) + + +def get_user_by_email(*, session: Session, email: str) -> Optional[User]: + """ + Get a user by email. + + Args: + session: Database session + email: User email + + Returns: + User if found, None otherwise + """ + return user_repository.get_by_email(session=session, email=email) + + +def authenticate(*, session: Session, email: str, password: str) -> Optional[User]: + """ + Authenticate a user. + + Args: + session: Database session + email: User email + password: User password + + Returns: + User if authentication succeeded, None otherwise + """ + return user_repository.authenticate(session=session, email=email, password=password) + + +def get_user_by_id(*, session: Session, user_id: uuid.UUID) -> Optional[User]: + """ + Get a user by ID. + + Args: + session: Database session + user_id: User ID + + Returns: + User if found, None otherwise + """ + return user_repository.get(session=session, id=user_id) + + +def get_users(*, session: Session, skip: int = 0, limit: int = 100) -> list[User]: + """ + Get a list of users. + + Args: + session: Database session + skip: Number of users to skip + limit: Maximum number of users to return + + Returns: + List of users + """ + return user_repository.get_multi(session=session, skip=skip, limit=limit) + + +def is_active(user: User) -> bool: + """Check if a user is active.""" + return user.is_active + + +def is_superuser(user: User) -> bool: + """Check if a user is a superuser.""" + return user.is_superuser \ No newline at end of file diff --git a/backend/app/tasks/__init__.py b/backend/app/tasks/__init__.py new file mode 100644 index 0000000000..9ee24630cf --- /dev/null +++ b/backend/app/tasks/__init__.py @@ -0,0 +1,15 @@ +from app.tasks.celery_app import celery_app +from app.tasks.worker import ( + analyze_social_data, + generate_reports, + process_data_pipeline, + scrape_social_media, +) + +__all__ = [ + "celery_app", + "scrape_social_media", + "analyze_social_data", + "generate_reports", + "process_data_pipeline", +] \ No newline at end of file diff --git a/backend/app/tasks/celery_app.py b/backend/app/tasks/celery_app.py new file mode 100644 index 0000000000..36c166f2c4 --- /dev/null +++ b/backend/app/tasks/celery_app.py @@ -0,0 +1,40 @@ +from celery import Celery +import os + +from app.core.config import settings + +# Get Redis connection details from environment variables +redis_server = os.environ.get("REDIS_SERVER", "localhost") +redis_port = os.environ.get("REDIS_PORT", "6379") +redis_url = f"redis://{redis_server}:{redis_port}/0" + +# Get RabbitMQ connection details from environment variables +rabbitmq_user = os.environ.get("RABBITMQ_USER", "guest") +rabbitmq_password = os.environ.get("RABBITMQ_PASSWORD", "guest") +rabbitmq_server = os.environ.get("RABBITMQ_SERVER", "localhost") +broker_url = f"amqp://{rabbitmq_user}:{rabbitmq_password}@{rabbitmq_server}:5672//" + +celery_app = Celery( + "worker", + broker=broker_url, + backend=redis_url, +) + +# Configure Celery +celery_app.conf.update( + task_serializer="json", + 
accept_content=["json"], + result_serializer="json", + timezone="UTC", + enable_utc=True, +) + +# Define task routes to organize workers +celery_app.conf.task_routes = { + "app.tasks.worker.scrape_social_media": "scraper-queue", + "app.tasks.worker.analyze_social_data": "analysis-queue", + "app.tasks.worker.generate_reports": "reporting-queue", +} + +# Set a default result expiration time (in seconds) +celery_app.conf.result_expires = 60 * 60 * 24 # 1 day \ No newline at end of file diff --git a/backend/app/tasks/worker.py b/backend/app/tasks/worker.py new file mode 100644 index 0000000000..11682c4584 --- /dev/null +++ b/backend/app/tasks/worker.py @@ -0,0 +1,125 @@ +import logging +from datetime import datetime +from typing import Any, Dict, List, Optional + +from app.db.session import mongodb +from app.tasks.celery_app import celery_app + +logger = logging.getLogger(__name__) + + +@celery_app.task +def scrape_social_media(platform: str, query: str, limit: int = 100) -> Dict[str, Any]: + """ + Task to scrape social media platforms for political content. + + Args: + platform: The social media platform to scrape (twitter/x, facebook, instagram, etc.) + query: The search query + limit: Maximum number of posts to scrape + + Returns: + Dict with information about the scraping operation + """ + logger.info(f"Scraping {platform} for query: {query} (limit: {limit})") + + # This would be implemented to use APIFY or similar service + # For now, just return a mock result + return { + "task_id": scrape_social_media.request.id, + "platform": platform, + "query": query, + "limit": limit, + "posts_scraped": 0, # Mock value + "timestamp": datetime.utcnow().isoformat(), + } + + +@celery_app.task +def analyze_social_data(post_ids: List[str]) -> Dict[str, Any]: + """ + Task to analyze social media posts. + + Args: + post_ids: List of post IDs to analyze + + Returns: + Dict with information about the analysis operation + """ + logger.info(f"Analyzing {len(post_ids)} social media posts") + + # This would be implemented to perform sentiment analysis, topic extraction, etc. + # For now, just return a mock result + return { + "task_id": analyze_social_data.request.id, + "posts_analyzed": len(post_ids), + "timestamp": datetime.utcnow().isoformat(), + } + + +@celery_app.task +def generate_reports(entity_id: str, time_period: str) -> Dict[str, Any]: + """ + Task to generate reports for political entities. + + Args: + entity_id: ID of the political entity + time_period: Time period for the report (e.g., "last_24h", "last_week", "last_month") + + Returns: + Dict with information about the report generation operation + """ + logger.info(f"Generating report for entity {entity_id} for period {time_period}") + + # This would be implemented to generate comprehensive reports + # For now, just return a mock result + return { + "task_id": generate_reports.request.id, + "entity_id": entity_id, + "time_period": time_period, + "report_url": f"/reports/{entity_id}/{time_period}.pdf", # Mock URL + "timestamp": datetime.utcnow().isoformat(), + } + + +@celery_app.task +def process_data_pipeline( + platform: str, + query: str, + entity_id: Optional[str] = None, + time_period: str = "last_24h", +) -> Dict[str, Any]: + """ + Task to coordinate the entire data processing pipeline. + + This is a higher-level task that coordinates the execution of other tasks. 
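Illustrative only: dispatching the pipeline task from API or service code once a worker is running. The platform, query, and entity id values are placeholders.

```python
from app.tasks.worker import process_data_pipeline

# Fire-and-forget dispatch; the returned AsyncResult id can be polled against the
# Redis result backend configured in celery_app.py.
async_result = process_data_pipeline.delay(
    platform="twitter",
    query="#election2024",                   # placeholder query
    entity_id="65f1c0ffee0000000000abcd",    # placeholder entity id
    time_period="last_week",
)
print(async_result.id)
```

A `celery.chain` could link scrape → analyze → report explicitly instead of the fire-and-forget calls inside the task, but that would require `scrape_social_media` to return the scraped post IDs rather than a summary dict.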
+ + Args: + platform: The social media platform to scrape + query: The search query + entity_id: Optional entity ID to associate with the analysis + time_period: Time period for the report + + Returns: + Dict with information about the pipeline execution + """ + logger.info(f"Starting data pipeline for {platform}, query: {query}") + + # Execute the pipeline steps + scrape_result = scrape_social_media.delay(platform, query, 100) + # This would normally wait for the scrape to complete and get actual post IDs + mock_post_ids = ["mock_id_1", "mock_id_2", "mock_id_3"] + analysis_result = analyze_social_data.delay(mock_post_ids) + + # If an entity ID is provided, generate a report + report_result = None + if entity_id: + report_result = generate_reports.delay(entity_id, time_period) + + return { + "task_id": process_data_pipeline.request.id, + "scrape_task_id": scrape_result.id, + "analysis_task_id": analysis_result.id, + "report_task_id": report_result.id if report_result else None, + "timestamp": datetime.utcnow().isoformat(), + } \ No newline at end of file diff --git a/backend/celerybeat-schedule b/backend/celerybeat-schedule new file mode 100644 index 0000000000..4a0759c1c1 Binary files /dev/null and b/backend/celerybeat-schedule differ diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 1c77b83ded..586883997a 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "app" version = "0.1.0" -description = "" +description = "A modern FastAPI backend template with SQLModel, PostgreSQL, and JWT authentication" requires-python = ">=3.10,<4.0" dependencies = [ "fastapi[standard]<1.0.0,>=0.114.2", @@ -21,6 +21,28 @@ dependencies = [ "pydantic-settings<3.0.0,>=2.2.1", "sentry-sdk[fastapi]<2.0.0,>=1.40.6", "pyjwt<3.0.0,>=2.8.0", + + # Database Clients + "motor<3.3.0,>=3.2.0", # MongoDB async driver + "pymongo<5.0.0,>=4.5.0", # MongoDB sync driver + "redis<5.0.0,>=4.6.0", # Redis client + "pinecone-client<3.0.0,>=2.2.1", # Pinecone vector DB client + + # Task Processing + "celery<6.0.0,>=5.3.0", # Task queue + "flower<2.0.0,>=1.2.0", # Celery monitoring tool + # "confluent-kafka<2.0.0,>=1.0.0", # Kafka client - requires system dependencies + "aiokafka<1.0.0,>=0.8.1", # Async Kafka client (pure Python alternative) + "pika<2.0.0,>=1.3.2", # RabbitMQ client + + # ML/NLP + "spacy>=3.7.0,<3.8.0", # NLP library + "transformers<5.0.0,>=4.28.0", # Hugging Face Transformers + "sentence-transformers<3.0.0,>=2.2.2", # Sentence embeddings + "scikit-learn<1.4.0,>=1.3.0", # Machine learning library + # "torch>=2.0.0,<3.0.0", # PyTorch - install separately if needed + "numpy<2.0.0,>=1.24.0", # Required for ML operations + "pandas<2.0.0,>=1.5.3", # Data processing ] [tool.uv] @@ -29,6 +51,7 @@ dev-dependencies = [ "mypy<2.0.0,>=1.8.0", "ruff<1.0.0,>=0.2.2", "pre-commit<4.0.0,>=3.6.2", + "pytest-cov<5.0.0,>=4.1.0", "types-passlib<2.0.0.0,>=1.7.7.20240106", "coverage<8.0.0,>=7.4.3", ] diff --git a/docker-compose.override.yml b/docker-compose.override.yml index 0751abe901..151fe7ac01 100644 --- a/docker-compose.override.yml +++ b/docker-compose.override.yml @@ -50,6 +50,93 @@ services: ports: - "5432:5432" + mongodb: + image: mongo:6 + restart: "no" + ports: + - "27017:27017" + environment: + MONGO_INITDB_ROOT_USERNAME: ${MONGO_USER:-mongo} + MONGO_INITDB_ROOT_PASSWORD: ${MONGO_PASSWORD:-mongo} + MONGO_INITDB_DATABASE: ${MONGODB_DATABASE:-political_analysis} + volumes: + - mongodb_data:/data/db + healthcheck: + test: echo 'db.runCommand("ping").ok' | mongosh 
localhost:27017/test --quiet + interval: 10s + timeout: 10s + retries: 5 + + redis: + image: redis:7-alpine + restart: "no" + ports: + - "6379:6379" + volumes: + - redis_data:/data + healthcheck: + test: [ "CMD", "redis-cli", "ping" ] + interval: 5s + timeout: 5s + retries: 5 + + rabbitmq: + image: rabbitmq:3.12-management + restart: "no" + ports: + - "5672:5672" # AMQP port + - "15672:15672" # Management UI + environment: + - RABBITMQ_DEFAULT_USER=${RABBITMQ_USER:-guest} + - RABBITMQ_DEFAULT_PASS=${RABBITMQ_PASSWORD:-guest} + volumes: + - rabbitmq_data:/var/lib/rabbitmq + healthcheck: + test: [ "CMD", "rabbitmq-diagnostics", "check_port_connectivity" ] + interval: 10s + timeout: 5s + retries: 5 + + zookeeper: + image: bitnami/zookeeper:latest + restart: "no" + ports: + - "2181:2181" + environment: + - ALLOW_ANONYMOUS_LOGIN=yes + volumes: + - zookeeper_data:/bitnami/zookeeper + + kafka: + image: bitnami/kafka:3.4 + restart: "no" + ports: + - "9092:9092" + environment: + - KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181 + - ALLOW_PLAINTEXT_LISTENER=yes + - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=CLIENT:PLAINTEXT,EXTERNAL:PLAINTEXT + - KAFKA_CFG_LISTENERS=CLIENT://:9093,EXTERNAL://:9092 + - KAFKA_CFG_ADVERTISED_LISTENERS=CLIENT://kafka:9093,EXTERNAL://localhost:9092 + - KAFKA_CFG_INTER_BROKER_LISTENER_NAME=CLIENT + volumes: + - kafka_data:/bitnami/kafka + depends_on: + - zookeeper + + mongo-express: + image: mongo-express + restart: "no" + ports: + - "8081:8081" + environment: + ME_CONFIG_MONGODB_ADMINUSERNAME: ${MONGO_USER:-mongo} + ME_CONFIG_MONGODB_ADMINPASSWORD: ${MONGO_PASSWORD:-mongo} + ME_CONFIG_MONGODB_URL: mongodb://${MONGO_USER:-mongo}:${MONGO_PASSWORD:-mongo}@mongodb:27017/ + depends_on: + mongodb: + condition: service_healthy + adminer: restart: "no" ports: @@ -85,6 +172,100 @@ services: SMTP_PORT: "1025" SMTP_TLS: "false" EMAILS_FROM_EMAIL: "noreply@example.com" + MONGODB_SERVER: "mongodb" + MONGODB_PORT: "27017" + MONGODB_USER: ${MONGO_USER:-mongo} + MONGODB_PASSWORD: ${MONGO_PASSWORD:-mongo} + MONGODB_DATABASE: ${MONGODB_DATABASE:-political_analysis} + REDIS_SERVER: "redis" + REDIS_PORT: "6379" + depends_on: + mongodb: + condition: service_healthy + redis: + condition: service_healthy + + celery-worker: + build: + context: ./backend + dockerfile: Dockerfile.celery + restart: "no" + command: celery -A app.tasks.celery_app worker --loglevel=info + volumes: + - ./backend:/app + environment: + MONGODB_SERVER: "mongodb" + MONGODB_PORT: "27017" + MONGODB_USER: ${MONGO_USER:-mongo} + MONGODB_PASSWORD: ${MONGO_PASSWORD:-mongo} + MONGODB_DATABASE: ${MONGODB_DATABASE:-political_analysis} + REDIS_SERVER: "redis" + REDIS_PORT: "6379" + RABBITMQ_SERVER: "rabbitmq" + RABBITMQ_USER: ${RABBITMQ_USER:-guest} + RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD:-guest} + KAFKA_BOOTSTRAP_SERVERS: "kafka:9093,localhost:9092" + CELERY_BROKER: "amqp://${RABBITMQ_USER:-guest}:${RABBITMQ_PASSWORD:-guest}@rabbitmq:5672//" + depends_on: + - backend + - redis + - mongodb + - rabbitmq + + celery-beat: + build: + context: ./backend + dockerfile: Dockerfile.celery + restart: "no" + command: celery -A app.tasks.celery_app beat --loglevel=info + volumes: + - ./backend:/app + environment: + MONGODB_SERVER: "mongodb" + MONGODB_PORT: "27017" + MONGODB_USER: ${MONGO_USER:-mongo} + MONGODB_PASSWORD: ${MONGO_PASSWORD:-mongo} + MONGODB_DATABASE: ${MONGODB_DATABASE:-political_analysis} + REDIS_SERVER: "redis" + REDIS_PORT: "6379" + RABBITMQ_SERVER: "rabbitmq" + RABBITMQ_USER: ${RABBITMQ_USER:-guest} + RABBITMQ_PASSWORD: 
${RABBITMQ_PASSWORD:-guest} + KAFKA_BOOTSTRAP_SERVERS: "kafka:9093,localhost:9092" + CELERY_BROKER: "amqp://${RABBITMQ_USER:-guest}:${RABBITMQ_PASSWORD:-guest}@rabbitmq:5672//" + depends_on: + - celery-worker + + celery-flower: + build: + context: ./backend + dockerfile: Dockerfile.celery + restart: "no" + command: celery flower --broker=amqp://${RABBITMQ_USER:-guest}:${RABBITMQ_PASSWORD:-guest}@rabbitmq:5672// --port=5555 + ports: + - "5555:5555" + environment: + MONGODB_SERVER: "mongodb" + MONGODB_PORT: "27017" + MONGODB_USER: ${MONGO_USER:-mongo} + MONGODB_PASSWORD: ${MONGO_PASSWORD:-mongo} + MONGODB_DATABASE: ${MONGODB_DATABASE:-political_analysis} + REDIS_SERVER: "redis" + REDIS_PORT: "6379" + RABBITMQ_SERVER: "rabbitmq" + RABBITMQ_USER: ${RABBITMQ_USER:-guest} + RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD:-guest} + CELERY_BROKER: "amqp://${RABBITMQ_USER:-guest}:${RABBITMQ_PASSWORD:-guest}@rabbitmq:5672//" + PROJECT_NAME: "Political Social Media Analysis Platform" + POSTGRES_SERVER: "db" + POSTGRES_PORT: "5432" + POSTGRES_DB: "app" + POSTGRES_USER: "postgres" + POSTGRES_PASSWORD: "changethis" + FIRST_SUPERUSER: "admin@example.com" + FIRST_SUPERUSER_PASSWORD: "password" + depends_on: + - celery-worker mailcatcher: image: schickling/mailcatcher @@ -131,3 +312,10 @@ networks: traefik-public: # For local dev, don't expect an external Traefik network external: false + +volumes: + mongodb_data: + redis_data: + rabbitmq_data: + zookeeper_data: + kafka_data: diff --git a/docker-compose.yml b/docker-compose.yml index c92d5d4451..aa71dabfe9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,7 +4,7 @@ services: image: postgres:12 restart: always healthcheck: - test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"] + test: [ "CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}" ] interval: 10s retries: 5 start_period: 30s @@ -19,6 +19,107 @@ services: - POSTGRES_USER=${POSTGRES_USER?Variable not set} - POSTGRES_DB=${POSTGRES_DB?Variable not set} + # MongoDB service for document data + mongodb: + image: mongo:6.0 + restart: always + networks: + - traefik-public + - default + ports: + - "27017:27017" + environment: + - MONGO_INITDB_ROOT_USERNAME=${MONGO_USER:-mongouser} + - MONGO_INITDB_ROOT_PASSWORD=${MONGO_PASSWORD:-mongopassword} + - MONGO_INITDB_DATABASE=${MONGO_DB:-socialmediadb} + volumes: + - mongodb-data:/data/db + healthcheck: + test: echo 'db.runCommand("ping").ok' | mongosh --quiet + interval: 10s + timeout: 10s + retries: 5 + start_period: 40s + + # Redis service for caching and real-time operations + redis: + image: redis:7.0 + restart: always + networks: + - traefik-public + - default + ports: + - "6379:6379" + volumes: + - redis-data:/data + command: redis-server --appendonly yes + healthcheck: + test: [ "CMD", "redis-cli", "ping" ] + interval: 5s + timeout: 5s + retries: 5 + + # RabbitMQ service for message broker + rabbitmq: + image: rabbitmq:3.12-management + restart: always + networks: + - traefik-public + - default + ports: + - "5672:5672" # AMQP port + - "15672:15672" # Management UI + environment: + - RABBITMQ_DEFAULT_USER=${RABBITMQ_USER:-rabbitmquser} + - RABBITMQ_DEFAULT_PASS=${RABBITMQ_PASSWORD:-rabbitmqpassword} + volumes: + - rabbitmq-data:/var/lib/rabbitmq + healthcheck: + test: [ "CMD", "rabbitmq-diagnostics", "check_port_connectivity" ] + interval: 10s + timeout: 5s + retries: 5 + start_period: 40s + + # Kafka service with Zookeeper for stream processing + zookeeper: + image: bitnami/zookeeper:latest + restart: always + networks: + - 
traefik-public + - default + ports: + - "2181:2181" + environment: + - ALLOW_ANONYMOUS_LOGIN=yes + volumes: + - zookeeper-data:/bitnami/zookeeper + + kafka: + image: bitnami/kafka:3.4 + restart: always + networks: + - traefik-public + - default + ports: + - "9092:9092" + environment: + - KAFKA_BROKER_ID=1 + - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092 + - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://kafka:9092 + - KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181 + - ALLOW_PLAINTEXT_LISTENER=yes + volumes: + - kafka-data:/bitnami/kafka + depends_on: + - zookeeper + healthcheck: + test: [ "CMD-SHELL", "kafka-topics.sh --bootstrap-server 127.0.0.1:9092 --list" ] + interval: 10s + timeout: 5s + retries: 5 + start_period: 40s + adminer: image: adminer restart: always @@ -74,6 +175,16 @@ services: - POSTGRES_USER=${POSTGRES_USER?Variable not set} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD?Variable not set} - SENTRY_DSN=${SENTRY_DSN} + - MONGO_SERVER=mongodb + - MONGO_USER=${MONGO_USER:-mongouser} + - MONGO_PASSWORD=${MONGO_PASSWORD:-mongopassword} + - MONGO_DB=${MONGO_DB:-socialmediadb} + - REDIS_SERVER=redis + - REDIS_PORT=6379 + - RABBITMQ_SERVER=rabbitmq + - RABBITMQ_USER=${RABBITMQ_USER:-rabbitmquser} + - RABBITMQ_PASSWORD=${RABBITMQ_PASSWORD:-rabbitmqpassword} + - KAFKA_BOOTSTRAP_SERVERS=kafka:9092 backend: image: '${DOCKER_IMAGE_BACKEND?Variable not set}:${TAG-latest}' @@ -85,6 +196,15 @@ services: db: condition: service_healthy restart: true + mongodb: + condition: service_healthy + restart: true + redis: + condition: service_healthy + restart: true + rabbitmq: + condition: service_healthy + restart: true prestart: condition: service_completed_successfully env_file: @@ -107,9 +227,19 @@ services: - POSTGRES_USER=${POSTGRES_USER?Variable not set} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD?Variable not set} - SENTRY_DSN=${SENTRY_DSN} + - MONGO_SERVER=mongodb + - MONGO_USER=${MONGO_USER:-mongouser} + - MONGO_PASSWORD=${MONGO_PASSWORD:-mongopassword} + - MONGO_DB=${MONGO_DB:-socialmediadb} + - REDIS_SERVER=redis + - REDIS_PORT=6379 + - RABBITMQ_SERVER=rabbitmq + - RABBITMQ_USER=${RABBITMQ_USER:-rabbitmquser} + - RABBITMQ_PASSWORD=${RABBITMQ_PASSWORD:-rabbitmqpassword} + - KAFKA_BOOTSTRAP_SERVERS=kafka:9092 healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/api/v1/utils/health-check/"] + test: [ "CMD", "curl", "-f", "http://localhost:8000/api/v1/utils/health-check/" ] interval: 10s timeout: 5s retries: 5 @@ -134,6 +264,97 @@ services: # Enable redirection for HTTP and HTTPS - traefik.http.routers.${STACK_NAME?Variable not set}-backend-http.middlewares=https-redirect + # Celery Worker for background task processing + celery-worker: + image: '${DOCKER_IMAGE_BACKEND?Variable not set}:${TAG-latest}' + restart: always + networks: + - traefik-public + - default + depends_on: + db: + condition: service_healthy + restart: true + mongodb: + condition: service_healthy + restart: true + redis: + condition: service_healthy + restart: true + rabbitmq: + condition: service_healthy + restart: true + command: celery -A app.tasks.worker worker --loglevel=info --concurrency=2 + env_file: + - .env + environment: + - DOMAIN=${DOMAIN} + - ENVIRONMENT=${ENVIRONMENT} + - SECRET_KEY=${SECRET_KEY?Variable not set} + - POSTGRES_SERVER=db + - POSTGRES_PORT=${POSTGRES_PORT} + - POSTGRES_DB=${POSTGRES_DB} + - POSTGRES_USER=${POSTGRES_USER?Variable not set} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD?Variable not set} + - SENTRY_DSN=${SENTRY_DSN} + - MONGO_SERVER=mongodb + - MONGO_USER=${MONGO_USER:-mongouser} + - 
MONGO_PASSWORD=${MONGO_PASSWORD:-mongopassword} + - MONGO_DB=${MONGO_DB:-socialmediadb} + - REDIS_SERVER=redis + - REDIS_PORT=6379 + - RABBITMQ_SERVER=rabbitmq + - RABBITMQ_USER=${RABBITMQ_USER:-rabbitmquser} + - RABBITMQ_PASSWORD=${RABBITMQ_PASSWORD:-rabbitmqpassword} + - KAFKA_BOOTSTRAP_SERVERS=kafka:9092 + - C_FORCE_ROOT=true + + # Celery Beat for scheduled tasks + celery-beat: + image: '${DOCKER_IMAGE_BACKEND?Variable not set}:${TAG-latest}' + restart: always + networks: + - traefik-public + - default + depends_on: + db: + condition: service_healthy + restart: true + mongodb: + condition: service_healthy + restart: true + redis: + condition: service_healthy + restart: true + rabbitmq: + condition: service_healthy + restart: true + celery-worker: + condition: service_started + command: celery -A app.tasks.worker beat --loglevel=info + env_file: + - .env + environment: + - DOMAIN=${DOMAIN} + - ENVIRONMENT=${ENVIRONMENT} + - SECRET_KEY=${SECRET_KEY?Variable not set} + - POSTGRES_SERVER=db + - POSTGRES_PORT=${POSTGRES_PORT} + - POSTGRES_DB=${POSTGRES_DB} + - POSTGRES_USER=${POSTGRES_USER?Variable not set} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD?Variable not set} + - SENTRY_DSN=${SENTRY_DSN} + - MONGO_SERVER=mongodb + - MONGO_USER=${MONGO_USER:-mongouser} + - MONGO_PASSWORD=${MONGO_PASSWORD:-mongopassword} + - MONGO_DB=${MONGO_DB:-socialmediadb} + - REDIS_SERVER=redis + - REDIS_PORT=6379 + - RABBITMQ_SERVER=rabbitmq + - RABBITMQ_USER=${RABBITMQ_USER:-rabbitmquser} + - RABBITMQ_PASSWORD=${RABBITMQ_PASSWORD:-rabbitmqpassword} + - KAFKA_BOOTSTRAP_SERVERS=kafka:9092 + frontend: image: '${DOCKER_IMAGE_FRONTEND?Variable not set}:${TAG-latest}' restart: always @@ -162,8 +383,15 @@ services: # Enable redirection for HTTP and HTTPS - traefik.http.routers.${STACK_NAME?Variable not set}-frontend-http.middlewares=https-redirect + volumes: app-db-data: + mongodb-data: + redis-data: + rabbitmq-data: + zookeeper-data: + kafka-data: + networks: traefik-public:
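Finally, a minimal consumer sketch for the Kafka broker these compose files stand up, using the `aiokafka` dependency added in `pyproject.toml` and the `KAFKA_BOOTSTRAP_SERVERS` variable the services receive. The topic name `social-posts` is an assumption; the diff does not create any topics.

```python
import asyncio
import json
import os

from aiokafka import AIOKafkaConsumer


async def consume_posts() -> None:
    consumer = AIOKafkaConsumer(
        "social-posts",  # assumed topic name
        bootstrap_servers=os.environ.get("KAFKA_BOOTSTRAP_SERVERS", "localhost:9092"),
        value_deserializer=lambda v: json.loads(v.decode("utf-8")),
    )
    await consumer.start()
    try:
        async for message in consumer:
            print(message.topic, message.value)
    finally:
        await consumer.stop()


if __name__ == "__main__":
    asyncio.run(consume_posts())
```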