From 9d25a010820e4c9a34524813495e08897dc98877 Mon Sep 17 00:00:00 2001
From: Pascal <admin@pixelogi.com>
Date: Wed, 4 Jun 2025 15:41:03 +0200
Subject: [PATCH] Adding sql lite et reorganizing services folder

---
 ETF_Portal/services/data_service.py           | 144 +++--
 ETF_Portal/services/etf_selection_service.py  | 491 ------------------
 .../etf_selection_service/__init__.py         |  23 +
 .../etf_selection_service/database.py         | 179 +++++++
 .../etf_selection_service/exceptions.py       |  31 ++
 .../services/etf_selection_service/models.py  |  52 ++
 .../services/etf_selection_service/service.py | 418 +++++++++++++++
 .../services/etf_selection_service/utils.py   |  49 ++
 etf_data.db                                   | Bin 0 -> 20480 bytes
 pages/ETF_Portfolio_Builder.py                |  20 +-
 10 files changed, 854 insertions(+), 553 deletions(-)
 delete mode 100644 ETF_Portal/services/etf_selection_service.py
 create mode 100644 ETF_Portal/services/etf_selection_service/__init__.py
 create mode 100644 ETF_Portal/services/etf_selection_service/database.py
 create mode 100644 ETF_Portal/services/etf_selection_service/exceptions.py
 create mode 100644 ETF_Portal/services/etf_selection_service/models.py
 create mode 100644 ETF_Portal/services/etf_selection_service/service.py
 create mode 100644 ETF_Portal/services/etf_selection_service/utils.py
 create mode 100644 etf_data.db

diff --git a/ETF_Portal/services/data_service.py b/ETF_Portal/services/data_service.py
index 6968a37..76df0ed 100644
--- a/ETF_Portal/services/data_service.py
+++ b/ETF_Portal/services/data_service.py
@@ -11,6 +11,7 @@ from typing import Dict, Optional, List
 import yfinance as yf
 import logging
 from pathlib import Path
+from ..api.factory import APIFactory
 
 logger = logging.getLogger(__name__)
 
@@ -32,6 +33,9 @@ class DataService:
         for directory in [self.cache_dir, self.yf_cache_dir, self.fmp_cache_dir, 
                          self.fmp_profiles_dir, self.fmp_historical_dir, self.fmp_holdings_dir]:
             directory.mkdir(parents=True, exist_ok=True)
+            
+        # Initialize API factory
+        self.api_factory = APIFactory()
         
     def get_etf_data(self, ticker: str) -> Dict:
         """Get ETF data using fallback logic:
@@ -196,8 +200,81 @@ class DataService:
     
     def _get_from_fmp(self, ticker: str) -> Optional[Dict]:
         """Get data from FMP API"""
-        # TODO: Implement FMP API integration
-        return None
+        try:
+            # Get FMP client
+            fmp_client = self.api_factory.get_client('fmp')
+            
+            # Get ETF profile
+            profile = fmp_client.get_etf_profile(ticker)
+            if not profile:
+                return None
+                
+            # Get historical data
+            hist_data = fmp_client.get_etf_historical_data(ticker)
+            if hist_data.empty:
+                return None
+                
+            # Get holdings
+            holdings = fmp_client.get_etf_holdings(ticker)
+            
+            # Get dividend history
+            dividend_history = fmp_client.get_dividend_history(ticker)
+            
+            # Get sector weightings
+            sector_weightings = fmp_client.get_sector_weightings(ticker)
+            
+            # Calculate metrics
+            hist_data['log_returns'] = np.log(hist_data['close'] / hist_data['close'].shift(1))
+            returns = hist_data['log_returns'].dropna()
+            
+            # Calculate annualized volatility using daily log returns
+            volatility = returns.std() * np.sqrt(252)
+            
+            # Calculate max drawdown using rolling window
+            rolling_max = hist_data['close'].rolling(window=252, min_periods=1).max()
+            daily_drawdown = hist_data['close'] / rolling_max - 1.0
+            max_drawdown = abs(daily_drawdown.min())
+            
+            # Calculate Sharpe ratio (assuming risk-free rate of 0.02)
+            risk_free_rate = 0.02
+            excess_returns = returns - risk_free_rate/252
+            sharpe_ratio = np.sqrt(252) * excess_returns.mean() / returns.std()
+            
+            # Calculate Sortino ratio
+            downside_returns = returns[returns < 0]
+            sortino_ratio = np.sqrt(252) * excess_returns.mean() / downside_returns.std()
+            
+            # Calculate dividend trend
+            if not dividend_history.empty:
+                dividend_history['date'] = pd.to_datetime(dividend_history['date'])
+                dividend_history = dividend_history.sort_values('date')
+                dividend_trend = dividend_history['dividend'].pct_change().mean() * 100
+            else:
+                dividend_trend = 0.0
+            
+            # Calculate age in years
+            if 'inceptionDate' in profile:
+                inception_date = pd.to_datetime(profile['inceptionDate'])
+                age_years = (pd.Timestamp.now() - inception_date).days / 365.25
+            else:
+                age_years = 0.0
+            
+            return {
+                'info': profile,
+                'hist': hist_data.to_dict('records'),
+                'holdings': holdings,
+                'volatility': volatility,
+                'max_drawdown': max_drawdown,
+                'sharpe_ratio': sharpe_ratio,
+                'sortino_ratio': sortino_ratio,
+                'dividend_trend': dividend_trend,
+                'age_years': age_years,
+                'is_new': age_years < 2
+            }
+            
+        except Exception as e:
+            logger.error(f"Error fetching FMP data for {ticker}: {str(e)}")
+            return None
     
     def _get_from_yfinance(self, ticker: str) -> Optional[Dict]:
         """Get data from yfinance"""
@@ -342,60 +419,19 @@ class DataService:
         }
     
     def get_etf_list(self) -> List[str]:
-        """Get list of available ETFs with fallback logic:
-        1. Try FMP API
-        2. Try yfinance
-        3. Use default list as last resort
-        """
+        """Get list of available ETFs"""
         try:
-            # Try FMP API first
-            api_key = os.environ.get("FMP_API_KEY")
-            if api_key:
-                from ..api import APIFactory
-                api_factory = APIFactory(api_key)
-                fmp_client = api_factory.get_client('fmp')
-                
-                etfs = fmp_client.get_all_etfs()
-                if etfs:
-                    logger.info(f"Retrieved {len(etfs)} ETFs from FMP API")
-                    return [etf['symbol'] for etf in etfs if etf.get('symbol')]
-            
-            # Try yfinance as fallback
-            try:
-                import yfinance as yf
-                # Get list of ETFs from yfinance
-                etf_list = yf.download("^GSPC", period="1d", progress=False).index  # Just to initialize yfinance
-                etf_list = yf.download("^GSPC", period="1d", progress=False).index  # Get S&P 500 components
-                if not etf_list.empty:
-                    logger.info("Retrieved ETF list from yfinance")
-                    return list(etf_list)
-            except Exception as e:
-                logger.warning(f"Error getting ETF list from yfinance: {str(e)}")
-            
-            # Use default list as last resort
-            logger.warning("Using default ETF list as fallback")
-            return [
-                # Core ETFs
-                'VTI', 'VOO', 'VEA', 'VWO', 'BND',  # Vanguard
-                'SPY', 'QQQ', 'DIA', 'IWM', 'EFA',  # iShares
-                'AGG', 'TLT', 'LQD', 'HYG',         # Fixed Income
-                # Income ETFs
-                'JEPI', 'FEPI', 'MSTY', 'SCHD', 'VIG',
-                # Sector ETFs
-                'XLK', 'XLF', 'XLV', 'XLE', 'XLU',  # Tech, Financial, Healthcare, Energy, Utilities
-                # Factor ETFs
-                'MTUM', 'VLUE', 'QUAL', 'SIZE',     # Momentum, Value, Quality, Size
-                # International
-                'EWJ', 'EWU', 'EWZ', 'EWC',         # Japan, UK, Brazil, Canada
-                # Alternative
-                'GLD', 'VNQ', 'REM'                 # Gold, REITs, Mortgage REITs
+            # Define a list of high-yield ETFs to track
+            etf_list = [
+                'JEPI', 'JEPQ', 'FEPI', 'CONY', 'MSTY', 'SDIV', 'DIV', 'VIGI',
+                'VYM', 'VIG', 'DVY', 'SCHD', 'DGRO', 'VIGI', 'VIG', 'VYM',
+                'DVY', 'SCHD', 'DGRO', 'VIGI', 'VIG', 'VYM', 'DVY', 'SCHD',
+                'DGRO', 'VIGI', 'VIG', 'VYM', 'DVY', 'SCHD', 'DGRO'
             ]
             
+            # Remove duplicates while preserving order
+            return list(dict.fromkeys(etf_list))
+            
         except Exception as e:
             logger.error(f"Error getting ETF list: {str(e)}")
-            # Return default list as last resort
-            return [
-                'VTI', 'VOO', 'SPY', 'QQQ', 'BND',  # Core ETFs
-                'JEPI', 'FEPI', 'MSTY', 'SCHD',     # Income ETFs
-                'XLK', 'XLF', 'XLV', 'XLE', 'XLU'   # Sector ETFs
-            ] 
\ No newline at end of file
+            return [] 
\ No newline at end of file
diff --git a/ETF_Portal/services/etf_selection_service.py b/ETF_Portal/services/etf_selection_service.py
deleted file mode 100644
index 7a150a6..0000000
--- a/ETF_Portal/services/etf_selection_service.py
+++ /dev/null
@@ -1,491 +0,0 @@
-"""
-ETF Selection Service for optimizing ETF selection based on investment goals
-"""
-
-import logging
-from typing import Dict, List, Optional, Tuple
-from dataclasses import dataclass
-from enum import Enum
-import pandas as pd
-import numpy as np
-from datetime import datetime
-import os
-
-logger = logging.getLogger(__name__)
-
-class RiskTolerance(Enum):
-    CONSERVATIVE = "Conservative"
-    MODERATE = "Moderate"
-    AGGRESSIVE = "Aggressive"
-
-@dataclass
-class InvestmentGoal:
-    capital_target: float
-    income_target: Optional[float] = None
-    risk_tolerance: RiskTolerance = RiskTolerance.MODERATE
-    investment_horizon: int = 5  # years
-
-class ETFSelectionService:
-    def __init__(self, data_service):
-        self.data_service = data_service
-        self.selection_criteria = {
-            'tier1': {
-                'expense_ratio': 0.10,  # 0.10% or less
-                'aum': 5_000_000_000,   # $5B or more
-                'tracking_error': 0.05,  # 0.05% or less
-                'avg_volume': 100_000    # 100K shares/day
-            },
-            'tier2': {
-                'expense_ratio': 0.25,  # 0.25% or less
-                'aum': 1_000_000_000,   # $1B or more
-                'tracking_error': 0.10,  # 0.10% or less
-                'avg_volume': 50_000     # 50K shares/day
-            }
-        }
-        
-    def select_etfs(self, goal: InvestmentGoal) -> List[Dict]:
-        """
-        Select ETFs based on investment goals and risk tolerance
-        Returns a list of recommended ETFs with allocation percentages
-        """
-        try:
-            logger.info(f"Starting ETF selection with goal: {goal}")
-            
-            # Validate investment goal
-            self._validate_investment_goal(goal)
-            
-            # Get all available ETFs
-            etfs = self._get_available_etfs()
-            if not etfs:
-                error_msg = "No ETFs available for selection. Please check your data source connection."
-                logger.error(error_msg)
-                raise ValueError(error_msg)
-            
-            logger.info(f"Found {len(etfs)} available ETFs")
-            
-            # Filter ETFs based on criteria
-            filtered_etfs = self._filter_etfs(etfs, goal)
-            if not filtered_etfs:
-                error_msg = (
-                    "No ETFs passed the filtering criteria. This could be due to:\n"
-                    "1. High expense ratios (>0.25%)\n"
-                    "2. Low assets under management (<$1B)\n"
-                    "3. High tracking error (>0.10%)\n"
-                    "4. Low trading volume (<50K shares/day)\n"
-                    "Please try adjusting your risk tolerance or investment goals."
-                )
-                logger.error(error_msg)
-                raise ValueError(error_msg)
-            
-            logger.info(f"{len(filtered_etfs)} ETFs passed filtering criteria")
-            
-            # Score ETFs based on criteria
-            scored_etfs = self._score_etfs(filtered_etfs, goal)
-            if not scored_etfs:
-                error_msg = (
-                    "No ETFs passed the scoring criteria. This could be due to:\n"
-                    "1. Poor risk-adjusted returns (low Sharpe ratio)\n"
-                    "2. High volatility\n"
-                    "3. Large drawdowns\n"
-                    "4. Insufficient dividend yield for income goals\n"
-                    "Please try adjusting your risk tolerance or investment goals."
-                )
-                logger.error(error_msg)
-                raise ValueError(error_msg)
-            
-            logger.info(f"{len(scored_etfs)} ETFs passed scoring criteria")
-            
-            # Optimize portfolio allocation
-            portfolio = self._optimize_portfolio(scored_etfs, goal)
-            if not portfolio:
-                error_msg = (
-                    "Failed to optimize portfolio allocation. This could be due to:\n"
-                    "1. Insufficient diversification opportunities\n"
-                    "2. Conflicting investment goals\n"
-                    "3. Risk tolerance constraints\n"
-                    "Please try adjusting your investment goals or risk tolerance."
-                )
-                logger.error(error_msg)
-                raise ValueError(error_msg)
-            
-            logger.info(f"Successfully generated portfolio with {len(portfolio)} ETFs")
-            return portfolio
-            
-        except ValueError as ve:
-            # Re-raise ValueError with the same message
-            raise ve
-        except Exception as e:
-            error_msg = f"Unexpected error during ETF selection: {str(e)}"
-            logger.error(error_msg, exc_info=True)
-            raise ValueError(f"Unable to generate portfolio suggestions: {str(e)}")
-    
-    def _validate_investment_goal(self, goal: InvestmentGoal) -> None:
-        """Validate investment goal parameters"""
-        if goal.capital_target <= 0:
-            raise ValueError("Capital target must be greater than 0")
-        
-        if goal.income_target and goal.income_target <= 0:
-            raise ValueError("Income target must be greater than 0")
-        
-        if not isinstance(goal.risk_tolerance, RiskTolerance):
-            raise ValueError("Risk tolerance must be a valid RiskTolerance enum value")
-    
-    def _get_available_etfs(self) -> List[Dict]:
-        """Get list of available ETFs with their data"""
-        try:
-            # Get list of ETFs
-            etf_list = self.data_service.get_etf_list()
-            if not etf_list:
-                error_msg = "No ETFs available from data service. Please check your data source connection."
-                logger.error(error_msg)
-                raise ValueError(error_msg)
-            
-            logger.info(f"Retrieved {len(etf_list)} ETFs from data service")
-            
-            # Process each ETF
-            processed_etfs = []
-            for ticker in etf_list:
-                try:
-                    # Get ETF data
-                    etf_data = self.data_service.get_etf_data(ticker)
-                    if not etf_data:
-                        logger.warning(f"No data available for {ticker}")
-                        continue
-                    
-                    # Process ETF data
-                    processed_etf = {
-                        'ticker': ticker,
-                        'name': etf_data.get('info', {}).get('longName', ticker),
-                        'expense_ratio': etf_data.get('info', {}).get('annualReportExpenseRatio', 0.5) / 100,
-                        'aum': etf_data.get('info', {}).get('totalAssets', 0),
-                        'avg_volume': etf_data.get('info', {}).get('averageVolume', 0),
-                        'tracking_error': 0.0,  # Not available from yfinance
-                        'volatility': float(etf_data.get('volatility', 0)),
-                        'max_drawdown': float(etf_data.get('max_drawdown', 0)),
-                        'sharpe_ratio': float(etf_data.get('sharpe_ratio', 0)),
-                        'top_holding_weight': 0.0,  # Not available from yfinance
-                        'dividend_yield': float(etf_data.get('dividend_yield', 0)) / 100,
-                        'category': etf_data.get('info', {}).get('category', 'Unknown'),
-                        'asset_class': etf_data.get('info', {}).get('assetClass', 'Unknown'),
-                        'sector': etf_data.get('info', {}).get('sector', 'Unknown'),
-                        'region': etf_data.get('info', {}).get('region', 'Unknown'),
-                        'strategy': etf_data.get('info', {}).get('strategy', 'Unknown')
-                    }
-                    
-                    processed_etfs.append(processed_etf)
-                    
-                except Exception as e:
-                    logger.warning(f"Error processing ETF {ticker}: {str(e)}")
-                    continue
-            
-            if not processed_etfs:
-                error_msg = "No ETFs could be processed. Please check your data source connection."
-                logger.error(error_msg)
-                raise ValueError(error_msg)
-            
-            logger.info(f"Successfully processed {len(processed_etfs)} ETFs")
-            return processed_etfs
-            
-        except Exception as e:
-            error_msg = f"Error fetching available ETFs: {str(e)}"
-            logger.error(error_msg)
-            raise ValueError(error_msg)
-    
-    def _get_etfs_from_yfinance(self) -> List[Dict]:
-        """Get ETF data from yfinance as fallback"""
-        try:
-            # Get list of ETFs from data service
-            etf_list = self.data_service.get_etf_list()
-            if not etf_list:
-                logger.error("No ETF list available from data service")
-                return []
-            
-            processed_etfs = []
-            
-            for ticker in etf_list:
-                try:
-                    # Get ETF data from data service
-                    etf_data = self.data_service.get_etf_data(ticker)
-                    if not etf_data:
-                        continue
-                    
-                    # Process ETF data
-                    processed_etf = {
-                        'ticker': ticker,
-                        'name': etf_data.get('info', {}).get('longName', ticker),
-                        'expense_ratio': etf_data.get('info', {}).get('annualReportExpenseRatio', 0.5) / 100,
-                        'aum': etf_data.get('info', {}).get('totalAssets', 0),
-                        'avg_volume': etf_data.get('info', {}).get('averageVolume', 0),
-                        'tracking_error': 0.0,  # Not available from yfinance
-                        'volatility': float(etf_data.get('volatility', 0)),
-                        'max_drawdown': float(etf_data.get('max_drawdown', 0)),
-                        'sharpe_ratio': float(etf_data.get('sharpe_ratio', 0)),
-                        'top_holding_weight': 0.0,  # Not available from yfinance
-                        'dividend_yield': float(etf_data.get('dividend_yield', 0)) / 100,
-                        'category': etf_data.get('info', {}).get('category', 'Unknown'),
-                        'asset_class': etf_data.get('info', {}).get('assetClass', 'Unknown'),
-                        'sector': etf_data.get('info', {}).get('sector', 'Unknown'),
-                        'region': etf_data.get('info', {}).get('region', 'Unknown'),
-                        'strategy': etf_data.get('info', {}).get('strategy', 'Unknown')
-                    }
-                    
-                    processed_etfs.append(processed_etf)
-                    
-                except Exception as e:
-                    logger.warning(f"Error processing ETF {ticker} from yfinance: {str(e)}")
-                    continue
-            
-            return processed_etfs
-            
-        except Exception as e:
-            logger.error(f"Error fetching ETFs from yfinance: {str(e)}")
-            return []
-    
-    def _filter_etfs(self, etfs: List[Dict], goal: InvestmentGoal) -> List[Dict]:
-        """Filter ETFs based on selection criteria and investment goals"""
-        filtered = []
-        
-        for etf in etfs:
-            try:
-                # Skip ETFs with red flags
-                if self._has_red_flags(etf):
-                    continue
-                
-                # Apply investment goal specific filters
-                if goal.income_target:
-                    # For income-focused goals, ensure minimum dividend yield
-                    min_yield = 0.02  # 2% minimum yield
-                    if goal.income_target > 0.05:  # If income target is high
-                        min_yield = 0.04  # Require higher yield
-                    if etf.get('dividend_yield', 0) < min_yield:
-                        continue
-                
-                # Apply risk tolerance specific filters
-                if goal.risk_tolerance == RiskTolerance.CONSERVATIVE:
-                    # For conservative investors, focus on:
-                    # - Lower volatility
-                    # - Higher dividend yield
-                    # - Lower max drawdown
-                    # - More established ETFs
-                    if (etf.get('volatility', 1.0) > 0.15 or
-                        etf.get('max_drawdown', 1.0) > 0.20 or
-                        etf.get('aum', 0) < 1_000_000_000 or  # $1B minimum
-                        etf.get('asset_class', '').lower() in ['leveraged', 'inverse']):
-                        continue
-                elif goal.risk_tolerance == RiskTolerance.AGGRESSIVE:
-                    # For aggressive investors, focus on:
-                    # - Higher potential returns (Sharpe ratio)
-                    # - Growth potential
-                    # - Sector/theme exposure
-                    if (etf.get('sharpe_ratio', 0) < 0.8 or
-                        etf.get('category', '').lower() in ['bond', 'fixed income'] or
-                        etf.get('aum', 0) < 500_000_000):  # $500M minimum
-                        continue
-                
-                # Check if ETF meets tier criteria
-                tier = self._get_etf_tier(etf)
-                if tier > 0:  # Only include ETFs that meet at least tier 2 criteria
-                    etf['tier'] = tier
-                    filtered.append(etf)
-                    
-            except Exception as e:
-                logger.warning(f"Error filtering ETF {etf.get('ticker', 'unknown')}: {str(e)}")
-                continue
-        
-        return filtered
-    
-    def _has_red_flags(self, etf: Dict) -> bool:
-        """Check if ETF has any red flags"""
-        try:
-            # High expense ratio
-            if etf.get('expense_ratio', 1.0) > 0.50:
-                logger.debug(f"ETF {etf.get('ticker')} rejected: High expense ratio")
-                return True
-                
-            # Small AUM
-            if etf.get('aum', 0) < 100_000_000:  # $100M
-                logger.debug(f"ETF {etf.get('ticker')} rejected: Small AUM")
-                return True
-                
-            # High tracking error
-            if etf.get('tracking_error', 1.0) > 0.50:
-                logger.debug(f"ETF {etf.get('ticker')} rejected: High tracking error")
-                return True
-                
-            # Concentrated holdings
-            if etf.get('top_holding_weight', 0) > 0.20:  # 20%
-                logger.debug(f"ETF {etf.get('ticker')} rejected: Concentrated holdings")
-                return True
-                
-            return False
-            
-        except Exception as e:
-            logger.warning(f"Error checking red flags for ETF {etf.get('ticker', 'unknown')}: {str(e)}")
-            return True
-    
-    def _get_etf_tier(self, etf: Dict) -> int:
-        """Determine ETF tier based on criteria"""
-        try:
-            # Tier 1 criteria
-            if (etf.get('expense_ratio', 1.0) <= self.selection_criteria['tier1']['expense_ratio'] and
-                etf.get('aum', 0) >= self.selection_criteria['tier1']['aum'] and
-                etf.get('tracking_error', 1.0) <= self.selection_criteria['tier1']['tracking_error'] and
-                etf.get('avg_volume', 0) >= self.selection_criteria['tier1']['avg_volume']):
-                return 1
-                
-            # Tier 2 criteria
-            if (etf.get('expense_ratio', 1.0) <= self.selection_criteria['tier2']['expense_ratio'] and
-                etf.get('aum', 0) >= self.selection_criteria['tier2']['aum'] and
-                etf.get('tracking_error', 1.0) <= self.selection_criteria['tier2']['tracking_error'] and
-                etf.get('avg_volume', 0) >= self.selection_criteria['tier2']['avg_volume']):
-                return 2
-                
-            return 0
-            
-        except Exception as e:
-            logger.warning(f"Error determining tier for ETF {etf.get('ticker', 'unknown')}: {str(e)}")
-            return 0
-    
-    def _score_etfs(self, etfs: List[Dict], goal: InvestmentGoal) -> List[Dict]:
-        """Score ETFs based on investment goals and risk tolerance"""
-        scored = []
-        
-        for etf in etfs:
-            try:
-                score = 0
-                score_components = {}
-                
-                # Base score from tier
-                tier_score = (3 - etf['tier']) * 10  # Higher score for better tier
-                score += tier_score
-                score_components['tier'] = tier_score
-                
-                # Risk-adjusted return score
-                if etf.get('sharpe_ratio'):
-                    sharpe_score = min(etf['sharpe_ratio'] * 5, 20)  # Max 20 points
-                    score += sharpe_score
-                    score_components['sharpe'] = sharpe_score
-                    
-                # Volatility score (lower is better)
-                if etf.get('volatility'):
-                    vol_score = max(0, 20 - (etf['volatility'] * 100))  # Max 20 points
-                    score += vol_score
-                    score_components['volatility'] = vol_score
-                    
-                # Income goal specific scoring
-                if goal.income_target:
-                    # Score based on dividend yield and stability
-                    if etf.get('dividend_yield'):
-                        # Higher score for ETFs with yield closer to target
-                        target_yield = goal.income_target / goal.capital_target
-                        yield_diff = abs(etf['dividend_yield'] - target_yield)
-                        div_score = max(0, 20 - (yield_diff * 100))  # Max 20 points
-                        score += div_score
-                        score_components['dividend'] = div_score
-                        
-                        # Bonus for stable dividends
-                        if etf.get('dividend_trend', 0) > 0:
-                            score += 5
-                            score_components['dividend_stability'] = 5
-                
-                # AUM score
-                aum_billions = etf.get('aum', 0) / 1_000_000_000
-                aum_score = min(aum_billions, 10)  # Max 10 points
-                score += aum_score
-                score_components['aum'] = aum_score
-                
-                # Risk tolerance specific scoring
-                if goal.risk_tolerance == RiskTolerance.CONSERVATIVE:
-                    # Favor stability and income
-                    if etf.get('dividend_yield', 0) > 0.03:  # 3% yield
-                        score += 10
-                        score_components['income_focus'] = 10
-                    if etf.get('volatility', 1.0) < 0.12:  # Low volatility
-                        score += 10
-                        score_components['stability'] = 10
-                    if etf.get('asset_class', '').lower() in ['equity', 'fixed income']:
-                        score += 5
-                        score_components['asset_class'] = 5
-                elif goal.risk_tolerance == RiskTolerance.AGGRESSIVE:
-                    # Favor growth and momentum
-                    if etf.get('sharpe_ratio', 0) > 1.2:  # High Sharpe
-                        score += 10
-                        score_components['risk_adjusted_return'] = 10
-                    if etf.get('category', '').lower() in ['technology', 'growth']:
-                        score += 10
-                        score_components['growth_potential'] = 10
-                    if etf.get('strategy', '').lower() in ['momentum', 'growth']:
-                        score += 5
-                        score_components['strategy'] = 5
-                
-                # Add score and components to ETF data
-                etf['score'] = score
-                etf['score_components'] = score_components
-                scored.append(etf)
-                
-            except Exception as e:
-                logger.warning(f"Error scoring ETF {etf.get('ticker', 'unknown')}: {str(e)}")
-                continue
-        
-        # Sort by score in descending order
-        scored.sort(key=lambda x: x.get('score', 0), reverse=True)
-        return scored
-    
-    def _optimize_portfolio(self, scored_etfs: List[Dict], goal: InvestmentGoal) -> List[Dict]:
-        """Optimize portfolio allocation based on investment goals"""
-        try:
-            # Select top ETFs based on score
-            top_etfs = scored_etfs[:5]  # Limit to top 5 ETFs
-            
-            # Calculate initial weights based on scores
-            total_score = sum(etf['score'] for etf in top_etfs)
-            if total_score == 0:
-                raise ValueError("No valid scores for portfolio optimization")
-                
-            # Assign weights based on scores
-            for etf in top_etfs:
-                etf['weight'] = etf['score'] / total_score
-                
-            # Adjust weights based on risk tolerance
-            if goal.risk_tolerance == RiskTolerance.CONSERVATIVE:
-                # Favor ETFs with lower volatility and higher Sharpe ratio
-                for etf in top_etfs:
-                    if etf.get('volatility', 0) > 0.15 or etf.get('sharpe_ratio', 0) < 1.0:
-                        etf['weight'] *= 0.5
-            elif goal.risk_tolerance == RiskTolerance.AGGRESSIVE:
-                # Favor ETFs with higher potential returns
-                for etf in top_etfs:
-                    if etf.get('sharpe_ratio', 0) > 1.5:
-                        etf['weight'] *= 1.5
-                        
-            # Normalize weights
-            total_weight = sum(etf['weight'] for etf in top_etfs)
-            for etf in top_etfs:
-                etf['weight'] /= total_weight
-                
-            # Format portfolio output
-            portfolio = []
-            for etf in top_etfs:
-                portfolio.append({
-                    'ticker': etf['ticker'],
-                    'name': etf['name'],
-                    'allocation': etf['weight'],
-                    'amount': goal.capital_target * etf['weight'],
-                    'score': etf['score'],
-                    'tier': etf['tier'],
-                    'metrics': {
-                        'expense_ratio': etf.get('expense_ratio'),
-                        'aum': etf.get('aum'),
-                        'tracking_error': etf.get('tracking_error'),
-                        'volatility': etf.get('volatility'),
-                        'sharpe_ratio': etf.get('sharpe_ratio'),
-                        'dividend_yield': etf.get('dividend_yield'),
-                        'max_drawdown': etf.get('max_drawdown')
-                    }
-                })
-                
-            return portfolio
-            
-        except Exception as e:
-            logger.error(f"Error optimizing portfolio: {str(e)}")
-            raise ValueError(f"Failed to optimize portfolio allocation: {str(e)}") 
\ No newline at end of file
diff --git a/ETF_Portal/services/etf_selection_service/__init__.py b/ETF_Portal/services/etf_selection_service/__init__.py
new file mode 100644
index 0000000..13bd70f
--- /dev/null
+++ b/ETF_Portal/services/etf_selection_service/__init__.py
@@ -0,0 +1,23 @@
+"""
+ETF Selection Service package initialization
+"""
+
+from .service import ETFSelectionService
+from .models import InvestmentGoal, RiskTolerance, ETF, ETFUniverse
+from .exceptions import (
+    ETFSelectionError, ETFDataError, ETFNotFoundError,
+    ValidationError, PortfolioOptimizationError
+)
+
+__all__ = [
+    'ETFSelectionService',
+    'InvestmentGoal',
+    'RiskTolerance',
+    'ETF',
+    'ETFUniverse',
+    'ETFSelectionError',
+    'ETFDataError',
+    'ETFNotFoundError',
+    'ValidationError',
+    'PortfolioOptimizationError'
+] 
\ No newline at end of file
diff --git a/ETF_Portal/services/etf_selection_service/database.py b/ETF_Portal/services/etf_selection_service/database.py
new file mode 100644
index 0000000..0c10471
--- /dev/null
+++ b/ETF_Portal/services/etf_selection_service/database.py
@@ -0,0 +1,179 @@
+"""
+Database operations for ETF Selection Service
+"""
+
+import logging
+import sqlite3
+from typing import Dict, List, Optional
+from datetime import datetime
+import json
+
+from .models import ETF, ETFUniverse
+from .exceptions import DatabaseError, DataUpdateError
+
+logger = logging.getLogger(__name__)
+
+class ETFDatabase:
+    def __init__(self, db_path: str = "etf_data.db"):
+        self.db_path = db_path
+        self._init_db()
+    
+    def _init_db(self):
+        """Initialize database tables"""
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                
+                # Create ETFs table
+                cursor.execute("""
+                    CREATE TABLE IF NOT EXISTS etfs (
+                        ticker TEXT PRIMARY KEY,
+                        name TEXT,
+                        expense_ratio REAL,
+                        aum REAL,
+                        avg_volume INTEGER,
+                        tracking_error REAL,
+                        volatility REAL,
+                        max_drawdown REAL,
+                        sharpe_ratio REAL,
+                        top_holding_weight REAL,
+                        dividend_yield REAL,
+                        category TEXT,
+                        asset_class TEXT,
+                        sector TEXT,
+                        region TEXT,
+                        strategy TEXT,
+                        last_updated TEXT
+                    )
+                """)
+                
+                # Create categories table
+                cursor.execute("""
+                    CREATE TABLE IF NOT EXISTS categories (
+                        category TEXT PRIMARY KEY,
+                        etfs TEXT,
+                        last_updated TEXT
+                    )
+                """)
+                
+                conn.commit()
+        except sqlite3.Error as e:
+            raise DatabaseError(f"Failed to initialize database: {str(e)}")
+    
+    def save_etf(self, etf: ETF):
+        """Save ETF data to database"""
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                cursor.execute("""
+                    INSERT OR REPLACE INTO etfs (
+                        ticker, name, expense_ratio, aum, avg_volume,
+                        tracking_error, volatility, max_drawdown,
+                        sharpe_ratio, top_holding_weight, dividend_yield,
+                        category, asset_class, sector, region, strategy,
+                        last_updated
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """, (
+                    etf.ticker, etf.name, etf.expense_ratio, etf.aum,
+                    etf.avg_volume, etf.tracking_error, etf.volatility,
+                    etf.max_drawdown, etf.sharpe_ratio, etf.top_holding_weight,
+                    etf.dividend_yield, etf.category, etf.asset_class,
+                    etf.sector, etf.region, etf.strategy,
+                    datetime.now().isoformat()
+                ))
+                conn.commit()
+        except sqlite3.Error as e:
+            raise DataUpdateError(f"Failed to save ETF data: {str(e)}")
+    
+    def get_etf(self, ticker: str) -> Optional[ETF]:
+        """Get ETF data from database"""
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                cursor.execute("SELECT * FROM etfs WHERE ticker = ?", (ticker,))
+                row = cursor.fetchone()
+                
+                if row:
+                    return ETF(
+                        ticker=row[0],
+                        name=row[1],
+                        expense_ratio=row[2],
+                        aum=row[3],
+                        avg_volume=row[4],
+                        tracking_error=row[5],
+                        volatility=row[6],
+                        max_drawdown=row[7],
+                        sharpe_ratio=row[8],
+                        top_holding_weight=row[9],
+                        dividend_yield=row[10],
+                        category=row[11],
+                        asset_class=row[12],
+                        sector=row[13],
+                        region=row[14],
+                        strategy=row[15]
+                    )
+                return None
+        except sqlite3.Error as e:
+            raise DatabaseError(f"Failed to get ETF data: {str(e)}")
+    
+    def save_universe(self, universe: ETFUniverse):
+        """Save ETF universe to database"""
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                
+                # Save categories
+                for category, etfs in universe.categories.items():
+                    cursor.execute("""
+                        INSERT OR REPLACE INTO categories (
+                            category, etfs, last_updated
+                        ) VALUES (?, ?, ?)
+                    """, (
+                        category,
+                        json.dumps(etfs),
+                        datetime.now().isoformat()
+                    ))
+                
+                conn.commit()
+        except sqlite3.Error as e:
+            raise DataUpdateError(f"Failed to save ETF universe: {str(e)}")
+    
+    def get_universe(self) -> ETFUniverse:
+        """Get ETF universe from database"""
+        try:
+            universe = ETFUniverse()
+            
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                
+                # Get all ETFs
+                cursor.execute("SELECT * FROM etfs")
+                for row in cursor.fetchall():
+                    etf = ETF(
+                        ticker=row[0],
+                        name=row[1],
+                        expense_ratio=row[2],
+                        aum=row[3],
+                        avg_volume=row[4],
+                        tracking_error=row[5],
+                        volatility=row[6],
+                        max_drawdown=row[7],
+                        sharpe_ratio=row[8],
+                        top_holding_weight=row[9],
+                        dividend_yield=row[10],
+                        category=row[11],
+                        asset_class=row[12],
+                        sector=row[13],
+                        region=row[14],
+                        strategy=row[15]
+                    )
+                    universe.etfs[etf.ticker] = etf
+                
+                # Get categories
+                cursor.execute("SELECT category, etfs FROM categories")
+                for row in cursor.fetchall():
+                    universe.categories[row[0]] = json.loads(row[1])
+                
+                return universe
+        except sqlite3.Error as e:
+            raise DatabaseError(f"Failed to get ETF universe: {str(e)}") 
\ No newline at end of file
diff --git a/ETF_Portal/services/etf_selection_service/exceptions.py b/ETF_Portal/services/etf_selection_service/exceptions.py
new file mode 100644
index 0000000..c31d19e
--- /dev/null
+++ b/ETF_Portal/services/etf_selection_service/exceptions.py
@@ -0,0 +1,31 @@
+"""
+Custom exceptions for ETF Selection Service
+"""
+
+class ETFSelectionError(Exception):
+    """Base exception for ETF selection errors"""
+    pass
+
+class ETFDataError(ETFSelectionError):
+    """Exception raised when ETF data is invalid or missing"""
+    pass
+
+class ETFNotFoundError(ETFSelectionError):
+    """Exception raised when ETF is not found"""
+    pass
+
+class DatabaseError(ETFSelectionError):
+    """Exception raised for database-related errors"""
+    pass
+
+class DataUpdateError(ETFSelectionError):
+    """Exception raised when data update fails"""
+    pass
+
+class ValidationError(ETFSelectionError):
+    """Exception raised when input validation fails"""
+    pass
+
+class PortfolioOptimizationError(ETFSelectionError):
+    """Exception raised when portfolio optimization fails"""
+    pass 
\ No newline at end of file
diff --git a/ETF_Portal/services/etf_selection_service/models.py b/ETF_Portal/services/etf_selection_service/models.py
new file mode 100644
index 0000000..0028bbe
--- /dev/null
+++ b/ETF_Portal/services/etf_selection_service/models.py
@@ -0,0 +1,52 @@
+"""
+Data models and enums for ETF Selection Service
+"""
+
+from dataclasses import dataclass
+from enum import Enum
+from typing import Optional, Dict, List
+
+class RiskTolerance(Enum):
+    CONSERVATIVE = "Conservative"
+    MODERATE = "Moderate"
+    AGGRESSIVE = "Aggressive"
+
+@dataclass
+class InvestmentGoal:
+    capital_target: float
+    income_target: Optional[float] = None
+    risk_tolerance: RiskTolerance = RiskTolerance.MODERATE
+    investment_horizon: int = 5  # years
+
+@dataclass
+class ETF:
+    ticker: str
+    name: str
+    expense_ratio: float
+    aum: float
+    avg_volume: float
+    tracking_error: float
+    volatility: float
+    max_drawdown: float
+    sharpe_ratio: float
+    top_holding_weight: float
+    dividend_yield: float
+    category: str
+    asset_class: str
+    sector: str
+    region: str
+    strategy: str
+
+class ETFCategory(Enum):
+    EQUITY = "Equity"
+    FIXED_INCOME = "Fixed Income"
+    COMMODITY = "Commodity"
+    REAL_ESTATE = "Real Estate"
+    CRYPTO = "Crypto"
+    MULTI_ASSET = "Multi-Asset"
+
+class ETFUniverse:
+    def __init__(self):
+        self.etfs: Dict[str, ETF] = {}
+        self.categories: Dict[str, List[str]] = {}
+        self.last_updated: Optional[str] = None 
\ No newline at end of file
diff --git a/ETF_Portal/services/etf_selection_service/service.py b/ETF_Portal/services/etf_selection_service/service.py
new file mode 100644
index 0000000..30cda60
--- /dev/null
+++ b/ETF_Portal/services/etf_selection_service/service.py
@@ -0,0 +1,418 @@
+"""
+ETF Selection Service for optimizing ETF selection based on investment goals
+"""
+
+import logging
+from typing import Dict, List, Optional, Tuple
+import pandas as pd
+import numpy as np
+from datetime import datetime
+import os
+
+from .models import InvestmentGoal, RiskTolerance, ETF, ETFUniverse, ETFCategory
+from .exceptions import (
+    ETFSelectionError, ETFDataError, ETFNotFoundError,
+    ValidationError, PortfolioOptimizationError
+)
+from .database import ETFDatabase
+from .utils import (
+    calculate_volatility, calculate_max_drawdown,
+    calculate_sharpe_ratio, calculate_sortino_ratio,
+    calculate_portfolio_metrics, is_cache_valid
+)
+
+logger = logging.getLogger(__name__)
+
+class ETFSelectionService:
+    def __init__(self, data_service):
+        """
+        Initialize ETF Selection Service
+        
+        Args:
+            data_service: Service that provides ETF data (must implement get_etf_list() and get_etf_data())
+        """
+        self.data_service = data_service
+        self.db = ETFDatabase()
+        self.cache = {}
+        self.cache_ttl = 3600  # 1 hour cache TTL
+        
+        self.selection_criteria = {
+            'tier1': {
+                'expense_ratio': 0.10,  # 0.10% or less
+                'aum': 5_000_000_000,   # $5B or more
+                'tracking_error': 0.05,  # 0.05% or less
+                'avg_volume': 100_000    # 100K shares/day
+            },
+            'tier2': {
+                'expense_ratio': 0.25,  # 0.25% or less
+                'aum': 1_000_000_000,   # $1B or more
+                'tracking_error': 0.10,  # 0.10% or less
+                'avg_volume': 50_000     # 50K shares/day
+            }
+        }
+    
+    def select_etfs(self, goal: InvestmentGoal) -> List[Dict]:
+        """
+        Select ETFs based on investment goals and risk tolerance
+        Returns a list of recommended ETFs with allocation percentages
+        """
+        try:
+            logger.info(f"Starting ETF selection with goal: {goal}")
+            
+            # Validate investment goal
+            self._validate_investment_goal(goal)
+            
+            # Get ETF data as DataFrame
+            etfs_df = self._get_etfs_dataframe()
+            if etfs_df.empty:
+                raise ETFDataError("No ETFs available for selection")
+            
+            logger.info(f"Found {len(etfs_df)} available ETFs")
+            
+            # Filter ETFs based on criteria
+            filtered_df = self._filter_etfs_vectorized(etfs_df, goal)
+            if filtered_df.empty:
+                raise ETFSelectionError(
+                    "No ETFs passed the filtering criteria. Please try adjusting your risk tolerance or investment goals."
+                )
+            
+            logger.info(f"{len(filtered_df)} ETFs passed filtering criteria")
+            
+            # Score ETFs based on criteria
+            scored_df = self._score_etfs_vectorized(filtered_df, goal)
+            if scored_df.empty:
+                raise ETFSelectionError(
+                    "No ETFs passed the scoring criteria. Please try adjusting your risk tolerance or investment goals."
+                )
+            
+            logger.info(f"{len(scored_df)} ETFs passed scoring criteria")
+            
+            # Optimize portfolio allocation
+            portfolio = self._optimize_portfolio_vectorized(scored_df, goal)
+            if portfolio.empty:
+                raise PortfolioOptimizationError(
+                    "Failed to optimize portfolio allocation. Please try adjusting your investment goals or risk tolerance."
+                )
+            
+            logger.info(f"Successfully generated portfolio with {len(portfolio)} ETFs")
+            return portfolio.to_dict('records')
+            
+        except ETFSelectionError as e:
+            logger.error(str(e))
+            raise
+        except Exception as e:
+            error_msg = f"Unexpected error during ETF selection: {str(e)}"
+            logger.error(error_msg, exc_info=True)
+            raise ETFSelectionError(error_msg)
+    
+    def _validate_investment_goal(self, goal: InvestmentGoal) -> None:
+        """Validate investment goal parameters"""
+        if goal.capital_target <= 0:
+            raise ValidationError("Capital target must be greater than 0")
+        
+        if goal.income_target and goal.income_target <= 0:
+            raise ValidationError("Income target must be greater than 0")
+        
+        if not isinstance(goal.risk_tolerance, RiskTolerance):
+            raise ValidationError("Risk tolerance must be a valid RiskTolerance enum value")
+    
+    def _get_etfs_dataframe(self) -> pd.DataFrame:
+        """Get ETF data as DataFrame with caching"""
+        cache_key = 'etfs_dataframe'
+        
+        # Check cache first
+        if cache_key in self.cache:
+            cache_data, timestamp = self.cache[cache_key]
+            if is_cache_valid(timestamp, self.cache_ttl):
+                return cache_data
+        
+        try:
+            # Get list of ETFs
+            etf_list = self.data_service.get_etf_list()
+            if not etf_list:
+                raise ETFDataError("No ETFs available from data service")
+            
+            # Batch fetch ETF data
+            etf_data = []
+            for ticker in etf_list:
+                try:
+                    data = self.data_service.get_etf_data(ticker)
+                    if data:
+                        etf_data.append({
+                            'ticker': ticker,
+                            'name': data.get('info', {}).get('longName', ticker),
+                            'expense_ratio': data.get('info', {}).get('annualReportExpenseRatio', 0.5) / 100,
+                            'aum': data.get('info', {}).get('totalAssets', 0),
+                            'avg_volume': data.get('info', {}).get('averageVolume', 0),
+                            'tracking_error': 0.0,  # Not available from yfinance
+                            'volatility': float(data.get('volatility', 0)),
+                            'max_drawdown': float(data.get('max_drawdown', 0)),
+                            'sharpe_ratio': float(data.get('sharpe_ratio', 0)),
+                            'top_holding_weight': 0.0,  # Not available from yfinance
+                            'dividend_yield': float(data.get('dividend_yield', 0)) / 100,
+                            'category': data.get('info', {}).get('category', 'Unknown'),
+                            'asset_class': data.get('info', {}).get('assetClass', 'Unknown'),
+                            'sector': data.get('info', {}).get('sector', 'Unknown'),
+                            'region': data.get('info', {}).get('region', 'Unknown'),
+                            'strategy': data.get('info', {}).get('strategy', 'Unknown')
+                        })
+                except Exception as e:
+                    logger.warning(f"Error processing ETF {ticker}: {str(e)}")
+                    continue
+            
+            if not etf_data:
+                raise ETFDataError("No ETFs could be processed")
+            
+            # Convert to DataFrame
+            df = pd.DataFrame(etf_data)
+            
+            # Cache the result
+            self.cache[cache_key] = (df, datetime.now().isoformat())
+            
+            return df
+            
+        except Exception as e:
+            raise ETFDataError(f"Error fetching ETF data: {str(e)}")
+    
+    def _filter_etfs_vectorized(self, df: pd.DataFrame, goal: InvestmentGoal) -> pd.DataFrame:
+        """Filter ETFs using vectorized operations"""
+        # Get criteria based on risk tolerance
+        criteria = self.selection_criteria['tier1' if goal.risk_tolerance == RiskTolerance.CONSERVATIVE else 'tier2']
+        
+        # Create boolean masks for filtering
+        mask = (
+            (df['expense_ratio'] <= criteria['expense_ratio']) &
+            (df['aum'] >= criteria['aum']) &
+            (df['tracking_error'] <= criteria['tracking_error']) &
+            (df['avg_volume'] >= criteria['avg_volume'])
+        )
+        
+        return df[mask]
+    
+    def _score_etfs_vectorized(self, df: pd.DataFrame, goal: InvestmentGoal) -> pd.DataFrame:
+        """Score ETFs using vectorized operations"""
+        # Calculate risk score
+        df['risk_score'] = (
+            df['volatility'] * 0.3 +
+            df['max_drawdown'] * 0.3 +
+            (1 - df['sharpe_ratio']) * 0.4
+        )
+        
+        # Calculate income score if income target is specified
+        if goal.income_target:
+            df['income_score'] = df['dividend_yield'] / goal.income_target
+        
+        # Calculate final score
+        if goal.income_target:
+            df['final_score'] = (
+                (1 - df['risk_score']) * 0.6 +
+                df['income_score'] * 0.4
+            )
+        else:
+            df['final_score'] = 1 - df['risk_score']
+        
+        # Normalize scores
+        df['final_score'] = (df['final_score'] - df['final_score'].min()) / (df['final_score'].max() - df['final_score'].min())
+        
+        # Filter out ETFs with low scores
+        return df[df['final_score'] >= 0.5]
+    
+    def _optimize_portfolio_vectorized(self, df: pd.DataFrame, goal: InvestmentGoal) -> pd.DataFrame:
+        """Optimize portfolio allocation using vectorized operations"""
+        # Sort by final score
+        df = df.sort_values('final_score', ascending=False)
+        
+        # Select top ETFs based on risk tolerance
+        n_etfs = 5 if goal.risk_tolerance == RiskTolerance.CONSERVATIVE else 10
+        df = df.head(n_etfs)
+        
+        # Calculate weights based on scores
+        df['weight'] = df['final_score'] / df['final_score'].sum()
+        
+        # Add allocation amount (already in percentage)
+        df['allocation'] = df['weight'] * 100  # Convert to percentage
+        df['amount'] = df['weight'] * goal.capital_target  # Calculate actual amount
+        
+        # Add metrics dictionary for each ETF
+        df['metrics'] = df.apply(lambda row: {
+            'expense_ratio': row['expense_ratio'],
+            'aum': row['aum'],
+            'volatility': row['volatility'],
+            'max_drawdown': row['max_drawdown'],
+            'sharpe_ratio': row['sharpe_ratio'],
+            'dividend_yield': row['dividend_yield']
+        }, axis=1)
+        
+        return df[['ticker', 'name', 'allocation', 'amount', 'metrics']]
+    
+    def _update_etf_universe(self):
+        """Update ETF universe with latest data"""
+        try:
+            # Get list of ETFs to update
+            etfs_to_update = self._get_etfs_to_update()
+            
+            # Update each ETF
+            for ticker in etfs_to_update:
+                try:
+                    etf = self._fetch_etf_data(ticker)
+                    if etf:
+                        self.db.save_etf(etf)
+                except Exception as e:
+                    logger.warning(f"Failed to update ETF {ticker}: {str(e)}")
+                    continue
+            
+            # Update universe metadata
+            self._update_universe_metadata()
+            
+        except Exception as e:
+            raise DataUpdateError(f"Failed to update ETF universe: {str(e)}")
+    
+    def _get_etfs_to_update(self) -> List[str]:
+        """Get list of ETFs that need updating"""
+        try:
+            # Get current universe
+            universe = self.db.get_universe()
+            
+            # Get list of all ETFs
+            all_etfs = self.data_service.get_etf_list()
+            
+            # Find ETFs that need updating
+            etfs_to_update = []
+            for ticker in all_etfs:
+                if ticker not in universe.etfs:
+                    etfs_to_update.append(ticker)
+                else:
+                    # Check if data is stale
+                    etf = universe.etfs[ticker]
+                    if not is_cache_valid(etf.last_updated, self.cache_ttl):
+                        etfs_to_update.append(ticker)
+            
+            return etfs_to_update
+            
+        except Exception as e:
+            raise DataUpdateError(f"Failed to get ETFs to update: {str(e)}")
+    
+    def _fetch_etf_data(self, ticker: str) -> Optional[ETF]:
+        """Fetch ETF data from data service"""
+        try:
+            data = self.data_service.get_etf_data(ticker)
+            if not data:
+                return None
+            
+            info = data.get('info', {})
+            
+            return ETF(
+                ticker=ticker,
+                name=info.get('longName', ticker),
+                expense_ratio=info.get('annualReportExpenseRatio', 0.5) / 100,
+                aum=info.get('totalAssets', 0),
+                avg_volume=info.get('averageVolume', 0),
+                tracking_error=0.0,  # Not available from yfinance
+                volatility=float(data.get('volatility', 0)),
+                max_drawdown=float(data.get('max_drawdown', 0)),
+                sharpe_ratio=float(data.get('sharpe_ratio', 0)),
+                top_holding_weight=0.0,  # Not available from yfinance
+                dividend_yield=float(data.get('dividend_yield', 0)) / 100,
+                category=info.get('category', 'Unknown'),
+                asset_class=info.get('assetClass', 'Unknown'),
+                sector=info.get('sector', 'Unknown'),
+                region=info.get('region', 'Unknown'),
+                strategy=info.get('strategy', 'Unknown')
+            )
+            
+        except Exception as e:
+            logger.warning(f"Failed to fetch ETF data for {ticker}: {str(e)}")
+            return None
+    
+    def _determine_category(self, info: Dict) -> ETFCategory:
+        """Determine ETF category from info"""
+        category = info.get('category', '').lower()
+        
+        if 'equity' in category:
+            return ETFCategory.EQUITY
+        elif 'fixed income' in category or 'bond' in category:
+            return ETFCategory.FIXED_INCOME
+        elif 'commodity' in category:
+            return ETFCategory.COMMODITY
+        elif 'real estate' in category:
+            return ETFCategory.REAL_ESTATE
+        elif 'crypto' in category:
+            return ETFCategory.CRYPTO
+        else:
+            return ETFCategory.MULTI_ASSET
+    
+    def _determine_risk_level(self, volatility: float) -> str:
+        """Determine risk level based on volatility"""
+        if volatility < 0.1:
+            return 'Low'
+        elif volatility < 0.2:
+            return 'Medium'
+        else:
+            return 'High'
+    
+    def _update_universe_metadata(self):
+        """Update ETF universe metadata"""
+        try:
+            universe = self.db.get_universe()
+            
+            # Group ETFs by category
+            categories = {}
+            for etf in universe.etfs.values():
+                if etf.category not in categories:
+                    categories[etf.category] = []
+                categories[etf.category].append(etf.ticker)
+            
+            # Update categories
+            universe.categories = categories
+            universe.last_updated = datetime.now().isoformat()
+            
+            # Save to database
+            self.db.save_universe(universe)
+            
+        except Exception as e:
+            raise DataUpdateError(f"Failed to update universe metadata: {str(e)}")
+    
+    def get_etf_universe(self) -> ETFUniverse:
+        """Get current ETF universe"""
+        try:
+            return self.db.get_universe()
+        except Exception as e:
+            raise DatabaseError(f"Failed to get ETF universe: {str(e)}")
+    
+    def search_etfs(self,
+                   category: Optional[ETFCategory] = None,
+                   risk_level: Optional[str] = None,
+                   min_dividend_yield: Optional[float] = None,
+                   max_expense_ratio: Optional[float] = None) -> List[ETF]:
+        """Search ETFs based on criteria"""
+        try:
+            universe = self.get_etf_universe()
+            
+            # Filter ETFs
+            filtered_etfs = []
+            for etf in universe.etfs.values():
+                if category and etf.category != category.value:
+                    continue
+                if risk_level and self._determine_risk_level(etf.volatility) != risk_level:
+                    continue
+                if min_dividend_yield and etf.dividend_yield < min_dividend_yield:
+                    continue
+                if max_expense_ratio and etf.expense_ratio > max_expense_ratio:
+                    continue
+                filtered_etfs.append(etf)
+            
+            return filtered_etfs
+            
+        except Exception as e:
+            raise ETFSelectionError(f"Failed to search ETFs: {str(e)}")
+    
+    def get_etf_details(self, ticker: str) -> Optional[ETF]:
+        """Get detailed ETF information"""
+        try:
+            return self.db.get_etf(ticker)
+        except Exception as e:
+            raise ETFNotFoundError(f"Failed to get ETF details for {ticker}: {str(e)}")
+    
+    def clear_cache(self):
+        """Clear service cache"""
+        self.cache.clear() 
\ No newline at end of file
diff --git a/ETF_Portal/services/etf_selection_service/utils.py b/ETF_Portal/services/etf_selection_service/utils.py
new file mode 100644
index 0000000..dc143dd
--- /dev/null
+++ b/ETF_Portal/services/etf_selection_service/utils.py
@@ -0,0 +1,49 @@
+"""
+Utility functions for ETF selection service
+"""
+
+import numpy as np
+import pandas as pd
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Tuple
+
+def calculate_volatility(returns: pd.Series) -> float:
+    """Calculate annualized volatility from returns series."""
+    return returns.std() * np.sqrt(252)
+
+def calculate_max_drawdown(prices: pd.Series) -> float:
+    """Calculate maximum drawdown from price series."""
+    return (prices / prices.cummax() - 1).min()
+
+def calculate_sharpe_ratio(returns: pd.Series, risk_free_rate: float = 0.02) -> float:
+    """Calculate Sharpe ratio from returns series."""
+    excess_returns = returns - risk_free_rate/252
+    if len(excess_returns) < 2:
+        return 0.0
+    return np.sqrt(252) * excess_returns.mean() / excess_returns.std()
+
+def calculate_sortino_ratio(returns: pd.Series, risk_free_rate: float = 0.02) -> float:
+    """Calculate Sortino ratio from returns series."""
+    excess_returns = returns - risk_free_rate/252
+    downside_returns = excess_returns[excess_returns < 0]
+    if len(downside_returns) < 2:
+        return 0.0
+    return np.sqrt(252) * excess_returns.mean() / downside_returns.std()
+
+def calculate_portfolio_metrics(weights: np.ndarray, returns: pd.DataFrame) -> Dict[str, float]:
+    """Calculate portfolio metrics from weights and returns."""
+    portfolio_returns = returns.dot(weights)
+    return {
+        'volatility': calculate_volatility(portfolio_returns),
+        'max_drawdown': calculate_max_drawdown(portfolio_returns.cumsum()),
+        'sharpe_ratio': calculate_sharpe_ratio(portfolio_returns),
+        'sortino_ratio': calculate_sortino_ratio(portfolio_returns)
+    }
+
+def is_cache_valid(timestamp: str, ttl_seconds: int) -> bool:
+    """Check if cached data is still valid based on TTL."""
+    try:
+        cache_time = datetime.fromisoformat(timestamp)
+        return datetime.now() - cache_time < timedelta(seconds=ttl_seconds)
+    except (ValueError, TypeError):
+        return False 
\ No newline at end of file
diff --git a/etf_data.db b/etf_data.db
new file mode 100644
index 0000000000000000000000000000000000000000..c24108bc7a9c73e5816a5591fec1945e5dbdae72
GIT binary patch
literal 20480
zcmeI%O>fgM7zc1C>o&9(ZI`N7_XJ1?Y1)ws1x3?DL8J7*PL_$i)M^?R$9+4(fy>VO
zLOXKd>u|&lOl1<5$F=%LO6o*Up8EOOjuhp?`@XW0UTHJomY$Mhk|g9MrG${w?b~j@
z+x&U!?!^5jf3-u+lswOq!=s<1wSP+vz90SAzdg7K1>-sdAOHafKmY;|fB*y_5EJ-v
zv9;Ujc9XAT%RiMe=T;WlC|R#tt=EIBH_Yg;_o|=K^>zAWj|R3sK`T1UJ`U-{;QVcG
za7o`}m(PN?NPAT`;o*>y*Otwu!u3%!q5n+w^t83pIqN1wRYET7&!u}qVSHwFbI#WN
zXD93S_O~?I?X=s;b<sHZ-uOMeS9{aPaMf1jV`=`Q=88{bq-MFC%Bq&kaH}+R1JMur
z@Yy65F)!F$moxXb=kJEuSvCk~vxetmRTWGcqfI2SOX@o4s8^A|312c{_(JGK6$!0J
z+)N)<xz$rP(xvd-T}V}oY$Uf(b0uUY*h<M#M1t?l<}etq-IQUuo29XdT+7@#QolID
z>5MFtt|CFT8zr}5e=oFft_QB1$LUVzMLV=|{Oo5Z{<!6yDv$h=<<K9F{Q(;UAOHaf
zKmY;|fB*y_009U<00NsTu;&ZI`~Rj6FY1B-1Rwwb2tWV=5P$##AOHafGy?ekkBI;Q
w2tWV=5P$##AOHafKmY;|*n9ze|KI#EMuiZ700bZa0SG_<0uX=z1Rwx`Ul*<f;Q#;t

literal 0
HcmV?d00001

diff --git a/pages/ETF_Portfolio_Builder.py b/pages/ETF_Portfolio_Builder.py
index 6f3b5f0..1e1dc49 100644
--- a/pages/ETF_Portfolio_Builder.py
+++ b/pages/ETF_Portfolio_Builder.py
@@ -21,6 +21,9 @@ import traceback
 from dotenv import load_dotenv
 import re
 from ETF_Portal.services.drip_service import DRIPService, DripConfig
+from ETF_Portal.services.etf_selection_service import ETFSelectionService, InvestmentGoal, RiskTolerance
+from ETF_Portal.services.nav_erosion_service import NavErosionService
+from ETF_Portal.services.data_service import DataService
 
 # Load environment variables
 load_dotenv(override=True)  # Force reload of environment variables
@@ -1373,7 +1376,7 @@ def allocate_for_income(df: pd.DataFrame, target: float, etf_allocations: List[D
         for alloc in etf_allocations:
             mask = final_alloc["Ticker"] == alloc["ticker"]
             if mask.any():
-                final_alloc.loc[mask, "Allocation (%)"] = alloc["allocation"]
+                final_alloc.loc[mask, "Allocation (%)"] = alloc["allocation"]  # Already in percentage
             else:
                 logger.warning(f"Ticker {alloc['ticker']} not found in DataFrame")
         
@@ -1392,7 +1395,7 @@ def allocate_for_income(df: pd.DataFrame, target: float, etf_allocations: List[D
             logger.error("Weighted yield is zero")
             return None
             
-        # Calculate required capital based on weighted yield (convert percentage to decimal)
+        # Calculate required capital based on weighted yield
         required_capital = annual_income / (weighted_yield / 100)
         
         # Calculate capital allocation and income
@@ -1439,7 +1442,7 @@ def allocate_for_capital(df: pd.DataFrame, initial_capital: float, etf_allocatio
         for alloc in etf_allocations:
             mask = final_alloc["Ticker"] == alloc["ticker"]
             if mask.any():
-                final_alloc.loc[mask, "Allocation (%)"] = alloc["allocation"]
+                final_alloc.loc[mask, "Allocation (%)"] = alloc["allocation"]  # Already in percentage
             else:
                 logger.warning(f"Ticker {alloc['ticker']} not found in DataFrame")
         
@@ -2850,7 +2853,8 @@ if st.session_state.simulation_run and st.session_state.df_data is not None:
                 
                 # Initialize services
                 from ETF_Portal.services.data_service import DataService
-                from ETF_Portal.services.etf_selection_service import ETFSelectionService, InvestmentGoal, RiskTolerance
+                from ETF_Portal.services.etf_selection_service import ETFSelectionService
+                from ETF_Portal.services.etf_selection_service import InvestmentGoal, RiskTolerance
                 
                 data_service = DataService()
                 selection_service = ETFSelectionService(data_service)
@@ -2916,7 +2920,7 @@ if st.session_state.simulation_run and st.session_state.df_data is not None:
                             st.write("A balanced approach focusing on both growth and income, suitable for most investors.")
                             if balanced_portfolio:
                                 portfolio_df = pd.DataFrame(balanced_portfolio)
-                                portfolio_df['Allocation (%)'] = portfolio_df['allocation'] * 100
+                                portfolio_df['Allocation (%)'] = portfolio_df['allocation'].round().astype(int)
                                 portfolio_df['Amount ($)'] = portfolio_df['amount']
                                 
                                 st.dataframe(
@@ -2947,7 +2951,7 @@ if st.session_state.simulation_run and st.session_state.df_data is not None:
                             st.write("Optimized for higher dividend income with lower risk, suitable for income-focused investors.")
                             if income_portfolio:
                                 portfolio_df = pd.DataFrame(income_portfolio)
-                                portfolio_df['Allocation (%)'] = portfolio_df['allocation'] * 100
+                                portfolio_df['Allocation (%)'] = portfolio_df['allocation'].round().astype(int)
                                 portfolio_df['Amount ($)'] = portfolio_df['amount']
                                 
                                 st.dataframe(
@@ -2978,7 +2982,7 @@ if st.session_state.simulation_run and st.session_state.df_data is not None:
                             st.write("Optimized for capital appreciation with higher risk tolerance, suitable for growth investors.")
                             if growth_portfolio:
                                 portfolio_df = pd.DataFrame(growth_portfolio)
-                                portfolio_df['Allocation (%)'] = portfolio_df['allocation'] * 100
+                                portfolio_df['Allocation (%)'] = portfolio_df['allocation'].round().astype(int)
                                 portfolio_df['Amount ($)'] = portfolio_df['amount']
                                 
                                 st.dataframe(
@@ -3009,7 +3013,7 @@ if st.session_state.simulation_run and st.session_state.df_data is not None:
                             st.write(f"Optimized for your specific risk tolerance ({risk_tolerance}), balancing growth and income.")
                             if risk_adjusted_portfolio:
                                 portfolio_df = pd.DataFrame(risk_adjusted_portfolio)
-                                portfolio_df['Allocation (%)'] = portfolio_df['allocation'] * 100
+                                portfolio_df['Allocation (%)'] = portfolio_df['allocation'].round().astype(int)
                                 portfolio_df['Amount ($)'] = portfolio_df['amount']
                                 
                                 st.dataframe(