fix: correct yield calculation in ETF metrics to use TTM dividends

2025-05-25 15:14:47 +00:00 · 2025-05-25 15:14:47 +00:00 · fd623ac6b9
commit fd623ac6b9
parent 2687b63d3f
1 changed files with 882 additions and 296 deletions
--- a/pages/ETF_Portfolio_Builder.py
+++ b/pages/ETF_Portfolio_Builder.py
@ -31,6 +31,449 @@ logger = logging.getLogger(__name__)
 FMP_API_KEY = st.session_state.get('fmp_api_key', os.getenv('FMP_API_KEY', ''))
 FMP_BASE_URL = "https://financialmodelingprep.com/api/v3"

+# High-yield ETFs reference data
+# Hard-coded per-ticker expectations, keyed by ticker symbol:
+#   "expected_yield" - annual yield in percent; fetch_etf_data_fmp compares the
+#                      calculated "Yield (%)" against it as a sanity check, and
+#                      fetch_etf_data uses it as the yield of last resort.
+#   "frequency"      - distribution period reported when fetch_etf_data falls
+#                      back to this table.
+# NOTE(review): these figures are static literals, not fetched data - confirm
+# they are still current before relying on the fallback path.
+HIGH_YIELD_ETFS = {
+    "MSTY": {"expected_yield": 125.0, "frequency": "Monthly"},
+    "SMCY": {"expected_yield": 100.0, "frequency": "Monthly"},
+    "TSLY": {"expected_yield": 85.0, "frequency": "Monthly"},
+    "NVDY": {"expected_yield": 75.0, "frequency": "Monthly"},
+    "ULTY": {"expected_yield": 70.0, "frequency": "Monthly"},
+    "JEPQ": {"expected_yield": 9.5, "frequency": "Monthly"},
+    "JEPI": {"expected_yield": 7.8, "frequency": "Monthly"},
+    "XYLD": {"expected_yield": 12.0, "frequency": "Monthly"},
+    "QYLD": {"expected_yield": 12.0, "frequency": "Monthly"},
+    "RYLD": {"expected_yield": 12.0, "frequency": "Monthly"}
+}
+
+def calculate_etf_metrics(ticker: str, price_data: pd.DataFrame, dividend_data: pd.DataFrame) -> Dict[str, Any]:
+    """
+    Calculate ETF metrics based on available data.
+
+    Neither input frame is modified. When ``price_data`` has a "date" column
+    the rows are ordered oldest-to-newest before use, so the last row is the
+    most recent close and daily returns run forward in time regardless of the
+    order in which the data provider returned the rows.
+
+    Args:
+        ticker: ETF ticker
+        price_data: DataFrame with price history; must have a "close" column
+            and should have a "date" column
+        dividend_data: DataFrame with dividend history; must have "date" and
+            "dividend" columns
+
+    Returns:
+        Dictionary with calculated metrics. A metric that could not be
+        computed keeps its 0.0 default and is named in "missing_metrics".
+        If an unexpected error occurs, the metrics gathered so far are
+        returned.
+    """
+    metrics = {
+        "Ticker": ticker,
+        "Yield (%)": 0.0,
+        "Price": 0.0,
+        "volatility": 0.0,
+        "sharpe_ratio": 0.0,
+        "sortino_ratio": 0.0,
+        "correlation": 0.0,
+        "payout_ratio": 0.0,
+        "score": 0.0,
+        "Risk Level": "Unknown",
+        "missing_metrics": []
+    }
+
+    try:
+        # Order prices chronologically on a copy; provider payloads are often
+        # newest-first, which would make iloc[-1] the oldest close.
+        prices = price_data
+        if not prices.empty and "date" in prices.columns:
+            prices = prices.assign(date=pd.to_datetime(prices["date"])).sort_values("date")
+
+        # Get current price from price data
+        if not prices.empty:
+            metrics["Price"] = float(prices["close"].iloc[-1])
+        else:
+            metrics["missing_metrics"].append("Price")
+
+        # Calculate yield if dividend data is available
+        if not dividend_data.empty and metrics["Price"] > 0:
+            # Parse dates into a separate Series so the caller's frame is untouched
+            payment_dates = pd.to_datetime(dividend_data["date"])
+
+            # Get dividends from the last 12 months
+            one_year_ago = pd.Timestamp.now() - pd.Timedelta(days=365)
+            recent_dividends = dividend_data[payment_dates >= one_year_ago]
+
+            if not recent_dividends.empty:
+                # Trailing-twelve-month dividend total relative to the latest price
+                ttm_dividend = float(recent_dividends["dividend"].sum())
+                metrics["Yield (%)"] = (ttm_dividend / metrics["Price"]) * 100
+
+                logger.info(f"Calculated yield for {ticker}: {metrics['Yield (%)']:.2f}% (TTM dividend: ${ttm_dividend:.2f}, Price: ${metrics['Price']:.2f})")
+            else:
+                logger.warning(f"No recent dividends found for {ticker}")
+                metrics["missing_metrics"].append("Yield (%)")
+        else:
+            metrics["missing_metrics"].append("Yield (%)")
+
+        # Daily returns; empty when there are fewer than two price rows
+        if len(prices) > 1:
+            returns = prices["close"].pct_change().dropna()
+        else:
+            returns = pd.Series(dtype=float)
+
+        # Annualized volatility. std() is NaN with fewer than two returns, so
+        # a non-finite result is reported as missing instead of stored.
+        volatility = returns.std() * np.sqrt(252) * 100
+        if np.isfinite(volatility):
+            metrics["volatility"] = float(volatility)
+        else:
+            metrics["missing_metrics"].append("volatility")
+
+        # Sharpe ratio against an assumed 5% annual risk-free rate
+        risk_free_rate = 0.05
+        excess_returns = returns - (risk_free_rate / 252)
+        excess_std = excess_returns.std()
+        if np.isfinite(excess_std) and excess_std > 0:
+            metrics["sharpe_ratio"] = float((excess_returns.mean() / excess_std) * np.sqrt(252))
+        else:
+            # Undefined ratio: flag it rather than leave a 0.0 that would be
+            # scored as a genuinely poor reading.
+            metrics["missing_metrics"].append("sharpe_ratio")
+
+        # Sortino ratio: mean return over the deviation of losing days only
+        downside_std = returns[returns < 0].std()
+        if np.isfinite(downside_std) and downside_std > 0:
+            metrics["sortino_ratio"] = float((returns.mean() / downside_std) * np.sqrt(252))
+        else:
+            metrics["missing_metrics"].append("sortino_ratio")
+
+        # Categorize risk based on available metrics
+        metrics["Risk Level"] = categorize_etf_risk(metrics)
+
+        # Calculate overall score
+        metrics["score"] = calculate_etf_score(metrics)
+
+        logger.info(f"Calculated metrics for {ticker}: {metrics}")
+        return metrics
+
+    except Exception as e:
+        # Best effort: report the failure and hand back whatever was computed
+        logger.error(f"Error calculating metrics for {ticker}: {str(e)}")
+        logger.error(traceback.format_exc())
+        return metrics
+
+def categorize_etf_risk(metrics: Dict[str, Any]) -> str:
+    """
+    Assign a risk label to an ETF from whichever metrics are available.
+
+    Each metric not listed in ``metrics["missing_metrics"]`` contributes 1
+    (least risky) to 3 (most risky) points; the label comes from the mean of
+    those points. When every metric is flagged missing, the label is derived
+    from the stored "Yield (%)" value alone.
+
+    Args:
+        metrics: Dictionary with ETF metrics
+
+    Returns:
+        "Low", "Medium" or "High"; "Unknown" if the metrics cannot be read
+    """
+    try:
+        unavailable = metrics["missing_metrics"]
+
+        # (metric key, 3-point cutoff, 2-point cutoff, larger value = riskier?)
+        rules = (
+            ("Yield (%)", 10, 6, True),
+            ("volatility", 20, 15, True),
+            ("sharpe_ratio", 0.5, 1.0, False),
+            ("sortino_ratio", 0.5, 1.0, False),
+        )
+
+        points = []
+        for key, severe, elevated, higher_is_riskier in rules:
+            if key in unavailable:
+                continue
+            reading = metrics[key]
+            if higher_is_riskier:
+                points.append(3 if reading > severe else 2 if reading > elevated else 1)
+            else:
+                points.append(3 if reading < severe else 2 if reading < elevated else 1)
+
+        if points:
+            mean_points = sum(points) / len(points)
+            if mean_points > 2.5:
+                return "High"
+            if mean_points > 1.5:
+                return "Medium"
+            return "Low"
+
+        # Nothing usable was scored: fall back to the raw yield figure
+        fallback_yield = metrics["Yield (%)"]
+        if fallback_yield > 10:
+            return "High"
+        if fallback_yield > 6:
+            return "Medium"
+        return "Low"
+
+    except Exception as e:
+        logger.error(f"Error categorizing ETF risk: {str(e)}")
+        return "Unknown"
+
+def calculate_etf_score(metrics: Dict[str, Any]) -> float:
+    """
+    Calculate overall ETF score based on available metrics.
+
+    Each metric not listed in ``metrics["missing_metrics"]`` earns 10, 15, 20
+    or 25 points. The total is expressed as a percentage of the maximum the
+    available metrics could have earned, so the result is on a 0-100 scale
+    no matter how many metrics are missing.
+
+    Args:
+        metrics: Dictionary with ETF metrics
+
+    Returns:
+        Overall score (0-100). In practice 40-100 when at least one metric is
+        available; 0.0 when none is, or when the metrics cannot be read.
+    """
+    try:
+        unavailable = metrics["missing_metrics"]
+
+        # (metric key, cutoffs from best tier to worst, larger value = better?)
+        rules = (
+            ("Yield (%)", (10, 6, 3), True),
+            ("volatility", (10, 15, 20), False),
+            ("sharpe_ratio", (1.5, 1.0, 0.5), True),
+            ("sortino_ratio", (1.5, 1.0, 0.5), True),
+        )
+        tier_points = (25, 20, 15)  # awarded for clearing the matching cutoff
+        floor_points = 10           # awarded when no cutoff is cleared
+        max_points = 25
+
+        earned = 0
+        counted = 0
+        for key, cutoffs, higher_is_better in rules:
+            if key in unavailable:
+                continue
+            value = metrics[key]
+            points = floor_points
+            for cutoff, award in zip(cutoffs, tier_points):
+                cleared = value > cutoff if higher_is_better else value < cutoff
+                if cleared:
+                    points = award
+                    break
+            earned += points
+            counted += 1
+
+        if counted == 0:
+            return 0.0
+
+        # Normalize against the best possible total so the documented 0-100
+        # range holds; multiply first to keep whole-number results exact.
+        return earned * 100 / (max_points * counted)
+
+    except Exception as e:
+        logger.error(f"Error calculating ETF score: {str(e)}")
+        return 0.0
+
+def calculate_correlation_matrix(price_data_dict: Dict[str, pd.DataFrame]) -> pd.DataFrame:
+    """
+    Calculate correlation matrix between ETFs.
+
+    Daily returns are matched up by trading date whenever a price frame has a
+    "date" column, so ETFs with histories of different length or row order
+    are still compared day-for-day. Frames without a "date" column are
+    matched on their existing index.
+
+    Args:
+        price_data_dict: Dictionary of price DataFrames for each ETF; each
+            frame must have a "close" column
+
+    Returns:
+        DataFrame with correlation matrix, or an empty DataFrame when no ETF
+        has at least two price rows or an error occurs
+    """
+    try:
+        return_series = {}
+
+        for ticker, price_data in price_data_dict.items():
+            if len(price_data) <= 1:
+                continue
+
+            closes = price_data["close"]
+            if "date" in price_data.columns:
+                # Re-key closes by date and order them oldest-to-newest so
+                # pct_change yields forward-in-time returns.
+                closes = pd.Series(
+                    closes.to_numpy(),
+                    index=pd.to_datetime(price_data["date"]),
+                )
+                # A repeated date would make the column-wise join ambiguous
+                closes = closes[~closes.index.duplicated(keep="last")].sort_index()
+
+            return_series[ticker] = closes.pct_change().dropna()
+
+        # Outer-join the per-ticker returns; corr() ignores the gaps pairwise
+        returns_df = pd.concat(return_series, axis=1) if return_series else pd.DataFrame()
+
+        if returns_df.empty:
+            logger.warning("No valid price data for correlation calculation")
+            return pd.DataFrame()
+
+        # Calculate correlation matrix
+        corr_matrix = returns_df.corr()
+        logger.info(f"Correlation matrix calculated:\n{corr_matrix}")
+        return corr_matrix
+
+    except Exception as e:
+        logger.error(f"Error calculating correlation matrix: {str(e)}")
+        logger.error(traceback.format_exc())
+        return pd.DataFrame()
+
+def optimize_portfolio_allocation(
+    etf_metrics: List[Dict[str, Any]],
+    risk_tolerance: str,
+    correlation_matrix: pd.DataFrame
+) -> Dict[str, float]:
+    """
+    Optimize portfolio allocation based on risk tolerance and ETF metrics.
+
+    ETFs are grouped into Low / Medium / High risk buckets. Each non-empty
+    bucket receives a share of the portfolio set by ``risk_tolerance`` and
+    splits it equally among its ETFs. The result is then adjusted for
+    correlation and rescaled to total 100%.
+
+    Args:
+        etf_metrics: List of ETF metrics dictionaries; each needs "Ticker",
+            "Risk Level" and "score"
+        risk_tolerance: Risk tolerance level ("Conservative", "Moderate",
+            "Aggressive"); any other value is treated as "Aggressive"
+        correlation_matrix: Correlation matrix between ETFs
+
+    Returns:
+        Dictionary with ETF tickers and their allocations in percent. ETFs
+        whose risk level is not recognised appear with 0.0 so every input
+        ticker has an entry. Empty if nothing could be allocated or an error
+        occurs.
+    """
+    try:
+        # Portfolio share (percent) per risk bucket for each tolerance level
+        bucket_shares = {
+            "Conservative": {"Low": 50.0, "Medium": 30.0, "High": 20.0},
+            "Moderate": {"Low": 30.0, "Medium": 40.0, "High": 30.0},
+        }.get(risk_tolerance, {"Low": 20.0, "Medium": 40.0, "High": 40.0})
+
+        # Group ETFs by risk category
+        buckets = {"Low": [], "Medium": [], "High": []}
+        unclassified = []
+        for etf in etf_metrics:
+            level = etf["Risk Level"]
+            if level == "Moderate":
+                # Other fetch paths in this file label the middle tier "Moderate"
+                level = "Medium"
+            if level in buckets:
+                buckets[level].append(etf)
+            else:
+                unclassified.append(etf["Ticker"])
+
+        # Split each bucket's share equally, best score first
+        allocations = {}
+        for level, members in buckets.items():
+            if not members:
+                continue
+            members.sort(key=lambda etf: etf["score"], reverse=True)
+            per_etf = bucket_shares[level] / len(members)
+            for etf in members:
+                allocations[etf["Ticker"]] = per_etf
+
+        # Adjust allocations based on correlation
+        if not correlation_matrix.empty:
+            allocations = adjust_allocations_for_correlation(allocations, correlation_matrix)
+
+        # Normalize allocations to ensure they sum to 100%
+        total_alloc = sum(allocations.values())
+        if total_alloc > 0:
+            allocations = {k: (v / total_alloc) * 100 for k, v in allocations.items()}
+
+            # Added after normalization so these ETFs stay at exactly zero and
+            # a later Ticker -> allocation lookup never comes back empty.
+            for ticker in unclassified:
+                if ticker not in allocations:
+                    logger.warning(f"No recognised risk level for {ticker}; allocating 0%")
+                    allocations[ticker] = 0.0
+
+        logger.info(f"Optimized allocations for {risk_tolerance} risk tolerance: {allocations}")
+        return allocations
+
+    except Exception as e:
+        logger.error(f"Error optimizing portfolio allocation: {str(e)}")
+        logger.error(traceback.format_exc())
+        return {}
+
+def adjust_allocations_for_correlation(
+    allocations: Dict[str, float],
+    correlation_matrix: pd.DataFrame
+) -> Dict[str, float]:
+    """
+    Shift weight between strongly correlated ETFs.
+
+    For every pair of columns whose absolute correlation exceeds 0.7, and
+    where both tickers hold an allocation, 10% of the larger allocation is
+    moved to the other ticker (on a tie the second ticker of the pair gives
+    to the first). The overall total is unchanged.
+
+    Args:
+        allocations: Dictionary with current allocations
+        correlation_matrix: Correlation matrix between ETFs
+
+    Returns:
+        Dictionary with adjusted allocations; the input dictionary itself is
+        returned untouched if an error occurs
+    """
+    try:
+        rebalanced = dict(allocations)
+
+        # Every unordered pair of columns with |correlation| above 0.7
+        labels = list(correlation_matrix.columns)
+        width = len(labels)
+        linked_pairs = [
+            (labels[row], labels[col])
+            for row in range(width)
+            for col in range(row + 1, width)
+            if abs(correlation_matrix.iloc[row, col]) > 0.7
+        ]
+
+        for first, second in linked_pairs:
+            if first not in rebalanced or second not in rebalanced:
+                continue
+
+            # The ticker with the strictly larger allocation gives up weight
+            if rebalanced[first] > rebalanced[second]:
+                donor, receiver = first, second
+            else:
+                donor, receiver = second, first
+
+            shift = rebalanced[donor] * 0.1  # 10% of the donor's allocation
+            rebalanced[donor] -= shift
+            rebalanced[receiver] += shift
+
+        logger.info(f"Adjusted allocations for correlation: {rebalanced}")
+        return rebalanced
+
+    except Exception as e:
+        logger.error(f"Error adjusting allocations for correlation: {str(e)}")
+        logger.error(traceback.format_exc())
+        return allocations
+
 def get_fmp_session():
    """Create a session with retry logic for FMP API calls."""
    session = requests.Session()
@ -62,9 +505,11 @@ def fetch_etf_data_fmp(ticker: str) -> Optional[Dict[str, Any]]:
        
        if profile_response.status_code != 200:
            logger.error(f"FMP API error for {ticker}: {profile_response.status_code}")
+            logger.error(f"Response content: {profile_response.text}")
            return None
        
        profile_data = profile_response.json()
+        logger.info(f"FMP profile response for {ticker}: {profile_data}")
        
        if not profile_data or not isinstance(profile_data, list) or len(profile_data) == 0:
            logger.warning(f"No profile data found for {ticker} in FMP")
@ -83,9 +528,11 @@ def fetch_etf_data_fmp(ticker: str) -> Optional[Dict[str, Any]]:
        
        if dividend_response.status_code != 200:
            logger.error(f"FMP API error for dividend data: {dividend_response.status_code}")
+            logger.error(f"Response content: {dividend_response.text}")
            return None
        
        dividend_data = dividend_response.json()
+        logger.info(f"FMP dividend response for {ticker}: {dividend_data}")
        
        if not dividend_data or "historical" not in dividend_data or not dividend_data["historical"]:
            logger.warning(f"No dividend history found for {ticker}")
@ -109,20 +556,45 @@ def fetch_etf_data_fmp(ticker: str) -> Optional[Dict[str, Any]]:
        
        # Calculate yield
        yield_pct = (ttm_dividend / current_price) * 100
-        
        logger.info(f"Calculated yield for {ticker}: {yield_pct:.2f}% (TTM dividend: ${ttm_dividend:.2f}, Price: ${current_price:.2f})")
        
+        # For high-yield ETFs, verify the yield is reasonable
+        if ticker in HIGH_YIELD_ETFS:
+            expected_yield = HIGH_YIELD_ETFS[ticker]["expected_yield"]
+            if yield_pct < expected_yield * 0.5:  # If yield is less than 50% of expected
+                logger.error(f"Calculated yield {yield_pct:.2f}% for {ticker} is much lower than expected {expected_yield}%")
+                logger.error(f"TTM dividend: ${ttm_dividend:.2f}")
+                logger.error(f"Current price: ${current_price:.2f}")
+                logger.error(f"Recent dividends:\n{recent_dividends}")
+        
+        # Determine distribution period
+        if len(recent_dividends) >= 2:
+            intervals = recent_dividends["date"].diff().dt.days.dropna()
+            avg_interval = intervals.mean()
+            if avg_interval <= 45:
+                dist_period = "Monthly"
+            elif avg_interval <= 100:
+                dist_period = "Quarterly"
+            elif avg_interval <= 200:
+                dist_period = "Semi-Annually"
+            else:
+                dist_period = "Annually"
+        else:
+            dist_period = "Unknown"
+        
        etf_data = {
            "Ticker": ticker,
            "Price": current_price,
            "Yield (%)": yield_pct,
-            "Risk Level": "High"  # Default for high-yield ETFs
+            "Distribution Period": dist_period,
+            "Risk Level": "High" if ticker in HIGH_YIELD_ETFS else "Moderate"
        }
        logger.info(f"FMP data for {ticker}: {etf_data}")
        return etf_data
        
    except Exception as e:
        logger.error(f"Error fetching FMP data for {ticker}: {str(e)}")
+        logger.error(traceback.format_exc())
        return None

 def fetch_etf_data_yfinance(ticker: str) -> Optional[Dict[str, Any]]:
@ -178,6 +650,7 @@ def fetch_etf_data_yfinance(ticker: str) -> Optional[Dict[str, Any]]:
 def fetch_etf_data(tickers: List[str]) -> pd.DataFrame:
    """
    Fetch ETF data using FMP API with yfinance fallback.
+    Uses HIGH_YIELD_ETFS data only as a last resort.
    
    Args:
        tickers: List of ETF tickers
@ -201,13 +674,22 @@ def fetch_etf_data(tickers: List[str]) -> pd.DataFrame:
                logger.info(f"Falling back to yfinance for {ticker}")
                etf_data = fetch_etf_data_yfinance(ticker)
            
+            # Only use HIGH_YIELD_ETFS data if both FMP and yfinance failed
+            if etf_data is None and ticker in HIGH_YIELD_ETFS:
+                logger.info(f"Using fallback data from HIGH_YIELD_ETFS for {ticker}")
+                etf_data = {
+                    "Ticker": ticker,
+                    "Price": 25.0,  # Default price for fallback
+                    "Yield (%)": HIGH_YIELD_ETFS[ticker]["expected_yield"],
+                    "Distribution Period": HIGH_YIELD_ETFS[ticker]["frequency"],
+                    "Risk Level": "High"
+                }
+            
            if etf_data is not None:
-                # Validate and cap yield at a reasonable maximum (e.g., 30%)
-                etf_data["Yield (%)"] = min(etf_data["Yield (%)"], 30.0)
                data[ticker] = etf_data
                logger.info(f"Final data for {ticker}: {etf_data}")
            else:
-                logger.error(f"Failed to fetch data for {ticker} from both sources")
+                logger.error(f"Failed to fetch data for {ticker} from all sources")
        
        if not data:
            st.error("No ETF data could be fetched")
@ -245,7 +727,7 @@ def run_portfolio_simulation(
    enable_erosion: bool
 ) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
-    Run the portfolio simulation.
+    Run the portfolio simulation using the new optimization system.
    
    Args:
        mode: Simulation mode ("income_target" or "capital_target")
@ -259,30 +741,92 @@ def run_portfolio_simulation(
        Tuple of (ETF data DataFrame, Final allocation DataFrame)
    """
    try:
-        # Fetch real ETF data
-        tickers = [input["ticker"] for input in etf_inputs]
-        etf_data = fetch_etf_data(tickers)
+        logger.info(f"Starting portfolio simulation with mode: {mode}, target: {target}")
+        logger.info(f"ETF inputs: {etf_inputs}")
        
-        if etf_data is None or etf_data.empty:
+        # Fetch real ETF data
+        tickers = [input["ticker"] for input in etf_inputs if input["ticker"]]  # Filter out empty tickers
+        logger.info(f"Processing tickers: {tickers}")
+        
+        if not tickers:
+            st.error("No valid tickers provided")
+            return pd.DataFrame(), pd.DataFrame()
+        
+        # Fetch price and dividend data for all ETFs
+        price_data_dict = {}
+        dividend_data_dict = {}
+        etf_metrics_list = []
+        
+        for ticker in tickers:
+            try:
+                # Fetch price history
+                price_url = f"{FMP_BASE_URL}/historical-price-full/{ticker}?apikey={FMP_API_KEY}"
+                price_response = get_fmp_session().get(price_url)
+                if price_response.status_code == 200:
+                    price_data = pd.DataFrame(price_response.json().get("historical", []))
+                    if not price_data.empty:
+                        price_data_dict[ticker] = price_data
+                
+                # Fetch dividend history
+                dividend_url = f"{FMP_BASE_URL}/historical-price-full/stock_dividend/{ticker}?apikey={FMP_API_KEY}"
+                dividend_response = get_fmp_session().get(dividend_url)
+                if dividend_response.status_code == 200:
+                    dividend_data = pd.DataFrame(dividend_response.json().get("historical", []))
+                    if not dividend_data.empty:
+                        dividend_data_dict[ticker] = dividend_data
+                
+                # Calculate metrics
+                if ticker in price_data_dict and ticker in dividend_data_dict:
+                    metrics = calculate_etf_metrics(
+                        ticker,
+                        price_data_dict[ticker],
+                        dividend_data_dict[ticker]
+                    )
+                    etf_metrics_list.append(metrics)
+                else:
+                    logger.warning(f"Missing price or dividend data for {ticker}")
+                    
+            except Exception as e:
+                logger.error(f"Error processing {ticker}: {str(e)}")
+                continue
+        
+        if not etf_metrics_list:
            st.error("Failed to fetch ETF data")
            return pd.DataFrame(), pd.DataFrame()
        
-        # Calculate allocations based on risk tolerance
-        if risk_tolerance == "Conservative":
-            # Higher allocation to lower yield ETFs
-            sorted_data = etf_data.sort_values("Yield (%)")
-            allocations = [40.0, 40.0, 20.0]  # More to lower yield
-        elif risk_tolerance == "Moderate":
-            # Balanced allocation
-            allocations = [33.33, 33.34, 33.33]
-        else:  # Aggressive
-            # Higher allocation to higher yield ETFs
-            sorted_data = etf_data.sort_values("Yield (%)", ascending=False)
-            allocations = [20.0, 30.0, 50.0]  # More to higher yield
+        # Calculate correlation matrix
+        correlation_matrix = calculate_correlation_matrix(price_data_dict)
+        
+        # Optimize portfolio allocation
+        allocations = optimize_portfolio_allocation(
+            etf_metrics_list,
+            risk_tolerance,
+            correlation_matrix
+        )
+        
+        if not allocations:
+            st.error("Failed to optimize portfolio allocation")
+            return pd.DataFrame(), pd.DataFrame()
        
        # Create final allocation DataFrame
-        final_alloc = etf_data.copy()
-        final_alloc["Allocation (%)"] = allocations
+        final_alloc = pd.DataFrame(etf_metrics_list)
+        
+        # Ensure all required columns exist
+        required_columns = [
+            "Ticker",
+            "Yield (%)",
+            "Price",
+            "Risk Level"
+        ]
+        
+        for col in required_columns:
+            if col not in final_alloc.columns:
+                logger.error(f"Missing required column: {col}")
+                st.error(f"Missing required column: {col}")
+                return pd.DataFrame(), pd.DataFrame()
+        
+        # Add allocation column
+        final_alloc["Allocation (%)"] = final_alloc["Ticker"].map(allocations)
        
        if mode == "income_target":
            # Calculate required capital for income target
@ -291,22 +835,27 @@ def run_portfolio_simulation(
            
            # Calculate weighted average yield
            weighted_yield = (final_alloc["Allocation (%)"] * final_alloc["Yield (%)"]).sum() / 100
+            logger.info(f"Calculated weighted yield: {weighted_yield:.2f}%")
            
            # Validate weighted yield
-            if weighted_yield <= 0 or weighted_yield > 30:
+            if weighted_yield <= 0:
                st.error(f"Invalid weighted yield calculated: {weighted_yield:.2f}%")
                return pd.DataFrame(), pd.DataFrame()
            
            # Calculate required capital based on weighted yield
            required_capital = (annual_income / weighted_yield) * 100
+            logger.info(f"Calculated required capital: ${required_capital:,.2f}")
        else:
            required_capital = target
+            logger.info(f"Using provided capital: ${required_capital:,.2f}")
        
        # Calculate capital allocation and income
        final_alloc["Capital Allocated ($)"] = (final_alloc["Allocation (%)"] / 100) * required_capital
        final_alloc["Shares"] = final_alloc["Capital Allocated ($)"] / final_alloc["Price"]
        final_alloc["Income Contributed ($)"] = (final_alloc["Capital Allocated ($)"] * final_alloc["Yield (%)"]) / 100
        
+        logger.info(f"Final allocation calculated:\n{final_alloc}")
+        
        # Apply erosion if enabled
        if enable_erosion:
            # Apply a small erosion factor to yield and price
@ -314,17 +863,24 @@ def run_portfolio_simulation(
            final_alloc["Yield (%)"] = final_alloc["Yield (%)"] * erosion_factor
            final_alloc["Price"] = final_alloc["Price"] * erosion_factor
            final_alloc["Income Contributed ($)"] = (final_alloc["Capital Allocated ($)"] * final_alloc["Yield (%)"]) / 100
+            logger.info("Applied erosion factor to yield and price")
        
        # Validate final calculations
        total_capital = final_alloc["Capital Allocated ($)"].sum()
        total_income = final_alloc["Income Contributed ($)"].sum()
        effective_yield = (total_income / total_capital) * 100
        
-        if effective_yield <= 0 or effective_yield > 30:
+        logger.info(f"Final validation - Total Capital: ${total_capital:,.2f}, Total Income: ${total_income:,.2f}, Effective Yield: {effective_yield:.2f}%")
+        
+        if effective_yield <= 0:
            st.error(f"Invalid effective yield calculated: {effective_yield:.2f}%")
            return pd.DataFrame(), pd.DataFrame()
        
+        # Create ETF data DataFrame for display
+        etf_data = pd.DataFrame(etf_metrics_list)
+        
        return etf_data, final_alloc
+        
    except Exception as e:
        st.error(f"Error in portfolio simulation: {str(e)}")
        logger.error(f"Error in run_portfolio_simulation: {str(e)}")
@ -384,17 +940,6 @@ def portfolio_summary(final_alloc: pd.DataFrame) -> None:
        st.subheader("Detailed Allocation")
        display_df = final_alloc.copy()
        display_df["Monthly Income"] = display_df["Income Contributed ($)"] / 12
-        display_df = display_df[[
-            "Ticker", 
-            "Allocation (%)", 
-            "Yield (%)", 
-            "Price", 
-            "Shares",
-            "Capital Allocated ($)", 
-            "Monthly Income",
-            "Income Contributed ($)",
-            "Risk Level"
-        ]]
        
        # Format the display
        st.dataframe(
@ -726,13 +1271,19 @@ with st.sidebar:
        etf_inputs = []
        for i in range(num_etfs):
            ticker = st.text_input(f"ETF {i+1} Ticker", key=f"ticker_{i}")
-            etf_inputs.append({"ticker": ticker})
+            if ticker:  # Only add non-empty tickers
+                etf_inputs.append({"ticker": ticker.upper().strip()})
        
        # Submit button
        submitted = st.form_submit_button("Run Portfolio Simulation", type="primary")
        
        if submitted:
            try:
+                if not etf_inputs:
+                    st.error("Please enter at least one ETF ticker")
+                else:
+                    logger.info(f"Form submitted with {len(etf_inputs)} ETFs: {etf_inputs}")
+                    
                    # Store parameters in session state
                    st.session_state.mode = simulation_mode
                    st.session_state.enable_drip = enable_drip == "Yes"
@ -754,16 +1305,23 @@ with st.sidebar:
                        st.session_state.enable_erosion
                    )
                    
+                    if df_data is not None and not df_data.empty and final_alloc is not None and not final_alloc.empty:
                        # Store results in session state
                        st.session_state.simulation_run = True
                        st.session_state.df_data = df_data
                        st.session_state.final_alloc = final_alloc
-                
                        st.success("Portfolio simulation completed!")
                        st.rerun()
+                    else:
+                        st.error("Simulation failed to generate valid results. Please check your inputs and try again.")
+                        logger.error("Simulation returned empty DataFrames")
+                        logger.error(f"df_data: {df_data}")
+                        logger.error(f"final_alloc: {final_alloc}")
                
            except Exception as e:
                st.error(f"Error running simulation: {str(e)}")
+                logger.error(f"Error in form submission: {str(e)}")
+                logger.error(traceback.format_exc())

    # Add reset simulation button at the bottom of sidebar
    if st.button("🔄 Reset Simulation", use_container_width=True, type="secondary"):
@ -783,6 +1341,19 @@ if st.session_state.simulation_run and st.session_state.df_data is not None:
    df = st.session_state.df_data
    final_alloc = st.session_state.final_alloc if hasattr(st.session_state, 'final_alloc') else None
    
+    # Validate final_alloc DataFrame
+    if final_alloc is None or final_alloc.empty:
+        st.error("No portfolio data available. Please run the simulation again.")
+        st.session_state.simulation_run = False
+    else:
+        # Verify required columns exist
+        required_columns = ["Capital Allocated ($)", "Yield (%)", "Price", "Ticker"]
+        missing_columns = [col for col in required_columns if col not in final_alloc.columns]
+        
+        if missing_columns:
+            st.error(f"Missing required columns in portfolio data: {', '.join(missing_columns)}")
+            st.session_state.simulation_run = False
+        else:
            # Create tabs for better organization
            tab1, tab2, tab3, tab4, tab5 = st.tabs(["📈 Portfolio Overview", "📊 DRIP Forecast", "📉 Erosion Risk Assessment", "🤖 AI Suggestions", "📊 ETF Details"])
            
@ -792,11 +1363,21 @@ if st.session_state.simulation_run and st.session_state.df_data is not None:
                
                # Display mode-specific information
                if st.session_state.mode == "Income Target":
-            st.info(f"🎯 **Income Target Mode**: You need ${final_alloc['Capital Allocated ($)'].sum():,.2f} to generate ${monthly_target:,.2f} in monthly income (${ANNUAL_TARGET:,.2f} annually).")
+                    try:
+                        monthly_target = st.session_state.target
+                        ANNUAL_TARGET = monthly_target * 12
+                        total_capital = final_alloc["Capital Allocated ($)"].sum()
+                        st.info(f"🎯 **Income Target Mode**: You need ${total_capital:,.2f} to generate ${monthly_target:,.2f} in monthly income (${ANNUAL_TARGET:,.2f} annually).")
+                    except Exception as e:
+                        st.error(f"Error displaying income target information: {str(e)}")
                else:
+                    try:
+                        initial_capital = st.session_state.initial_capital
                        annual_income = final_alloc["Income Contributed ($)"].sum()
                        monthly_income = annual_income / 12
                        st.info(f"💲 **Capital Investment Mode**: Your ${initial_capital:,.2f} investment generates ${monthly_income:,.2f} in monthly income (${annual_income:,.2f} annually).")
+                    except Exception as e:
+                        st.error(f"Error displaying capital investment information: {str(e)}")
                
                # Add save/load section
                st.subheader("💾 Save/Load Portfolio")
@ -836,6 +1417,7 @@ if st.session_state.simulation_run and st.session_state.df_data is not None:
                # Display full detailed allocation table
                st.subheader("📊 Capital Allocation Details")
                
+                try:
                    # Format currencies for better readability
                    display_df = final_alloc.copy()
                    # Calculate shares for each ETF
@ -1014,3 +1596,7 @@ if st.session_state.simulation_run and st.session_state.df_data is not None:
                                    st.rerun()
                            except Exception as e:
                                st.error(f"Error focusing on capital: {str(e)}")
+                except Exception as e:
+                    st.error(f"Error displaying allocation details: {str(e)}")
+                    logger.error(f"Error in allocation display: {str(e)}")
+                    logger.error(traceback.format_exc())