From 027febf7da124527588c9d659ffde60daea21039 Mon Sep 17 00:00:00 2001 From: Pascal Date: Tue, 27 May 2025 16:02:15 +0200 Subject: [PATCH] feat: improve logging for cache and API calls tracking --- pages/ETF_Portfolio_Builder.py | 687 ++++++++++++++++++++++----------- 1 file changed, 459 insertions(+), 228 deletions(-) diff --git a/pages/ETF_Portfolio_Builder.py b/pages/ETF_Portfolio_Builder.py index badaf5f..e7f0f9a 100644 --- a/pages/ETF_Portfolio_Builder.py +++ b/pages/ETF_Portfolio_Builder.py @@ -5,7 +5,7 @@ import plotly.express as px import plotly.graph_objects as go from pathlib import Path import json -from datetime import datetime +from datetime import datetime, timedelta from typing import List, Dict, Tuple, Optional, Any, Callable, T import time import threading @@ -21,14 +21,34 @@ import traceback from dotenv import load_dotenv # Load environment variables -load_dotenv() +load_dotenv(override=True) # Force reload of environment variables # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) +# Global settings +USE_FMP_API = True # Default to using FMP API if available + # FMP API configuration -FMP_API_KEY = st.session_state.get('fmp_api_key', os.getenv('FMP_API_KEY', '')) +FMP_API_KEY = os.getenv('FMP_API_KEY') +if not FMP_API_KEY: + logger.warning("FMP_API_KEY not found in environment variables") + logger.warning("Verify FMP_API_KEY is set in the environment or in .env") + logger.warning("Current working directory: %s", os.getcwd()) + logger.warning("Files in current directory: %s", os.listdir('.')) + if os.path.exists('.env'): + logger.warning(".env file exists") + # Never log .env contents: the file may contain secrets + logger.warning(".env file exists but FMP_API_KEY was not loaded from it") + else: + logger.warning(".env file does not exist") +else: + logger.info("FMP_API_KEY loaded successfully") + # Mask the API key for security in logs (short keys are fully masked) + masked_key = FMP_API_KEY[:4] + '*' * (len(FMP_API_KEY) - 8) + FMP_API_KEY[-4:] if len(FMP_API_KEY) > 8 else '********' + logger.info("FMP_API_KEY (masked): 
%s", masked_key) + FMP_BASE_URL = "https://financialmodelingprep.com/api/v3" # High-yield ETFs reference data @@ -324,7 +344,7 @@ def optimize_portfolio_allocation( etf_metrics: List[Dict[str, Any]], risk_tolerance: str, correlation_matrix: pd.DataFrame -) -> Dict[str, float]: +) -> List[Dict[str, Any]]: """ Optimize portfolio allocation based on risk tolerance and ETF metrics. @@ -334,21 +354,26 @@ def optimize_portfolio_allocation( correlation_matrix: Correlation matrix between ETFs Returns: - Dictionary with ETF tickers and their allocations + List of dictionaries with ETF tickers and their allocations """ try: + logger.info(f"Optimizing portfolio allocation for {risk_tolerance} risk tolerance") + logger.info(f"ETF metrics: {etf_metrics}") + # Group ETFs by risk category - low_risk = [etf for etf in etf_metrics if etf["Risk Level"] == "Low"] - medium_risk = [etf for etf in etf_metrics if etf["Risk Level"] == "Medium"] - high_risk = [etf for etf in etf_metrics if etf["Risk Level"] == "High"] + low_risk = [etf for etf in etf_metrics if etf.get("Risk Level", "Unknown") == "Low"] + medium_risk = [etf for etf in etf_metrics if etf.get("Risk Level", "Unknown") == "Medium"] + high_risk = [etf for etf in etf_metrics if etf.get("Risk Level", "Unknown") == "High"] + + logger.info(f"Risk groups - Low: {len(low_risk)}, Medium: {len(medium_risk)}, High: {len(high_risk)}") # Sort ETFs by score within each risk category - low_risk.sort(key=lambda x: x["score"], reverse=True) - medium_risk.sort(key=lambda x: x["score"], reverse=True) - high_risk.sort(key=lambda x: x["score"], reverse=True) + low_risk.sort(key=lambda x: x.get("score", 0), reverse=True) + medium_risk.sort(key=lambda x: x.get("score", 0), reverse=True) + high_risk.sort(key=lambda x: x.get("score", 0), reverse=True) # Initialize allocations - allocations = {} + allocations = [] if risk_tolerance == "Conservative": # Conservative allocation @@ -356,19 +381,19 @@ def optimize_portfolio_allocation( # Allocate 50% to 
low-risk ETFs low_risk_alloc = 50.0 / len(low_risk) for etf in low_risk: - allocations[etf["Ticker"]] = low_risk_alloc + allocations.append({"ticker": etf["Ticker"], "allocation": low_risk_alloc}) if medium_risk: # Allocate 30% to medium-risk ETFs medium_risk_alloc = 30.0 / len(medium_risk) for etf in medium_risk: - allocations[etf["Ticker"]] = medium_risk_alloc + allocations.append({"ticker": etf["Ticker"], "allocation": medium_risk_alloc}) if high_risk: # Allocate 20% to high-risk ETFs high_risk_alloc = 20.0 / len(high_risk) for etf in high_risk: - allocations[etf["Ticker"]] = high_risk_alloc + allocations.append({"ticker": etf["Ticker"], "allocation": high_risk_alloc}) elif risk_tolerance == "Moderate": # Moderate allocation @@ -376,19 +401,19 @@ def optimize_portfolio_allocation( # Allocate 30% to low-risk ETFs low_risk_alloc = 30.0 / len(low_risk) for etf in low_risk: - allocations[etf["Ticker"]] = low_risk_alloc + allocations.append({"ticker": etf["Ticker"], "allocation": low_risk_alloc}) if medium_risk: # Allocate 40% to medium-risk ETFs medium_risk_alloc = 40.0 / len(medium_risk) for etf in medium_risk: - allocations[etf["Ticker"]] = medium_risk_alloc + allocations.append({"ticker": etf["Ticker"], "allocation": medium_risk_alloc}) if high_risk: # Allocate 30% to high-risk ETFs high_risk_alloc = 30.0 / len(high_risk) for etf in high_risk: - allocations[etf["Ticker"]] = high_risk_alloc + allocations.append({"ticker": etf["Ticker"], "allocation": high_risk_alloc}) else: # Aggressive # Aggressive allocation @@ -396,36 +421,34 @@ def optimize_portfolio_allocation( # Allocate 20% to low-risk ETFs low_risk_alloc = 20.0 / len(low_risk) for etf in low_risk: - allocations[etf["Ticker"]] = low_risk_alloc + allocations.append({"ticker": etf["Ticker"], "allocation": low_risk_alloc}) if medium_risk: # Allocate 40% to medium-risk ETFs medium_risk_alloc = 40.0 / len(medium_risk) for etf in medium_risk: - allocations[etf["Ticker"]] = medium_risk_alloc + 
allocations.append({"ticker": etf["Ticker"], "allocation": medium_risk_alloc}) if high_risk: # Allocate 40% to high-risk ETFs high_risk_alloc = 40.0 / len(high_risk) for etf in high_risk: - allocations[etf["Ticker"]] = high_risk_alloc + allocations.append({"ticker": etf["Ticker"], "allocation": high_risk_alloc}) - # Adjust allocations based on correlation - if not correlation_matrix.empty: - allocations = adjust_allocations_for_correlation(allocations, correlation_matrix) + # If no allocations were made, use equal weighting + if not allocations: + logger.warning("No risk-based allocations made, using equal weighting") + total_etfs = len(etf_metrics) + equal_alloc = 100.0 / total_etfs + allocations = [{"ticker": etf["Ticker"], "allocation": equal_alloc} for etf in etf_metrics] - # Normalize allocations to ensure they sum to 100% - total_alloc = sum(allocations.values()) - if total_alloc > 0: - allocations = {k: (v / total_alloc) * 100 for k, v in allocations.items()} - - logger.info(f"Optimized allocations for {risk_tolerance} risk tolerance: {allocations}") + logger.info(f"Final allocations: {allocations}") return allocations except Exception as e: logger.error(f"Error optimizing portfolio allocation: {str(e)}") logger.error(traceback.format_exc()) - return {} + return [] def adjust_allocations_for_correlation( allocations: Dict[str, float], @@ -493,15 +516,18 @@ def fetch_etf_data_fmp(ticker: str) -> Optional[Dict[str, Any]]: """ try: if not FMP_API_KEY: - logger.warning("FMP API key not configured, skipping FMP data fetch") + logger.warning("FMP API key not configured in environment variables") + st.warning("FMP API key not found in environment variables. 
Some features may be limited.") return None session = get_fmp_session() # Get profile data for current price profile_url = f"{FMP_BASE_URL}/profile/{ticker}?apikey={FMP_API_KEY}" - logger.info(f"Fetching FMP profile data for {ticker}") + logger.info(f"Making FMP API call to {profile_url}") profile_response = session.get(profile_url) + st.session_state.api_calls += 1 + logger.info(f"FMP API call count: {st.session_state.api_calls}") if profile_response.status_code != 200: logger.error(f"FMP API error for {ticker}: {profile_response.status_code}") @@ -523,8 +549,10 @@ def fetch_etf_data_fmp(ticker: str) -> Optional[Dict[str, Any]]: # Get dividend history dividend_url = f"{FMP_BASE_URL}/historical-price-full/stock_dividend/{ticker}?apikey={FMP_API_KEY}" - logger.info(f"Fetching FMP dividend data for {ticker}") + logger.info(f"Making FMP API call to {dividend_url}") dividend_response = session.get(dividend_url) + st.session_state.api_calls += 1 + logger.info(f"FMP API call count: {st.session_state.api_calls}") if dividend_response.status_code != 200: logger.error(f"FMP API error for dividend data: {dividend_response.status_code}") @@ -660,22 +688,89 @@ def fetch_etf_data(tickers: List[str]) -> pd.DataFrame: """ try: data = {} + cache_dir = Path("cache") + cache_dir.mkdir(exist_ok=True) + + logger.info("=== Starting ETF data fetch ===") + logger.info(f"Force refresh enabled: {st.session_state.get('force_refresh_data', False)}") + logger.info(f"Cache directory: {cache_dir.absolute()}") + for ticker in tickers: if not ticker: # Skip empty tickers continue - logger.info(f"Processing {ticker}") + logger.info(f"\n=== Processing {ticker} ===") - # Try FMP first - etf_data = fetch_etf_data_fmp(ticker) + # Check cache first if not forcing refresh + cache_file = cache_dir / f"{ticker}_data.json" + logger.info(f"Cache file path: {cache_file.absolute()}") + logger.info(f"Cache file exists: {cache_file.exists()}") + + if not st.session_state.get("force_refresh_data", False) and 
cache_file.exists(): + try: + with open(cache_file, 'r') as f: + cached_data = json.load(f) + cache_time = datetime.fromisoformat(cached_data.get('timestamp', '2000-01-01')) + cache_age = datetime.now() - cache_time + logger.info(f"Cache age: {cache_age.total_seconds() / 3600:.2f} hours") + + if cache_age < timedelta(hours=24): + logger.info(f"Using cached data for {ticker}") + data[ticker] = cached_data['data'] + continue + else: + logger.info(f"Cache expired for {ticker} (age: {cache_age.total_seconds() / 3600:.2f} hours)") + except Exception as e: + logger.warning(f"Error reading cache for {ticker}: {str(e)}") + logger.warning(traceback.format_exc()) + else: + logger.info(f"No cache found or force refresh enabled for {ticker}") + + # Try FMP first if enabled + if USE_FMP_API and FMP_API_KEY: + logger.info(f"Making FMP API call for {ticker}") + etf_data = fetch_etf_data_fmp(ticker) + if etf_data is not None: + # Cache the data + try: + cache_data = { + 'timestamp': datetime.now().isoformat(), + 'data': etf_data + } + with open(cache_file, 'w') as f: + json.dump(cache_data, f) + logger.info(f"Cached FMP data for {ticker}") + except Exception as e: + logger.warning(f"Error caching FMP data for {ticker}: {str(e)}") + logger.warning(traceback.format_exc()) + + data[ticker] = etf_data + st.session_state.api_calls += 1 + logger.info(f"Total API calls: {st.session_state.api_calls}") + continue # If FMP fails, try yfinance - if etf_data is None: - logger.info(f"Falling back to yfinance for {ticker}") - etf_data = fetch_etf_data_yfinance(ticker) + logger.info(f"Falling back to yfinance for {ticker}") + etf_data = fetch_etf_data_yfinance(ticker) + if etf_data is not None: + # Cache the data + try: + cache_data = { + 'timestamp': datetime.now().isoformat(), + 'data': etf_data + } + with open(cache_file, 'w') as f: + json.dump(cache_data, f) + logger.info(f"Cached yfinance data for {ticker}") + except Exception as e: + logger.warning(f"Error caching yfinance data for 
{ticker}: {str(e)}") + logger.warning(traceback.format_exc()) + + data[ticker] = etf_data + continue # Only use HIGH_YIELD_ETFS data if both FMP and yfinance failed - if etf_data is None and ticker in HIGH_YIELD_ETFS: + if ticker in HIGH_YIELD_ETFS: logger.info(f"Using fallback data from HIGH_YIELD_ETFS for {ticker}") etf_data = { "Ticker": ticker, @@ -684,10 +779,7 @@ def fetch_etf_data(tickers: List[str]) -> pd.DataFrame: "Distribution Period": HIGH_YIELD_ETFS[ticker]["frequency"], "Risk Level": "High" } - - if etf_data is not None: data[ticker] = etf_data - logger.info(f"Final data for {ticker}: {etf_data}") else: logger.error(f"Failed to fetch data for {ticker} from all sources") @@ -719,173 +811,109 @@ def fetch_etf_data(tickers: List[str]) -> pd.DataFrame: return pd.DataFrame() def run_portfolio_simulation( - mode: str, - target: float, - risk_tolerance: str, - etf_inputs: List[Dict[str, str]], - enable_drip: bool, - enable_erosion: bool -) -> Tuple[pd.DataFrame, pd.DataFrame]: + tickers: List[str], + weights: List[float], + initial_investment: float, + start_date: str, + end_date: str, + rebalance_frequency: str = 'monthly', + use_fmp: bool = True +) -> Dict[str, Any]: """ - Run the portfolio simulation using the new optimization system. + Run portfolio simulation with the given parameters. 
Args: - mode: Simulation mode ("income_target" or "capital_target") - target: Target value (monthly income or initial capital) - risk_tolerance: Risk tolerance level - etf_inputs: List of ETF inputs - enable_drip: Whether to enable dividend reinvestment - enable_erosion: Whether to enable NAV & yield erosion + tickers: List of ETF tickers + weights: List of portfolio weights + initial_investment: Initial investment amount + start_date: Start date for simulation + end_date: End date for simulation + rebalance_frequency: Frequency of rebalancing + use_fmp: Whether to use FMP API for data Returns: - Tuple of (ETF data DataFrame, Final allocation DataFrame) + Dictionary with simulation results """ try: - logger.info(f"Starting portfolio simulation with mode: {mode}, target: {target}") - logger.info(f"ETF inputs: {etf_inputs}") - - # Fetch real ETF data - tickers = [input["ticker"] for input in etf_inputs if input["ticker"]] # Filter out empty tickers - logger.info(f"Processing tickers: {tickers}") - - if not tickers: - st.error("No valid tickers provided") - return pd.DataFrame(), pd.DataFrame() - - # Fetch price and dividend data for all ETFs - price_data_dict = {} - dividend_data_dict = {} - etf_metrics_list = [] - + # Validate inputs + if not tickers or not weights: + raise ValueError("No tickers or weights provided") + if len(tickers) != len(weights): + raise ValueError("Number of tickers must match number of weights") + if not all(0 <= w <= 1 for w in weights): + raise ValueError("Weights must be between 0 and 1") + if abs(sum(weights) - 1.0) > 1e-6: # tolerance for float rounding + raise ValueError("Weights must sum to 1") + + # Get historical data + historical_data = {} for ticker in tickers: - try: - # Fetch price history - price_url = f"{FMP_BASE_URL}/historical-price-full/{ticker}?apikey={FMP_API_KEY}" - price_response = get_fmp_session().get(price_url) - if price_response.status_code == 200: - price_data = pd.DataFrame(price_response.json().get("historical", [])) - if not price_data.empty: - 
price_data_dict[ticker] = price_data - - # Fetch dividend history - dividend_url = f"{FMP_BASE_URL}/historical-price-full/stock_dividend/{ticker}?apikey={FMP_API_KEY}" - dividend_response = get_fmp_session().get(dividend_url) - if dividend_response.status_code == 200: - dividend_data = pd.DataFrame(dividend_response.json().get("historical", [])) - if not dividend_data.empty: - dividend_data_dict[ticker] = dividend_data - - # Calculate metrics - if ticker in price_data_dict and ticker in dividend_data_dict: - metrics = calculate_etf_metrics( - ticker, - price_data_dict[ticker], - dividend_data_dict[ticker] - ) - etf_metrics_list.append(metrics) + if use_fmp and FMP_API_KEY: + data = fetch_etf_data_fmp(ticker) + if data and 'historical' in data: + historical_data[ticker] = data['historical'] else: - logger.warning(f"Missing price or dividend data for {ticker}") + logger.warning(f"Falling back to yfinance for {ticker}") + data = fetch_etf_data_yfinance(ticker) + if data and 'historical' in data: + historical_data[ticker] = data['historical'] + else: + data = fetch_etf_data_yfinance(ticker) + if data and 'historical' in data: + historical_data[ticker] = data['historical'] - except Exception as e: - logger.error(f"Error processing {ticker}: {str(e)}") - continue - - if not etf_metrics_list: - st.error("Failed to fetch ETF data") - return pd.DataFrame(), pd.DataFrame() - - # Calculate correlation matrix - correlation_matrix = calculate_correlation_matrix(price_data_dict) - - # Optimize portfolio allocation - allocations = optimize_portfolio_allocation( - etf_metrics_list, - risk_tolerance, - correlation_matrix - ) - - if not allocations: - st.error("Failed to optimize portfolio allocation") - return pd.DataFrame(), pd.DataFrame() - - # Create final allocation DataFrame - final_alloc = pd.DataFrame(etf_metrics_list) - - # Ensure all required columns exist - required_columns = [ - "Ticker", - "Yield (%)", - "Price", - "Risk Level" - ] - - for col in required_columns: - if 
col not in final_alloc.columns: - logger.error(f"Missing required column: {col}") - st.error(f"Missing required column: {col}") - return pd.DataFrame(), pd.DataFrame() - - # Add allocation column - final_alloc["Allocation (%)"] = final_alloc["Ticker"].map(allocations) - - if mode == "income_target": - # Calculate required capital for income target - monthly_income = target - annual_income = monthly_income * 12 + if not historical_data: + raise ValueError("No historical data available for any tickers") - # Calculate weighted average yield - weighted_yield = (final_alloc["Allocation (%)"] * final_alloc["Yield (%)"]).sum() / 100 - logger.info(f"Calculated weighted yield: {weighted_yield:.2f}%") + # Create portfolio DataFrame + portfolio = pd.DataFrame() + for ticker, data in historical_data.items(): + portfolio[ticker] = data['close'] - # Validate weighted yield - if weighted_yield <= 0: - st.error(f"Invalid weighted yield calculated: {weighted_yield:.2f}%") - return pd.DataFrame(), pd.DataFrame() + # Calculate portfolio returns + portfolio_returns = portfolio.pct_change() + portfolio_returns = portfolio_returns.fillna(0) + + # Calculate weighted returns + weighted_returns = pd.DataFrame() + for i, ticker in enumerate(tickers): + weighted_returns[ticker] = portfolio_returns[ticker] * weights[i] - # Calculate required capital based on weighted yield - required_capital = (annual_income / weighted_yield) * 100 - logger.info(f"Calculated required capital: ${required_capital:,.2f}") - else: - required_capital = target - logger.info(f"Using provided capital: ${required_capital:,.2f}") + portfolio_returns['portfolio'] = weighted_returns.sum(axis=1) - # Calculate capital allocation and income - final_alloc["Capital Allocated ($)"] = (final_alloc["Allocation (%)"] / 100) * required_capital - final_alloc["Shares"] = final_alloc["Capital Allocated ($)"] / final_alloc["Price"] - final_alloc["Income Contributed ($)"] = (final_alloc["Capital Allocated ($)"] * final_alloc["Yield 
(%)"]) / 100 + # Calculate cumulative returns + cumulative_returns = (1 + portfolio_returns).cumprod() - logger.info(f"Final allocation calculated:\n{final_alloc}") + # Calculate portfolio value + portfolio_value = initial_investment * cumulative_returns['portfolio'] - # Apply erosion if enabled - if enable_erosion: - # Apply a small erosion factor to yield and price - erosion_factor = 0.98 # 2% erosion per year - final_alloc["Yield (%)"] = final_alloc["Yield (%)"] * erosion_factor - final_alloc["Price"] = final_alloc["Price"] * erosion_factor - final_alloc["Income Contributed ($)"] = (final_alloc["Capital Allocated ($)"] * final_alloc["Yield (%)"]) / 100 - logger.info("Applied erosion factor to yield and price") + # Calculate metrics + total_return = (portfolio_value.iloc[-1] / initial_investment) - 1 + annual_return = (1 + total_return) ** (252 / len(portfolio_value)) - 1 + volatility = portfolio_returns['portfolio'].std() * np.sqrt(252) + sharpe_ratio = annual_return / volatility if volatility != 0 else 0 - # Validate final calculations - total_capital = final_alloc["Capital Allocated ($)"].sum() - total_income = final_alloc["Income Contributed ($)"].sum() - effective_yield = (total_income / total_capital) * 100 + # Calculate drawdown + rolling_max = portfolio_value.expanding().max() + drawdown = (portfolio_value - rolling_max) / rolling_max + max_drawdown = drawdown.min() - logger.info(f"Final validation - Total Capital: ${total_capital:,.2f}, Total Income: ${total_income:,.2f}, Effective Yield: {effective_yield:.2f}%") - - if effective_yield <= 0: - st.error(f"Invalid effective yield calculated: {effective_yield:.2f}%") - return pd.DataFrame(), pd.DataFrame() - - # Create ETF data DataFrame for display - etf_data = pd.DataFrame(etf_metrics_list) - - return etf_data, final_alloc + return { + 'portfolio_value': portfolio_value, + 'returns': portfolio_returns, + 'cumulative_returns': cumulative_returns, + 'total_return': total_return, + 'annual_return': 
annual_return, + 'volatility': volatility, + 'sharpe_ratio': sharpe_ratio, + 'max_drawdown': max_drawdown, + 'drawdown': drawdown + } except Exception as e: - st.error(f"Error in portfolio simulation: {str(e)}") - logger.error(f"Error in run_portfolio_simulation: {str(e)}") - logger.error(traceback.format_exc()) - return pd.DataFrame(), pd.DataFrame() + logger.error(f"Error in portfolio simulation: {str(e)}") + st.error(f"Error running portfolio simulation: {str(e)}") + return None def portfolio_summary(final_alloc: pd.DataFrame) -> None: """ @@ -1090,25 +1118,54 @@ def allocate_for_income(df: pd.DataFrame, target: float, etf_allocations: List[D # Create final allocation DataFrame final_alloc = df.copy() + # Initialize allocation column if it doesn't exist + if "Allocation (%)" not in final_alloc.columns: + final_alloc["Allocation (%)"] = 0.0 + # Set allocations for alloc in etf_allocations: mask = final_alloc["Ticker"] == alloc["ticker"] - final_alloc.loc[mask, "Allocation (%)"] = alloc["allocation"] + if mask.any(): + final_alloc.loc[mask, "Allocation (%)"] = alloc["allocation"] + else: + logger.warning(f"Ticker {alloc['ticker']} not found in DataFrame") + + # Verify allocations are set + if final_alloc["Allocation (%)"].sum() == 0: + logger.error("No allocations were set") + return None # Calculate required capital for income target monthly_income = target annual_income = monthly_income * 12 - avg_yield = final_alloc["Yield (%)"].mean() - required_capital = (annual_income / avg_yield) * 100 + + # Calculate weighted average yield + weighted_yield = (final_alloc["Allocation (%)"] * final_alloc["Yield (%)"]).sum() / 100 + if weighted_yield == 0: + logger.error("Weighted yield is zero") + return None + + # Calculate required capital + required_capital = (annual_income / weighted_yield) * 100 # Calculate capital allocation and income final_alloc["Capital Allocated ($)"] = (final_alloc["Allocation (%)"] / 100) * required_capital final_alloc["Shares"] = 
final_alloc["Capital Allocated ($)"] / final_alloc["Price"] final_alloc["Income Contributed ($)"] = (final_alloc["Capital Allocated ($)"] * final_alloc["Yield (%)"]) / 100 + # Verify calculations + total_income = final_alloc["Income Contributed ($)"].sum() + if abs(total_income - annual_income) > 1.0: # Allow for small rounding errors + logger.warning(f"Total income ({total_income}) does not match target ({annual_income})") + + logger.info(f"Income allocation completed. Required capital: ${required_capital:,.2f}") + logger.info(f"Final allocations:\n{final_alloc}") + return final_alloc + except Exception as e: - st.error(f"Error in income allocation: {str(e)}") + logger.error(f"Error in income allocation: {str(e)}") + logger.error(traceback.format_exc()) return None def allocate_for_capital(df: pd.DataFrame, initial_capital: float, etf_allocations: List[Dict[str, Any]]) -> pd.DataFrame: @@ -1127,19 +1184,41 @@ def allocate_for_capital(df: pd.DataFrame, initial_capital: float, etf_allocatio # Create final allocation DataFrame final_alloc = df.copy() + # Initialize allocation column if it doesn't exist + if "Allocation (%)" not in final_alloc.columns: + final_alloc["Allocation (%)"] = 0.0 + # Set allocations for alloc in etf_allocations: mask = final_alloc["Ticker"] == alloc["ticker"] - final_alloc.loc[mask, "Allocation (%)"] = alloc["allocation"] + if mask.any(): + final_alloc.loc[mask, "Allocation (%)"] = alloc["allocation"] + else: + logger.warning(f"Ticker {alloc['ticker']} not found in DataFrame") + + # Verify allocations are set + if final_alloc["Allocation (%)"].sum() == 0: + logger.error("No allocations were set") + return None # Calculate capital allocation and income final_alloc["Capital Allocated ($)"] = (final_alloc["Allocation (%)"] / 100) * initial_capital final_alloc["Shares"] = final_alloc["Capital Allocated ($)"] / final_alloc["Price"] final_alloc["Income Contributed ($)"] = (final_alloc["Capital Allocated ($)"] * final_alloc["Yield (%)"]) / 100 + # 
Verify calculations + total_capital = final_alloc["Capital Allocated ($)"].sum() + if abs(total_capital - initial_capital) > 1.0: # Allow for small rounding errors + logger.warning(f"Total capital ({total_capital}) does not match initial capital ({initial_capital})") + + logger.info(f"Capital allocation completed. Initial capital: ${initial_capital:,.2f}") + logger.info(f"Final allocations:\n{final_alloc}") + return final_alloc + except Exception as e: - st.error(f"Error in capital allocation: {str(e)}") + logger.error(f"Error in capital allocation: {str(e)}") + logger.error(traceback.format_exc()) return None def reset_simulation(): @@ -1154,23 +1233,105 @@ def reset_simulation(): st.session_state.enable_erosion = False st.rerun() -def test_fmp_connection(): - """Test the FMP API connection and display status.""" +def test_fmp_connection() -> bool: + """Test connection to FMP API.""" try: if not FMP_API_KEY: - return False, "No API key found" + st.error("FMP API key not found in environment variables") + return False session = get_fmp_session() - test_url = f"{FMP_BASE_URL}/profile/AAPL?apikey={FMP_API_KEY}" + test_url = f"{FMP_BASE_URL}/profile/SPY?apikey={FMP_API_KEY}" + logger.info(f"Making FMP API test call to {test_url}") response = session.get(test_url) + st.session_state.api_calls += 1 + logger.info(f"FMP API call count: {st.session_state.api_calls}") if response.status_code == 200: - data = response.json() - if data and isinstance(data, list) and len(data) > 0: - return True, "Connected" - return False, f"Error: {response.status_code}" + st.success("Successfully connected to FMP API") + return True + else: + st.error(f"Failed to connect to FMP API: {response.status_code}") + logger.error(f"FMP API test failed: {response.text}") + return False except Exception as e: - return False, f"Error: {str(e)}" + st.error(f"Error testing FMP connection: {str(e)}") + logger.error(f"FMP API test error: {str(e)}") + return False + +def get_cache_stats() -> Dict[str, 
Any]: + """ + Get statistics about the cache usage. + + Returns: + Dictionary containing cache statistics + """ + try: + cache_dir = Path("cache") + if not cache_dir.exists(): + return { + "ticker_count": 0, + "file_count": 0, + "total_size_kb": 0 + } + + # Get all cache files + cache_files = list(cache_dir.glob("**/*.json")) + + # Count unique tickers + tickers = set() + for file in cache_files: + # Extract ticker from filename (assuming format: ticker_data_type.json) + ticker = file.stem.split('_')[0] + tickers.add(ticker) + + # Calculate total size + total_size = sum(file.stat().st_size for file in cache_files) + + return { + "ticker_count": len(tickers), + "file_count": len(cache_files), + "total_size_kb": total_size / 1024 # Convert to KB + } + + except Exception as e: + logger.error(f"Error getting cache stats: {str(e)}") + return { + "ticker_count": 0, + "file_count": 0, + "total_size_kb": 0 + } + +def clear_cache(ticker: Optional[str] = None) -> None: + """ + Clear cache files for a specific ticker or all tickers. + + Args: + ticker: Optional ticker symbol to clear cache for. If None, clears all cache. 
+ """ + try: + cache_dir = Path("cache") + if not cache_dir.exists(): + return + + if ticker: + # Clear cache for specific ticker + pattern = f"{ticker.upper()}_*.json" + cache_files = list(cache_dir.glob(f"**/{pattern}")) + else: + # Clear all cache files + cache_files = list(cache_dir.glob("**/*.json")) + + # Delete cache files + for file in cache_files: + try: + file.unlink() + logger.info(f"Deleted cache file: {file}") + except Exception as e: + logger.error(f"Error deleting cache file {file}: {str(e)}") + + except Exception as e: + logger.error(f"Error clearing cache: {str(e)}") # Set page config st.set_page_config( @@ -1197,6 +1358,10 @@ if 'enable_drip' not in st.session_state: st.session_state.enable_drip = False if 'enable_erosion' not in st.session_state: st.session_state.enable_erosion = False +if 'api_calls' not in st.session_state: + st.session_state.api_calls = 0 +if 'force_refresh_data' not in st.session_state: + st.session_state.force_refresh_data = False # Main title st.title("📈 ETF Portfolio Builder") @@ -1296,27 +1461,47 @@ with st.sidebar: st.session_state.initial_capital = initial_capital # Run simulation - df_data, final_alloc = run_portfolio_simulation( - simulation_mode.lower().replace(" ", "_"), - st.session_state.target, - risk_tolerance, - etf_inputs, - st.session_state.enable_drip, - st.session_state.enable_erosion - ) + logger.info("Starting portfolio simulation...") + logger.info(f"ETF inputs: {etf_inputs}") - if df_data is not None and not df_data.empty and final_alloc is not None and not final_alloc.empty: - # Store results in session state - st.session_state.simulation_run = True - st.session_state.df_data = df_data - st.session_state.final_alloc = final_alloc - st.success("Portfolio simulation completed!") - st.rerun() + df_data = fetch_etf_data([etf["ticker"] for etf in etf_inputs]) + logger.info(f"Fetched ETF data:\n{df_data}") + + if df_data is not None and not df_data.empty: + logger.info("Calculating optimal allocations...") + 
# Calculate allocations based on risk tolerance + etf_allocations = optimize_portfolio_allocation( + df_data.to_dict('records'), + risk_tolerance, + pd.DataFrame() # Empty correlation matrix for now + ) + logger.info(f"Optimal allocations: {etf_allocations}") + + if simulation_mode == "Income Target": + logger.info(f"Allocating for income target: ${monthly_target}") + final_alloc = allocate_for_income(df_data, monthly_target, etf_allocations) + else: + logger.info(f"Allocating for capital target: ${initial_capital}") + final_alloc = allocate_for_capital(df_data, initial_capital, etf_allocations) + + logger.info(f"Final allocation result:\n{final_alloc}") + + if final_alloc is not None and not final_alloc.empty: + # Store results in session state + st.session_state.simulation_run = True + st.session_state.df_data = df_data + st.session_state.final_alloc = final_alloc + st.success("Portfolio simulation completed!") + st.rerun() + else: + st.error("Failed to generate portfolio allocation. Please check your inputs and try again.") + logger.error("Allocation returned empty DataFrame") + logger.error(f"df_data columns: {df_data.columns}") + logger.error(f"df_data shape: {df_data.shape}") + logger.error(f"df_data:\n{df_data}") else: - st.error("Simulation failed to generate valid results. Please check your inputs and try again.") - logger.error("Simulation returned empty DataFrames") - logger.error(f"df_data: {df_data}") - logger.error(f"final_alloc: {final_alloc}") + st.error("Failed to fetch ETF data. 
Please check your tickers and try again.") + logger.error("ETF data fetch returned empty DataFrame") except Exception as e: st.error(f"Error running simulation: {str(e)}") @@ -1330,11 +1515,57 @@ with st.sidebar: # Add FMP connection status to the navigation bar st.sidebar.markdown("---") st.sidebar.subheader("FMP API Status") - connection_status, message = test_fmp_connection() + connection_status = test_fmp_connection() if connection_status: - st.sidebar.success(f"✅ FMP API: {message}") + st.sidebar.success("✅ FMP API: Connected") else: - st.sidebar.error(f"❌ FMP API: {message}") + st.sidebar.error("❌ FMP API: Connection failed") + + # Advanced Options section in sidebar + with st.sidebar.expander("Advanced Options"): + # Option to toggle FMP API usage + use_fmp_api = st.checkbox("Use FMP API for high-yield ETFs", value=USE_FMP_API, + help="Use Financial Modeling Prep API for more accurate yield data on high-yield ETFs") + if use_fmp_api != USE_FMP_API: + # Update global setting if changed + globals()["USE_FMP_API"] = use_fmp_api + st.success("FMP API usage setting updated") + + # Add cache controls + st.subheader("Cache Settings") + + # Display cache statistics + cache_stats = get_cache_stats() + st.write(f"Cache contains data for {cache_stats['ticker_count']} tickers ({cache_stats['file_count']} files, {cache_stats['total_size_kb']:.1f} KB)") + + # Force refresh option + st.session_state.force_refresh_data = st.checkbox( + "Force refresh data (ignore cache)", + value=st.session_state.get("force_refresh_data", False), + help="When enabled, always fetch fresh data from APIs" + ) + + # Cache clearing options + col1, col2 = st.columns(2) + with col1: + if st.button("Clear All Cache"): + clear_cache() + st.success("All cache files cleared!") + st.session_state.api_calls = 0 + + with col2: + ticker_to_clear = st.text_input("Clear cache for ticker:", key="cache_ticker") + if st.button("Clear") and ticker_to_clear: + clear_cache(ticker_to_clear) + st.success(f"Cache 
cleared for {ticker_to_clear.upper()}") + + # Show API call counter + st.write(f"API calls this session: {st.session_state.api_calls}") + + # Add option for debug mode and parallel processing + debug_mode = st.checkbox("Enable Debug Mode", help="Show detailed error logs.") + parallel_processing = st.checkbox("Enable Parallel Processing", value=True, + help="Fetch data for multiple ETFs simultaneously") # Display results and interactive allocation adjustment UI after simulation is run if st.session_state.simulation_run and st.session_state.df_data is not None: