Adding sql lite et reorganizing services folder
This commit is contained in:
parent
c30e89f82c
commit
9d25a01082
@ -11,6 +11,7 @@ from typing import Dict, Optional, List
|
||||
import yfinance as yf
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from ..api.factory import APIFactory
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -33,6 +34,9 @@ class DataService:
|
||||
self.fmp_profiles_dir, self.fmp_historical_dir, self.fmp_holdings_dir]:
|
||||
directory.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Initialize API factory
|
||||
self.api_factory = APIFactory()
|
||||
|
||||
def get_etf_data(self, ticker: str) -> Dict:
|
||||
"""Get ETF data using fallback logic:
|
||||
1. Try FMP cache
|
||||
@ -196,7 +200,80 @@ class DataService:
|
||||
|
||||
def _get_from_fmp(self, ticker: str) -> Optional[Dict]:
|
||||
"""Get data from FMP API"""
|
||||
# TODO: Implement FMP API integration
|
||||
try:
|
||||
# Get FMP client
|
||||
fmp_client = self.api_factory.get_client('fmp')
|
||||
|
||||
# Get ETF profile
|
||||
profile = fmp_client.get_etf_profile(ticker)
|
||||
if not profile:
|
||||
return None
|
||||
|
||||
# Get historical data
|
||||
hist_data = fmp_client.get_etf_historical_data(ticker)
|
||||
if hist_data.empty:
|
||||
return None
|
||||
|
||||
# Get holdings
|
||||
holdings = fmp_client.get_etf_holdings(ticker)
|
||||
|
||||
# Get dividend history
|
||||
dividend_history = fmp_client.get_dividend_history(ticker)
|
||||
|
||||
# Get sector weightings
|
||||
sector_weightings = fmp_client.get_sector_weightings(ticker)
|
||||
|
||||
# Calculate metrics
|
||||
hist_data['log_returns'] = np.log(hist_data['close'] / hist_data['close'].shift(1))
|
||||
returns = hist_data['log_returns'].dropna()
|
||||
|
||||
# Calculate annualized volatility using daily log returns
|
||||
volatility = returns.std() * np.sqrt(252)
|
||||
|
||||
# Calculate max drawdown using rolling window
|
||||
rolling_max = hist_data['close'].rolling(window=252, min_periods=1).max()
|
||||
daily_drawdown = hist_data['close'] / rolling_max - 1.0
|
||||
max_drawdown = abs(daily_drawdown.min())
|
||||
|
||||
# Calculate Sharpe ratio (assuming risk-free rate of 0.02)
|
||||
risk_free_rate = 0.02
|
||||
excess_returns = returns - risk_free_rate/252
|
||||
sharpe_ratio = np.sqrt(252) * excess_returns.mean() / returns.std()
|
||||
|
||||
# Calculate Sortino ratio
|
||||
downside_returns = returns[returns < 0]
|
||||
sortino_ratio = np.sqrt(252) * excess_returns.mean() / downside_returns.std()
|
||||
|
||||
# Calculate dividend trend
|
||||
if not dividend_history.empty:
|
||||
dividend_history['date'] = pd.to_datetime(dividend_history['date'])
|
||||
dividend_history = dividend_history.sort_values('date')
|
||||
dividend_trend = dividend_history['dividend'].pct_change().mean() * 100
|
||||
else:
|
||||
dividend_trend = 0.0
|
||||
|
||||
# Calculate age in years
|
||||
if 'inceptionDate' in profile:
|
||||
inception_date = pd.to_datetime(profile['inceptionDate'])
|
||||
age_years = (pd.Timestamp.now() - inception_date).days / 365.25
|
||||
else:
|
||||
age_years = 0.0
|
||||
|
||||
return {
|
||||
'info': profile,
|
||||
'hist': hist_data.to_dict('records'),
|
||||
'holdings': holdings,
|
||||
'volatility': volatility,
|
||||
'max_drawdown': max_drawdown,
|
||||
'sharpe_ratio': sharpe_ratio,
|
||||
'sortino_ratio': sortino_ratio,
|
||||
'dividend_trend': dividend_trend,
|
||||
'age_years': age_years,
|
||||
'is_new': age_years < 2
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching FMP data for {ticker}: {str(e)}")
|
||||
return None
|
||||
|
||||
def _get_from_yfinance(self, ticker: str) -> Optional[Dict]:
|
||||
@ -342,60 +419,19 @@ class DataService:
|
||||
}
|
||||
|
||||
def get_etf_list(self) -> List[str]:
|
||||
"""Get list of available ETFs with fallback logic:
|
||||
1. Try FMP API
|
||||
2. Try yfinance
|
||||
3. Use default list as last resort
|
||||
"""
|
||||
"""Get list of available ETFs"""
|
||||
try:
|
||||
# Try FMP API first
|
||||
api_key = os.environ.get("FMP_API_KEY")
|
||||
if api_key:
|
||||
from ..api import APIFactory
|
||||
api_factory = APIFactory(api_key)
|
||||
fmp_client = api_factory.get_client('fmp')
|
||||
|
||||
etfs = fmp_client.get_all_etfs()
|
||||
if etfs:
|
||||
logger.info(f"Retrieved {len(etfs)} ETFs from FMP API")
|
||||
return [etf['symbol'] for etf in etfs if etf.get('symbol')]
|
||||
|
||||
# Try yfinance as fallback
|
||||
try:
|
||||
import yfinance as yf
|
||||
# Get list of ETFs from yfinance
|
||||
etf_list = yf.download("^GSPC", period="1d", progress=False).index # Just to initialize yfinance
|
||||
etf_list = yf.download("^GSPC", period="1d", progress=False).index # Get S&P 500 components
|
||||
if not etf_list.empty:
|
||||
logger.info("Retrieved ETF list from yfinance")
|
||||
return list(etf_list)
|
||||
except Exception as e:
|
||||
logger.warning(f"Error getting ETF list from yfinance: {str(e)}")
|
||||
|
||||
# Use default list as last resort
|
||||
logger.warning("Using default ETF list as fallback")
|
||||
return [
|
||||
# Core ETFs
|
||||
'VTI', 'VOO', 'VEA', 'VWO', 'BND', # Vanguard
|
||||
'SPY', 'QQQ', 'DIA', 'IWM', 'EFA', # iShares
|
||||
'AGG', 'TLT', 'LQD', 'HYG', # Fixed Income
|
||||
# Income ETFs
|
||||
'JEPI', 'FEPI', 'MSTY', 'SCHD', 'VIG',
|
||||
# Sector ETFs
|
||||
'XLK', 'XLF', 'XLV', 'XLE', 'XLU', # Tech, Financial, Healthcare, Energy, Utilities
|
||||
# Factor ETFs
|
||||
'MTUM', 'VLUE', 'QUAL', 'SIZE', # Momentum, Value, Quality, Size
|
||||
# International
|
||||
'EWJ', 'EWU', 'EWZ', 'EWC', # Japan, UK, Brazil, Canada
|
||||
# Alternative
|
||||
'GLD', 'VNQ', 'REM' # Gold, REITs, Mortgage REITs
|
||||
# Define a list of high-yield ETFs to track
|
||||
etf_list = [
|
||||
'JEPI', 'JEPQ', 'FEPI', 'CONY', 'MSTY', 'SDIV', 'DIV', 'VIGI',
|
||||
'VYM', 'VIG', 'DVY', 'SCHD', 'DGRO', 'VIGI', 'VIG', 'VYM',
|
||||
'DVY', 'SCHD', 'DGRO', 'VIGI', 'VIG', 'VYM', 'DVY', 'SCHD',
|
||||
'DGRO', 'VIGI', 'VIG', 'VYM', 'DVY', 'SCHD', 'DGRO'
|
||||
]
|
||||
|
||||
# Remove duplicates while preserving order
|
||||
return list(dict.fromkeys(etf_list))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting ETF list: {str(e)}")
|
||||
# Return default list as last resort
|
||||
return [
|
||||
'VTI', 'VOO', 'SPY', 'QQQ', 'BND', # Core ETFs
|
||||
'JEPI', 'FEPI', 'MSTY', 'SCHD', # Income ETFs
|
||||
'XLK', 'XLF', 'XLV', 'XLE', 'XLU' # Sector ETFs
|
||||
]
|
||||
return []
|
||||
@ -1,491 +0,0 @@
|
||||
"""
|
||||
ETF Selection Service for optimizing ETF selection based on investment goals
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from datetime import datetime
|
||||
import os
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class RiskTolerance(Enum):
|
||||
CONSERVATIVE = "Conservative"
|
||||
MODERATE = "Moderate"
|
||||
AGGRESSIVE = "Aggressive"
|
||||
|
||||
@dataclass
|
||||
class InvestmentGoal:
|
||||
capital_target: float
|
||||
income_target: Optional[float] = None
|
||||
risk_tolerance: RiskTolerance = RiskTolerance.MODERATE
|
||||
investment_horizon: int = 5 # years
|
||||
|
||||
class ETFSelectionService:
|
||||
def __init__(self, data_service):
|
||||
self.data_service = data_service
|
||||
self.selection_criteria = {
|
||||
'tier1': {
|
||||
'expense_ratio': 0.10, # 0.10% or less
|
||||
'aum': 5_000_000_000, # $5B or more
|
||||
'tracking_error': 0.05, # 0.05% or less
|
||||
'avg_volume': 100_000 # 100K shares/day
|
||||
},
|
||||
'tier2': {
|
||||
'expense_ratio': 0.25, # 0.25% or less
|
||||
'aum': 1_000_000_000, # $1B or more
|
||||
'tracking_error': 0.10, # 0.10% or less
|
||||
'avg_volume': 50_000 # 50K shares/day
|
||||
}
|
||||
}
|
||||
|
||||
def select_etfs(self, goal: InvestmentGoal) -> List[Dict]:
|
||||
"""
|
||||
Select ETFs based on investment goals and risk tolerance
|
||||
Returns a list of recommended ETFs with allocation percentages
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Starting ETF selection with goal: {goal}")
|
||||
|
||||
# Validate investment goal
|
||||
self._validate_investment_goal(goal)
|
||||
|
||||
# Get all available ETFs
|
||||
etfs = self._get_available_etfs()
|
||||
if not etfs:
|
||||
error_msg = "No ETFs available for selection. Please check your data source connection."
|
||||
logger.error(error_msg)
|
||||
raise ValueError(error_msg)
|
||||
|
||||
logger.info(f"Found {len(etfs)} available ETFs")
|
||||
|
||||
# Filter ETFs based on criteria
|
||||
filtered_etfs = self._filter_etfs(etfs, goal)
|
||||
if not filtered_etfs:
|
||||
error_msg = (
|
||||
"No ETFs passed the filtering criteria. This could be due to:\n"
|
||||
"1. High expense ratios (>0.25%)\n"
|
||||
"2. Low assets under management (<$1B)\n"
|
||||
"3. High tracking error (>0.10%)\n"
|
||||
"4. Low trading volume (<50K shares/day)\n"
|
||||
"Please try adjusting your risk tolerance or investment goals."
|
||||
)
|
||||
logger.error(error_msg)
|
||||
raise ValueError(error_msg)
|
||||
|
||||
logger.info(f"{len(filtered_etfs)} ETFs passed filtering criteria")
|
||||
|
||||
# Score ETFs based on criteria
|
||||
scored_etfs = self._score_etfs(filtered_etfs, goal)
|
||||
if not scored_etfs:
|
||||
error_msg = (
|
||||
"No ETFs passed the scoring criteria. This could be due to:\n"
|
||||
"1. Poor risk-adjusted returns (low Sharpe ratio)\n"
|
||||
"2. High volatility\n"
|
||||
"3. Large drawdowns\n"
|
||||
"4. Insufficient dividend yield for income goals\n"
|
||||
"Please try adjusting your risk tolerance or investment goals."
|
||||
)
|
||||
logger.error(error_msg)
|
||||
raise ValueError(error_msg)
|
||||
|
||||
logger.info(f"{len(scored_etfs)} ETFs passed scoring criteria")
|
||||
|
||||
# Optimize portfolio allocation
|
||||
portfolio = self._optimize_portfolio(scored_etfs, goal)
|
||||
if not portfolio:
|
||||
error_msg = (
|
||||
"Failed to optimize portfolio allocation. This could be due to:\n"
|
||||
"1. Insufficient diversification opportunities\n"
|
||||
"2. Conflicting investment goals\n"
|
||||
"3. Risk tolerance constraints\n"
|
||||
"Please try adjusting your investment goals or risk tolerance."
|
||||
)
|
||||
logger.error(error_msg)
|
||||
raise ValueError(error_msg)
|
||||
|
||||
logger.info(f"Successfully generated portfolio with {len(portfolio)} ETFs")
|
||||
return portfolio
|
||||
|
||||
except ValueError as ve:
|
||||
# Re-raise ValueError with the same message
|
||||
raise ve
|
||||
except Exception as e:
|
||||
error_msg = f"Unexpected error during ETF selection: {str(e)}"
|
||||
logger.error(error_msg, exc_info=True)
|
||||
raise ValueError(f"Unable to generate portfolio suggestions: {str(e)}")
|
||||
|
||||
def _validate_investment_goal(self, goal: InvestmentGoal) -> None:
|
||||
"""Validate investment goal parameters"""
|
||||
if goal.capital_target <= 0:
|
||||
raise ValueError("Capital target must be greater than 0")
|
||||
|
||||
if goal.income_target and goal.income_target <= 0:
|
||||
raise ValueError("Income target must be greater than 0")
|
||||
|
||||
if not isinstance(goal.risk_tolerance, RiskTolerance):
|
||||
raise ValueError("Risk tolerance must be a valid RiskTolerance enum value")
|
||||
|
||||
def _get_available_etfs(self) -> List[Dict]:
|
||||
"""Get list of available ETFs with their data"""
|
||||
try:
|
||||
# Get list of ETFs
|
||||
etf_list = self.data_service.get_etf_list()
|
||||
if not etf_list:
|
||||
error_msg = "No ETFs available from data service. Please check your data source connection."
|
||||
logger.error(error_msg)
|
||||
raise ValueError(error_msg)
|
||||
|
||||
logger.info(f"Retrieved {len(etf_list)} ETFs from data service")
|
||||
|
||||
# Process each ETF
|
||||
processed_etfs = []
|
||||
for ticker in etf_list:
|
||||
try:
|
||||
# Get ETF data
|
||||
etf_data = self.data_service.get_etf_data(ticker)
|
||||
if not etf_data:
|
||||
logger.warning(f"No data available for {ticker}")
|
||||
continue
|
||||
|
||||
# Process ETF data
|
||||
processed_etf = {
|
||||
'ticker': ticker,
|
||||
'name': etf_data.get('info', {}).get('longName', ticker),
|
||||
'expense_ratio': etf_data.get('info', {}).get('annualReportExpenseRatio', 0.5) / 100,
|
||||
'aum': etf_data.get('info', {}).get('totalAssets', 0),
|
||||
'avg_volume': etf_data.get('info', {}).get('averageVolume', 0),
|
||||
'tracking_error': 0.0, # Not available from yfinance
|
||||
'volatility': float(etf_data.get('volatility', 0)),
|
||||
'max_drawdown': float(etf_data.get('max_drawdown', 0)),
|
||||
'sharpe_ratio': float(etf_data.get('sharpe_ratio', 0)),
|
||||
'top_holding_weight': 0.0, # Not available from yfinance
|
||||
'dividend_yield': float(etf_data.get('dividend_yield', 0)) / 100,
|
||||
'category': etf_data.get('info', {}).get('category', 'Unknown'),
|
||||
'asset_class': etf_data.get('info', {}).get('assetClass', 'Unknown'),
|
||||
'sector': etf_data.get('info', {}).get('sector', 'Unknown'),
|
||||
'region': etf_data.get('info', {}).get('region', 'Unknown'),
|
||||
'strategy': etf_data.get('info', {}).get('strategy', 'Unknown')
|
||||
}
|
||||
|
||||
processed_etfs.append(processed_etf)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error processing ETF {ticker}: {str(e)}")
|
||||
continue
|
||||
|
||||
if not processed_etfs:
|
||||
error_msg = "No ETFs could be processed. Please check your data source connection."
|
||||
logger.error(error_msg)
|
||||
raise ValueError(error_msg)
|
||||
|
||||
logger.info(f"Successfully processed {len(processed_etfs)} ETFs")
|
||||
return processed_etfs
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Error fetching available ETFs: {str(e)}"
|
||||
logger.error(error_msg)
|
||||
raise ValueError(error_msg)
|
||||
|
||||
def _get_etfs_from_yfinance(self) -> List[Dict]:
|
||||
"""Get ETF data from yfinance as fallback"""
|
||||
try:
|
||||
# Get list of ETFs from data service
|
||||
etf_list = self.data_service.get_etf_list()
|
||||
if not etf_list:
|
||||
logger.error("No ETF list available from data service")
|
||||
return []
|
||||
|
||||
processed_etfs = []
|
||||
|
||||
for ticker in etf_list:
|
||||
try:
|
||||
# Get ETF data from data service
|
||||
etf_data = self.data_service.get_etf_data(ticker)
|
||||
if not etf_data:
|
||||
continue
|
||||
|
||||
# Process ETF data
|
||||
processed_etf = {
|
||||
'ticker': ticker,
|
||||
'name': etf_data.get('info', {}).get('longName', ticker),
|
||||
'expense_ratio': etf_data.get('info', {}).get('annualReportExpenseRatio', 0.5) / 100,
|
||||
'aum': etf_data.get('info', {}).get('totalAssets', 0),
|
||||
'avg_volume': etf_data.get('info', {}).get('averageVolume', 0),
|
||||
'tracking_error': 0.0, # Not available from yfinance
|
||||
'volatility': float(etf_data.get('volatility', 0)),
|
||||
'max_drawdown': float(etf_data.get('max_drawdown', 0)),
|
||||
'sharpe_ratio': float(etf_data.get('sharpe_ratio', 0)),
|
||||
'top_holding_weight': 0.0, # Not available from yfinance
|
||||
'dividend_yield': float(etf_data.get('dividend_yield', 0)) / 100,
|
||||
'category': etf_data.get('info', {}).get('category', 'Unknown'),
|
||||
'asset_class': etf_data.get('info', {}).get('assetClass', 'Unknown'),
|
||||
'sector': etf_data.get('info', {}).get('sector', 'Unknown'),
|
||||
'region': etf_data.get('info', {}).get('region', 'Unknown'),
|
||||
'strategy': etf_data.get('info', {}).get('strategy', 'Unknown')
|
||||
}
|
||||
|
||||
processed_etfs.append(processed_etf)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error processing ETF {ticker} from yfinance: {str(e)}")
|
||||
continue
|
||||
|
||||
return processed_etfs
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching ETFs from yfinance: {str(e)}")
|
||||
return []
|
||||
|
||||
def _filter_etfs(self, etfs: List[Dict], goal: InvestmentGoal) -> List[Dict]:
|
||||
"""Filter ETFs based on selection criteria and investment goals"""
|
||||
filtered = []
|
||||
|
||||
for etf in etfs:
|
||||
try:
|
||||
# Skip ETFs with red flags
|
||||
if self._has_red_flags(etf):
|
||||
continue
|
||||
|
||||
# Apply investment goal specific filters
|
||||
if goal.income_target:
|
||||
# For income-focused goals, ensure minimum dividend yield
|
||||
min_yield = 0.02 # 2% minimum yield
|
||||
if goal.income_target > 0.05: # If income target is high
|
||||
min_yield = 0.04 # Require higher yield
|
||||
if etf.get('dividend_yield', 0) < min_yield:
|
||||
continue
|
||||
|
||||
# Apply risk tolerance specific filters
|
||||
if goal.risk_tolerance == RiskTolerance.CONSERVATIVE:
|
||||
# For conservative investors, focus on:
|
||||
# - Lower volatility
|
||||
# - Higher dividend yield
|
||||
# - Lower max drawdown
|
||||
# - More established ETFs
|
||||
if (etf.get('volatility', 1.0) > 0.15 or
|
||||
etf.get('max_drawdown', 1.0) > 0.20 or
|
||||
etf.get('aum', 0) < 1_000_000_000 or # $1B minimum
|
||||
etf.get('asset_class', '').lower() in ['leveraged', 'inverse']):
|
||||
continue
|
||||
elif goal.risk_tolerance == RiskTolerance.AGGRESSIVE:
|
||||
# For aggressive investors, focus on:
|
||||
# - Higher potential returns (Sharpe ratio)
|
||||
# - Growth potential
|
||||
# - Sector/theme exposure
|
||||
if (etf.get('sharpe_ratio', 0) < 0.8 or
|
||||
etf.get('category', '').lower() in ['bond', 'fixed income'] or
|
||||
etf.get('aum', 0) < 500_000_000): # $500M minimum
|
||||
continue
|
||||
|
||||
# Check if ETF meets tier criteria
|
||||
tier = self._get_etf_tier(etf)
|
||||
if tier > 0: # Only include ETFs that meet at least tier 2 criteria
|
||||
etf['tier'] = tier
|
||||
filtered.append(etf)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error filtering ETF {etf.get('ticker', 'unknown')}: {str(e)}")
|
||||
continue
|
||||
|
||||
return filtered
|
||||
|
||||
def _has_red_flags(self, etf: Dict) -> bool:
|
||||
"""Check if ETF has any red flags"""
|
||||
try:
|
||||
# High expense ratio
|
||||
if etf.get('expense_ratio', 1.0) > 0.50:
|
||||
logger.debug(f"ETF {etf.get('ticker')} rejected: High expense ratio")
|
||||
return True
|
||||
|
||||
# Small AUM
|
||||
if etf.get('aum', 0) < 100_000_000: # $100M
|
||||
logger.debug(f"ETF {etf.get('ticker')} rejected: Small AUM")
|
||||
return True
|
||||
|
||||
# High tracking error
|
||||
if etf.get('tracking_error', 1.0) > 0.50:
|
||||
logger.debug(f"ETF {etf.get('ticker')} rejected: High tracking error")
|
||||
return True
|
||||
|
||||
# Concentrated holdings
|
||||
if etf.get('top_holding_weight', 0) > 0.20: # 20%
|
||||
logger.debug(f"ETF {etf.get('ticker')} rejected: Concentrated holdings")
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking red flags for ETF {etf.get('ticker', 'unknown')}: {str(e)}")
|
||||
return True
|
||||
|
||||
def _get_etf_tier(self, etf: Dict) -> int:
|
||||
"""Determine ETF tier based on criteria"""
|
||||
try:
|
||||
# Tier 1 criteria
|
||||
if (etf.get('expense_ratio', 1.0) <= self.selection_criteria['tier1']['expense_ratio'] and
|
||||
etf.get('aum', 0) >= self.selection_criteria['tier1']['aum'] and
|
||||
etf.get('tracking_error', 1.0) <= self.selection_criteria['tier1']['tracking_error'] and
|
||||
etf.get('avg_volume', 0) >= self.selection_criteria['tier1']['avg_volume']):
|
||||
return 1
|
||||
|
||||
# Tier 2 criteria
|
||||
if (etf.get('expense_ratio', 1.0) <= self.selection_criteria['tier2']['expense_ratio'] and
|
||||
etf.get('aum', 0) >= self.selection_criteria['tier2']['aum'] and
|
||||
etf.get('tracking_error', 1.0) <= self.selection_criteria['tier2']['tracking_error'] and
|
||||
etf.get('avg_volume', 0) >= self.selection_criteria['tier2']['avg_volume']):
|
||||
return 2
|
||||
|
||||
return 0
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error determining tier for ETF {etf.get('ticker', 'unknown')}: {str(e)}")
|
||||
return 0
|
||||
|
||||
def _score_etfs(self, etfs: List[Dict], goal: InvestmentGoal) -> List[Dict]:
|
||||
"""Score ETFs based on investment goals and risk tolerance"""
|
||||
scored = []
|
||||
|
||||
for etf in etfs:
|
||||
try:
|
||||
score = 0
|
||||
score_components = {}
|
||||
|
||||
# Base score from tier
|
||||
tier_score = (3 - etf['tier']) * 10 # Higher score for better tier
|
||||
score += tier_score
|
||||
score_components['tier'] = tier_score
|
||||
|
||||
# Risk-adjusted return score
|
||||
if etf.get('sharpe_ratio'):
|
||||
sharpe_score = min(etf['sharpe_ratio'] * 5, 20) # Max 20 points
|
||||
score += sharpe_score
|
||||
score_components['sharpe'] = sharpe_score
|
||||
|
||||
# Volatility score (lower is better)
|
||||
if etf.get('volatility'):
|
||||
vol_score = max(0, 20 - (etf['volatility'] * 100)) # Max 20 points
|
||||
score += vol_score
|
||||
score_components['volatility'] = vol_score
|
||||
|
||||
# Income goal specific scoring
|
||||
if goal.income_target:
|
||||
# Score based on dividend yield and stability
|
||||
if etf.get('dividend_yield'):
|
||||
# Higher score for ETFs with yield closer to target
|
||||
target_yield = goal.income_target / goal.capital_target
|
||||
yield_diff = abs(etf['dividend_yield'] - target_yield)
|
||||
div_score = max(0, 20 - (yield_diff * 100)) # Max 20 points
|
||||
score += div_score
|
||||
score_components['dividend'] = div_score
|
||||
|
||||
# Bonus for stable dividends
|
||||
if etf.get('dividend_trend', 0) > 0:
|
||||
score += 5
|
||||
score_components['dividend_stability'] = 5
|
||||
|
||||
# AUM score
|
||||
aum_billions = etf.get('aum', 0) / 1_000_000_000
|
||||
aum_score = min(aum_billions, 10) # Max 10 points
|
||||
score += aum_score
|
||||
score_components['aum'] = aum_score
|
||||
|
||||
# Risk tolerance specific scoring
|
||||
if goal.risk_tolerance == RiskTolerance.CONSERVATIVE:
|
||||
# Favor stability and income
|
||||
if etf.get('dividend_yield', 0) > 0.03: # 3% yield
|
||||
score += 10
|
||||
score_components['income_focus'] = 10
|
||||
if etf.get('volatility', 1.0) < 0.12: # Low volatility
|
||||
score += 10
|
||||
score_components['stability'] = 10
|
||||
if etf.get('asset_class', '').lower() in ['equity', 'fixed income']:
|
||||
score += 5
|
||||
score_components['asset_class'] = 5
|
||||
elif goal.risk_tolerance == RiskTolerance.AGGRESSIVE:
|
||||
# Favor growth and momentum
|
||||
if etf.get('sharpe_ratio', 0) > 1.2: # High Sharpe
|
||||
score += 10
|
||||
score_components['risk_adjusted_return'] = 10
|
||||
if etf.get('category', '').lower() in ['technology', 'growth']:
|
||||
score += 10
|
||||
score_components['growth_potential'] = 10
|
||||
if etf.get('strategy', '').lower() in ['momentum', 'growth']:
|
||||
score += 5
|
||||
score_components['strategy'] = 5
|
||||
|
||||
# Add score and components to ETF data
|
||||
etf['score'] = score
|
||||
etf['score_components'] = score_components
|
||||
scored.append(etf)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error scoring ETF {etf.get('ticker', 'unknown')}: {str(e)}")
|
||||
continue
|
||||
|
||||
# Sort by score in descending order
|
||||
scored.sort(key=lambda x: x.get('score', 0), reverse=True)
|
||||
return scored
|
||||
|
||||
def _optimize_portfolio(self, scored_etfs: List[Dict], goal: InvestmentGoal) -> List[Dict]:
|
||||
"""Optimize portfolio allocation based on investment goals"""
|
||||
try:
|
||||
# Select top ETFs based on score
|
||||
top_etfs = scored_etfs[:5] # Limit to top 5 ETFs
|
||||
|
||||
# Calculate initial weights based on scores
|
||||
total_score = sum(etf['score'] for etf in top_etfs)
|
||||
if total_score == 0:
|
||||
raise ValueError("No valid scores for portfolio optimization")
|
||||
|
||||
# Assign weights based on scores
|
||||
for etf in top_etfs:
|
||||
etf['weight'] = etf['score'] / total_score
|
||||
|
||||
# Adjust weights based on risk tolerance
|
||||
if goal.risk_tolerance == RiskTolerance.CONSERVATIVE:
|
||||
# Favor ETFs with lower volatility and higher Sharpe ratio
|
||||
for etf in top_etfs:
|
||||
if etf.get('volatility', 0) > 0.15 or etf.get('sharpe_ratio', 0) < 1.0:
|
||||
etf['weight'] *= 0.5
|
||||
elif goal.risk_tolerance == RiskTolerance.AGGRESSIVE:
|
||||
# Favor ETFs with higher potential returns
|
||||
for etf in top_etfs:
|
||||
if etf.get('sharpe_ratio', 0) > 1.5:
|
||||
etf['weight'] *= 1.5
|
||||
|
||||
# Normalize weights
|
||||
total_weight = sum(etf['weight'] for etf in top_etfs)
|
||||
for etf in top_etfs:
|
||||
etf['weight'] /= total_weight
|
||||
|
||||
# Format portfolio output
|
||||
portfolio = []
|
||||
for etf in top_etfs:
|
||||
portfolio.append({
|
||||
'ticker': etf['ticker'],
|
||||
'name': etf['name'],
|
||||
'allocation': etf['weight'],
|
||||
'amount': goal.capital_target * etf['weight'],
|
||||
'score': etf['score'],
|
||||
'tier': etf['tier'],
|
||||
'metrics': {
|
||||
'expense_ratio': etf.get('expense_ratio'),
|
||||
'aum': etf.get('aum'),
|
||||
'tracking_error': etf.get('tracking_error'),
|
||||
'volatility': etf.get('volatility'),
|
||||
'sharpe_ratio': etf.get('sharpe_ratio'),
|
||||
'dividend_yield': etf.get('dividend_yield'),
|
||||
'max_drawdown': etf.get('max_drawdown')
|
||||
}
|
||||
})
|
||||
|
||||
return portfolio
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error optimizing portfolio: {str(e)}")
|
||||
raise ValueError(f"Failed to optimize portfolio allocation: {str(e)}")
|
||||
23
ETF_Portal/services/etf_selection_service/__init__.py
Normal file
23
ETF_Portal/services/etf_selection_service/__init__.py
Normal file
@ -0,0 +1,23 @@
|
||||
"""
|
||||
ETF Selection Service package initialization
|
||||
"""
|
||||
|
||||
from .service import ETFSelectionService
|
||||
from .models import InvestmentGoal, RiskTolerance, ETF, ETFUniverse
|
||||
from .exceptions import (
|
||||
ETFSelectionError, ETFDataError, ETFNotFoundError,
|
||||
ValidationError, PortfolioOptimizationError
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'ETFSelectionService',
|
||||
'InvestmentGoal',
|
||||
'RiskTolerance',
|
||||
'ETF',
|
||||
'ETFUniverse',
|
||||
'ETFSelectionError',
|
||||
'ETFDataError',
|
||||
'ETFNotFoundError',
|
||||
'ValidationError',
|
||||
'PortfolioOptimizationError'
|
||||
]
|
||||
179
ETF_Portal/services/etf_selection_service/database.py
Normal file
179
ETF_Portal/services/etf_selection_service/database.py
Normal file
@ -0,0 +1,179 @@
|
||||
"""
|
||||
Database operations for ETF Selection Service
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sqlite3
|
||||
from typing import Dict, List, Optional
|
||||
from datetime import datetime
|
||||
import json
|
||||
|
||||
from .models import ETF, ETFUniverse
|
||||
from .exceptions import DatabaseError, DataUpdateError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ETFDatabase:
|
||||
def __init__(self, db_path: str = "etf_data.db"):
|
||||
self.db_path = db_path
|
||||
self._init_db()
|
||||
|
||||
def _init_db(self):
|
||||
"""Initialize database tables"""
|
||||
try:
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Create ETFs table
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS etfs (
|
||||
ticker TEXT PRIMARY KEY,
|
||||
name TEXT,
|
||||
expense_ratio REAL,
|
||||
aum REAL,
|
||||
avg_volume INTEGER,
|
||||
tracking_error REAL,
|
||||
volatility REAL,
|
||||
max_drawdown REAL,
|
||||
sharpe_ratio REAL,
|
||||
top_holding_weight REAL,
|
||||
dividend_yield REAL,
|
||||
category TEXT,
|
||||
asset_class TEXT,
|
||||
sector TEXT,
|
||||
region TEXT,
|
||||
strategy TEXT,
|
||||
last_updated TEXT
|
||||
)
|
||||
""")
|
||||
|
||||
# Create categories table
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS categories (
|
||||
category TEXT PRIMARY KEY,
|
||||
etfs TEXT,
|
||||
last_updated TEXT
|
||||
)
|
||||
""")
|
||||
|
||||
conn.commit()
|
||||
except sqlite3.Error as e:
|
||||
raise DatabaseError(f"Failed to initialize database: {str(e)}")
|
||||
|
||||
def save_etf(self, etf: ETF):
|
||||
"""Save ETF data to database"""
|
||||
try:
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("""
|
||||
INSERT OR REPLACE INTO etfs (
|
||||
ticker, name, expense_ratio, aum, avg_volume,
|
||||
tracking_error, volatility, max_drawdown,
|
||||
sharpe_ratio, top_holding_weight, dividend_yield,
|
||||
category, asset_class, sector, region, strategy,
|
||||
last_updated
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (
|
||||
etf.ticker, etf.name, etf.expense_ratio, etf.aum,
|
||||
etf.avg_volume, etf.tracking_error, etf.volatility,
|
||||
etf.max_drawdown, etf.sharpe_ratio, etf.top_holding_weight,
|
||||
etf.dividend_yield, etf.category, etf.asset_class,
|
||||
etf.sector, etf.region, etf.strategy,
|
||||
datetime.now().isoformat()
|
||||
))
|
||||
conn.commit()
|
||||
except sqlite3.Error as e:
|
||||
raise DataUpdateError(f"Failed to save ETF data: {str(e)}")
|
||||
|
||||
def get_etf(self, ticker: str) -> Optional[ETF]:
|
||||
"""Get ETF data from database"""
|
||||
try:
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("SELECT * FROM etfs WHERE ticker = ?", (ticker,))
|
||||
row = cursor.fetchone()
|
||||
|
||||
if row:
|
||||
return ETF(
|
||||
ticker=row[0],
|
||||
name=row[1],
|
||||
expense_ratio=row[2],
|
||||
aum=row[3],
|
||||
avg_volume=row[4],
|
||||
tracking_error=row[5],
|
||||
volatility=row[6],
|
||||
max_drawdown=row[7],
|
||||
sharpe_ratio=row[8],
|
||||
top_holding_weight=row[9],
|
||||
dividend_yield=row[10],
|
||||
category=row[11],
|
||||
asset_class=row[12],
|
||||
sector=row[13],
|
||||
region=row[14],
|
||||
strategy=row[15]
|
||||
)
|
||||
return None
|
||||
except sqlite3.Error as e:
|
||||
raise DatabaseError(f"Failed to get ETF data: {str(e)}")
|
||||
|
||||
def save_universe(self, universe: ETFUniverse):
|
||||
"""Save ETF universe to database"""
|
||||
try:
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Save categories
|
||||
for category, etfs in universe.categories.items():
|
||||
cursor.execute("""
|
||||
INSERT OR REPLACE INTO categories (
|
||||
category, etfs, last_updated
|
||||
) VALUES (?, ?, ?)
|
||||
""", (
|
||||
category,
|
||||
json.dumps(etfs),
|
||||
datetime.now().isoformat()
|
||||
))
|
||||
|
||||
conn.commit()
|
||||
except sqlite3.Error as e:
|
||||
raise DataUpdateError(f"Failed to save ETF universe: {str(e)}")
|
||||
|
||||
def get_universe(self) -> ETFUniverse:
|
||||
"""Get ETF universe from database"""
|
||||
try:
|
||||
universe = ETFUniverse()
|
||||
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Get all ETFs
|
||||
cursor.execute("SELECT * FROM etfs")
|
||||
for row in cursor.fetchall():
|
||||
etf = ETF(
|
||||
ticker=row[0],
|
||||
name=row[1],
|
||||
expense_ratio=row[2],
|
||||
aum=row[3],
|
||||
avg_volume=row[4],
|
||||
tracking_error=row[5],
|
||||
volatility=row[6],
|
||||
max_drawdown=row[7],
|
||||
sharpe_ratio=row[8],
|
||||
top_holding_weight=row[9],
|
||||
dividend_yield=row[10],
|
||||
category=row[11],
|
||||
asset_class=row[12],
|
||||
sector=row[13],
|
||||
region=row[14],
|
||||
strategy=row[15]
|
||||
)
|
||||
universe.etfs[etf.ticker] = etf
|
||||
|
||||
# Get categories
|
||||
cursor.execute("SELECT category, etfs FROM categories")
|
||||
for row in cursor.fetchall():
|
||||
universe.categories[row[0]] = json.loads(row[1])
|
||||
|
||||
return universe
|
||||
except sqlite3.Error as e:
|
||||
raise DatabaseError(f"Failed to get ETF universe: {str(e)}")
|
||||
31
ETF_Portal/services/etf_selection_service/exceptions.py
Normal file
31
ETF_Portal/services/etf_selection_service/exceptions.py
Normal file
@ -0,0 +1,31 @@
|
||||
"""
|
||||
Custom exceptions for ETF Selection Service
|
||||
"""
|
||||
|
||||
class ETFSelectionError(Exception):
|
||||
"""Base exception for ETF selection errors"""
|
||||
pass
|
||||
|
||||
class ETFDataError(ETFSelectionError):
|
||||
"""Exception raised when ETF data is invalid or missing"""
|
||||
pass
|
||||
|
||||
class ETFNotFoundError(ETFSelectionError):
|
||||
"""Exception raised when ETF is not found"""
|
||||
pass
|
||||
|
||||
class DatabaseError(ETFSelectionError):
|
||||
"""Exception raised for database-related errors"""
|
||||
pass
|
||||
|
||||
class DataUpdateError(ETFSelectionError):
|
||||
"""Exception raised when data update fails"""
|
||||
pass
|
||||
|
||||
class ValidationError(ETFSelectionError):
|
||||
"""Exception raised when input validation fails"""
|
||||
pass
|
||||
|
||||
class PortfolioOptimizationError(ETFSelectionError):
|
||||
"""Exception raised when portfolio optimization fails"""
|
||||
pass
|
||||
52
ETF_Portal/services/etf_selection_service/models.py
Normal file
52
ETF_Portal/services/etf_selection_service/models.py
Normal file
@ -0,0 +1,52 @@
|
||||
"""
|
||||
Data models and enums for ETF Selection Service
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Optional, Dict, List
|
||||
|
||||
class RiskTolerance(Enum):
|
||||
CONSERVATIVE = "Conservative"
|
||||
MODERATE = "Moderate"
|
||||
AGGRESSIVE = "Aggressive"
|
||||
|
||||
@dataclass
|
||||
class InvestmentGoal:
|
||||
capital_target: float
|
||||
income_target: Optional[float] = None
|
||||
risk_tolerance: RiskTolerance = RiskTolerance.MODERATE
|
||||
investment_horizon: int = 5 # years
|
||||
|
||||
@dataclass
|
||||
class ETF:
|
||||
ticker: str
|
||||
name: str
|
||||
expense_ratio: float
|
||||
aum: float
|
||||
avg_volume: float
|
||||
tracking_error: float
|
||||
volatility: float
|
||||
max_drawdown: float
|
||||
sharpe_ratio: float
|
||||
top_holding_weight: float
|
||||
dividend_yield: float
|
||||
category: str
|
||||
asset_class: str
|
||||
sector: str
|
||||
region: str
|
||||
strategy: str
|
||||
|
||||
class ETFCategory(Enum):
|
||||
EQUITY = "Equity"
|
||||
FIXED_INCOME = "Fixed Income"
|
||||
COMMODITY = "Commodity"
|
||||
REAL_ESTATE = "Real Estate"
|
||||
CRYPTO = "Crypto"
|
||||
MULTI_ASSET = "Multi-Asset"
|
||||
|
||||
class ETFUniverse:
|
||||
def __init__(self):
|
||||
self.etfs: Dict[str, ETF] = {}
|
||||
self.categories: Dict[str, List[str]] = {}
|
||||
self.last_updated: Optional[str] = None
|
||||
418
ETF_Portal/services/etf_selection_service/service.py
Normal file
418
ETF_Portal/services/etf_selection_service/service.py
Normal file
@ -0,0 +1,418 @@
|
||||
"""
|
||||
ETF Selection Service for optimizing ETF selection based on investment goals
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from datetime import datetime
|
||||
import os
|
||||
|
||||
from .models import InvestmentGoal, RiskTolerance, ETF, ETFUniverse, ETFCategory
|
||||
from .exceptions import (
|
||||
ETFSelectionError, ETFDataError, ETFNotFoundError,
|
||||
ValidationError, PortfolioOptimizationError
|
||||
)
|
||||
from .database import ETFDatabase
|
||||
from .utils import (
|
||||
calculate_volatility, calculate_max_drawdown,
|
||||
calculate_sharpe_ratio, calculate_sortino_ratio,
|
||||
calculate_portfolio_metrics, is_cache_valid
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ETFSelectionService:
|
||||
def __init__(self, data_service):
|
||||
"""
|
||||
Initialize ETF Selection Service
|
||||
|
||||
Args:
|
||||
data_service: Service that provides ETF data (must implement get_etf_list() and get_etf_data())
|
||||
"""
|
||||
self.data_service = data_service
|
||||
self.db = ETFDatabase()
|
||||
self.cache = {}
|
||||
self.cache_ttl = 3600 # 1 hour cache TTL
|
||||
|
||||
self.selection_criteria = {
|
||||
'tier1': {
|
||||
'expense_ratio': 0.10, # 0.10% or less
|
||||
'aum': 5_000_000_000, # $5B or more
|
||||
'tracking_error': 0.05, # 0.05% or less
|
||||
'avg_volume': 100_000 # 100K shares/day
|
||||
},
|
||||
'tier2': {
|
||||
'expense_ratio': 0.25, # 0.25% or less
|
||||
'aum': 1_000_000_000, # $1B or more
|
||||
'tracking_error': 0.10, # 0.10% or less
|
||||
'avg_volume': 50_000 # 50K shares/day
|
||||
}
|
||||
}
|
||||
|
||||
def select_etfs(self, goal: InvestmentGoal) -> List[Dict]:
|
||||
"""
|
||||
Select ETFs based on investment goals and risk tolerance
|
||||
Returns a list of recommended ETFs with allocation percentages
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Starting ETF selection with goal: {goal}")
|
||||
|
||||
# Validate investment goal
|
||||
self._validate_investment_goal(goal)
|
||||
|
||||
# Get ETF data as DataFrame
|
||||
etfs_df = self._get_etfs_dataframe()
|
||||
if etfs_df.empty:
|
||||
raise ETFDataError("No ETFs available for selection")
|
||||
|
||||
logger.info(f"Found {len(etfs_df)} available ETFs")
|
||||
|
||||
# Filter ETFs based on criteria
|
||||
filtered_df = self._filter_etfs_vectorized(etfs_df, goal)
|
||||
if filtered_df.empty:
|
||||
raise ETFSelectionError(
|
||||
"No ETFs passed the filtering criteria. Please try adjusting your risk tolerance or investment goals."
|
||||
)
|
||||
|
||||
logger.info(f"{len(filtered_df)} ETFs passed filtering criteria")
|
||||
|
||||
# Score ETFs based on criteria
|
||||
scored_df = self._score_etfs_vectorized(filtered_df, goal)
|
||||
if scored_df.empty:
|
||||
raise ETFSelectionError(
|
||||
"No ETFs passed the scoring criteria. Please try adjusting your risk tolerance or investment goals."
|
||||
)
|
||||
|
||||
logger.info(f"{len(scored_df)} ETFs passed scoring criteria")
|
||||
|
||||
# Optimize portfolio allocation
|
||||
portfolio = self._optimize_portfolio_vectorized(scored_df, goal)
|
||||
if portfolio.empty:
|
||||
raise PortfolioOptimizationError(
|
||||
"Failed to optimize portfolio allocation. Please try adjusting your investment goals or risk tolerance."
|
||||
)
|
||||
|
||||
logger.info(f"Successfully generated portfolio with {len(portfolio)} ETFs")
|
||||
return portfolio.to_dict('records')
|
||||
|
||||
except ETFSelectionError as e:
|
||||
logger.error(str(e))
|
||||
raise
|
||||
except Exception as e:
|
||||
error_msg = f"Unexpected error during ETF selection: {str(e)}"
|
||||
logger.error(error_msg, exc_info=True)
|
||||
raise ETFSelectionError(error_msg)
|
||||
|
||||
def _validate_investment_goal(self, goal: InvestmentGoal) -> None:
|
||||
"""Validate investment goal parameters"""
|
||||
if goal.capital_target <= 0:
|
||||
raise ValidationError("Capital target must be greater than 0")
|
||||
|
||||
if goal.income_target and goal.income_target <= 0:
|
||||
raise ValidationError("Income target must be greater than 0")
|
||||
|
||||
if not isinstance(goal.risk_tolerance, RiskTolerance):
|
||||
raise ValidationError("Risk tolerance must be a valid RiskTolerance enum value")
|
||||
|
||||
def _get_etfs_dataframe(self) -> pd.DataFrame:
|
||||
"""Get ETF data as DataFrame with caching"""
|
||||
cache_key = 'etfs_dataframe'
|
||||
|
||||
# Check cache first
|
||||
if cache_key in self.cache:
|
||||
cache_data, timestamp = self.cache[cache_key]
|
||||
if is_cache_valid(timestamp, self.cache_ttl):
|
||||
return cache_data
|
||||
|
||||
try:
|
||||
# Get list of ETFs
|
||||
etf_list = self.data_service.get_etf_list()
|
||||
if not etf_list:
|
||||
raise ETFDataError("No ETFs available from data service")
|
||||
|
||||
# Batch fetch ETF data
|
||||
etf_data = []
|
||||
for ticker in etf_list:
|
||||
try:
|
||||
data = self.data_service.get_etf_data(ticker)
|
||||
if data:
|
||||
etf_data.append({
|
||||
'ticker': ticker,
|
||||
'name': data.get('info', {}).get('longName', ticker),
|
||||
'expense_ratio': data.get('info', {}).get('annualReportExpenseRatio', 0.5) / 100,
|
||||
'aum': data.get('info', {}).get('totalAssets', 0),
|
||||
'avg_volume': data.get('info', {}).get('averageVolume', 0),
|
||||
'tracking_error': 0.0, # Not available from yfinance
|
||||
'volatility': float(data.get('volatility', 0)),
|
||||
'max_drawdown': float(data.get('max_drawdown', 0)),
|
||||
'sharpe_ratio': float(data.get('sharpe_ratio', 0)),
|
||||
'top_holding_weight': 0.0, # Not available from yfinance
|
||||
'dividend_yield': float(data.get('dividend_yield', 0)) / 100,
|
||||
'category': data.get('info', {}).get('category', 'Unknown'),
|
||||
'asset_class': data.get('info', {}).get('assetClass', 'Unknown'),
|
||||
'sector': data.get('info', {}).get('sector', 'Unknown'),
|
||||
'region': data.get('info', {}).get('region', 'Unknown'),
|
||||
'strategy': data.get('info', {}).get('strategy', 'Unknown')
|
||||
})
|
||||
except Exception as e:
|
||||
logger.warning(f"Error processing ETF {ticker}: {str(e)}")
|
||||
continue
|
||||
|
||||
if not etf_data:
|
||||
raise ETFDataError("No ETFs could be processed")
|
||||
|
||||
# Convert to DataFrame
|
||||
df = pd.DataFrame(etf_data)
|
||||
|
||||
# Cache the result
|
||||
self.cache[cache_key] = (df, datetime.now().isoformat())
|
||||
|
||||
return df
|
||||
|
||||
except Exception as e:
|
||||
raise ETFDataError(f"Error fetching ETF data: {str(e)}")
|
||||
|
||||
def _filter_etfs_vectorized(self, df: pd.DataFrame, goal: InvestmentGoal) -> pd.DataFrame:
|
||||
"""Filter ETFs using vectorized operations"""
|
||||
# Get criteria based on risk tolerance
|
||||
criteria = self.selection_criteria['tier1' if goal.risk_tolerance == RiskTolerance.CONSERVATIVE else 'tier2']
|
||||
|
||||
# Create boolean masks for filtering
|
||||
mask = (
|
||||
(df['expense_ratio'] <= criteria['expense_ratio']) &
|
||||
(df['aum'] >= criteria['aum']) &
|
||||
(df['tracking_error'] <= criteria['tracking_error']) &
|
||||
(df['avg_volume'] >= criteria['avg_volume'])
|
||||
)
|
||||
|
||||
return df[mask]
|
||||
|
||||
def _score_etfs_vectorized(self, df: pd.DataFrame, goal: InvestmentGoal) -> pd.DataFrame:
|
||||
"""Score ETFs using vectorized operations"""
|
||||
# Calculate risk score
|
||||
df['risk_score'] = (
|
||||
df['volatility'] * 0.3 +
|
||||
df['max_drawdown'] * 0.3 +
|
||||
(1 - df['sharpe_ratio']) * 0.4
|
||||
)
|
||||
|
||||
# Calculate income score if income target is specified
|
||||
if goal.income_target:
|
||||
df['income_score'] = df['dividend_yield'] / goal.income_target
|
||||
|
||||
# Calculate final score
|
||||
if goal.income_target:
|
||||
df['final_score'] = (
|
||||
(1 - df['risk_score']) * 0.6 +
|
||||
df['income_score'] * 0.4
|
||||
)
|
||||
else:
|
||||
df['final_score'] = 1 - df['risk_score']
|
||||
|
||||
# Normalize scores
|
||||
df['final_score'] = (df['final_score'] - df['final_score'].min()) / (df['final_score'].max() - df['final_score'].min())
|
||||
|
||||
# Filter out ETFs with low scores
|
||||
return df[df['final_score'] >= 0.5]
|
||||
|
||||
def _optimize_portfolio_vectorized(self, df: pd.DataFrame, goal: InvestmentGoal) -> pd.DataFrame:
|
||||
"""Optimize portfolio allocation using vectorized operations"""
|
||||
# Sort by final score
|
||||
df = df.sort_values('final_score', ascending=False)
|
||||
|
||||
# Select top ETFs based on risk tolerance
|
||||
n_etfs = 5 if goal.risk_tolerance == RiskTolerance.CONSERVATIVE else 10
|
||||
df = df.head(n_etfs)
|
||||
|
||||
# Calculate weights based on scores
|
||||
df['weight'] = df['final_score'] / df['final_score'].sum()
|
||||
|
||||
# Add allocation amount (already in percentage)
|
||||
df['allocation'] = df['weight'] * 100 # Convert to percentage
|
||||
df['amount'] = df['weight'] * goal.capital_target # Calculate actual amount
|
||||
|
||||
# Add metrics dictionary for each ETF
|
||||
df['metrics'] = df.apply(lambda row: {
|
||||
'expense_ratio': row['expense_ratio'],
|
||||
'aum': row['aum'],
|
||||
'volatility': row['volatility'],
|
||||
'max_drawdown': row['max_drawdown'],
|
||||
'sharpe_ratio': row['sharpe_ratio'],
|
||||
'dividend_yield': row['dividend_yield']
|
||||
}, axis=1)
|
||||
|
||||
return df[['ticker', 'name', 'allocation', 'amount', 'metrics']]
|
||||
|
||||
def _update_etf_universe(self):
|
||||
"""Update ETF universe with latest data"""
|
||||
try:
|
||||
# Get list of ETFs to update
|
||||
etfs_to_update = self._get_etfs_to_update()
|
||||
|
||||
# Update each ETF
|
||||
for ticker in etfs_to_update:
|
||||
try:
|
||||
etf = self._fetch_etf_data(ticker)
|
||||
if etf:
|
||||
self.db.save_etf(etf)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to update ETF {ticker}: {str(e)}")
|
||||
continue
|
||||
|
||||
# Update universe metadata
|
||||
self._update_universe_metadata()
|
||||
|
||||
except Exception as e:
|
||||
raise DataUpdateError(f"Failed to update ETF universe: {str(e)}")
|
||||
|
||||
def _get_etfs_to_update(self) -> List[str]:
|
||||
"""Get list of ETFs that need updating"""
|
||||
try:
|
||||
# Get current universe
|
||||
universe = self.db.get_universe()
|
||||
|
||||
# Get list of all ETFs
|
||||
all_etfs = self.data_service.get_etf_list()
|
||||
|
||||
# Find ETFs that need updating
|
||||
etfs_to_update = []
|
||||
for ticker in all_etfs:
|
||||
if ticker not in universe.etfs:
|
||||
etfs_to_update.append(ticker)
|
||||
else:
|
||||
# Check if data is stale
|
||||
etf = universe.etfs[ticker]
|
||||
if not is_cache_valid(etf.last_updated, self.cache_ttl):
|
||||
etfs_to_update.append(ticker)
|
||||
|
||||
return etfs_to_update
|
||||
|
||||
except Exception as e:
|
||||
raise DataUpdateError(f"Failed to get ETFs to update: {str(e)}")
|
||||
|
||||
def _fetch_etf_data(self, ticker: str) -> Optional[ETF]:
|
||||
"""Fetch ETF data from data service"""
|
||||
try:
|
||||
data = self.data_service.get_etf_data(ticker)
|
||||
if not data:
|
||||
return None
|
||||
|
||||
info = data.get('info', {})
|
||||
|
||||
return ETF(
|
||||
ticker=ticker,
|
||||
name=info.get('longName', ticker),
|
||||
expense_ratio=info.get('annualReportExpenseRatio', 0.5) / 100,
|
||||
aum=info.get('totalAssets', 0),
|
||||
avg_volume=info.get('averageVolume', 0),
|
||||
tracking_error=0.0, # Not available from yfinance
|
||||
volatility=float(data.get('volatility', 0)),
|
||||
max_drawdown=float(data.get('max_drawdown', 0)),
|
||||
sharpe_ratio=float(data.get('sharpe_ratio', 0)),
|
||||
top_holding_weight=0.0, # Not available from yfinance
|
||||
dividend_yield=float(data.get('dividend_yield', 0)) / 100,
|
||||
category=info.get('category', 'Unknown'),
|
||||
asset_class=info.get('assetClass', 'Unknown'),
|
||||
sector=info.get('sector', 'Unknown'),
|
||||
region=info.get('region', 'Unknown'),
|
||||
strategy=info.get('strategy', 'Unknown')
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to fetch ETF data for {ticker}: {str(e)}")
|
||||
return None
|
||||
|
||||
def _determine_category(self, info: Dict) -> ETFCategory:
|
||||
"""Determine ETF category from info"""
|
||||
category = info.get('category', '').lower()
|
||||
|
||||
if 'equity' in category:
|
||||
return ETFCategory.EQUITY
|
||||
elif 'fixed income' in category or 'bond' in category:
|
||||
return ETFCategory.FIXED_INCOME
|
||||
elif 'commodity' in category:
|
||||
return ETFCategory.COMMODITY
|
||||
elif 'real estate' in category:
|
||||
return ETFCategory.REAL_ESTATE
|
||||
elif 'crypto' in category:
|
||||
return ETFCategory.CRYPTO
|
||||
else:
|
||||
return ETFCategory.MULTI_ASSET
|
||||
|
||||
def _determine_risk_level(self, volatility: float) -> str:
|
||||
"""Determine risk level based on volatility"""
|
||||
if volatility < 0.1:
|
||||
return 'Low'
|
||||
elif volatility < 0.2:
|
||||
return 'Medium'
|
||||
else:
|
||||
return 'High'
|
||||
|
||||
def _update_universe_metadata(self):
|
||||
"""Update ETF universe metadata"""
|
||||
try:
|
||||
universe = self.db.get_universe()
|
||||
|
||||
# Group ETFs by category
|
||||
categories = {}
|
||||
for etf in universe.etfs.values():
|
||||
if etf.category not in categories:
|
||||
categories[etf.category] = []
|
||||
categories[etf.category].append(etf.ticker)
|
||||
|
||||
# Update categories
|
||||
universe.categories = categories
|
||||
universe.last_updated = datetime.now().isoformat()
|
||||
|
||||
# Save to database
|
||||
self.db.save_universe(universe)
|
||||
|
||||
except Exception as e:
|
||||
raise DataUpdateError(f"Failed to update universe metadata: {str(e)}")
|
||||
|
||||
def get_etf_universe(self) -> ETFUniverse:
|
||||
"""Get current ETF universe"""
|
||||
try:
|
||||
return self.db.get_universe()
|
||||
except Exception as e:
|
||||
raise DatabaseError(f"Failed to get ETF universe: {str(e)}")
|
||||
|
||||
def search_etfs(self,
|
||||
category: Optional[ETFCategory] = None,
|
||||
risk_level: Optional[str] = None,
|
||||
min_dividend_yield: Optional[float] = None,
|
||||
max_expense_ratio: Optional[float] = None) -> List[ETF]:
|
||||
"""Search ETFs based on criteria"""
|
||||
try:
|
||||
universe = self.get_etf_universe()
|
||||
|
||||
# Filter ETFs
|
||||
filtered_etfs = []
|
||||
for etf in universe.etfs.values():
|
||||
if category and etf.category != category.value:
|
||||
continue
|
||||
if risk_level and self._determine_risk_level(etf.volatility) != risk_level:
|
||||
continue
|
||||
if min_dividend_yield and etf.dividend_yield < min_dividend_yield:
|
||||
continue
|
||||
if max_expense_ratio and etf.expense_ratio > max_expense_ratio:
|
||||
continue
|
||||
filtered_etfs.append(etf)
|
||||
|
||||
return filtered_etfs
|
||||
|
||||
except Exception as e:
|
||||
raise ETFSelectionError(f"Failed to search ETFs: {str(e)}")
|
||||
|
||||
def get_etf_details(self, ticker: str) -> Optional[ETF]:
|
||||
"""Get detailed ETF information"""
|
||||
try:
|
||||
return self.db.get_etf(ticker)
|
||||
except Exception as e:
|
||||
raise ETFNotFoundError(f"Failed to get ETF details for {ticker}: {str(e)}")
|
||||
|
||||
def clear_cache(self):
|
||||
"""Clear service cache"""
|
||||
self.cache.clear()
|
||||
49
ETF_Portal/services/etf_selection_service/utils.py
Normal file
49
ETF_Portal/services/etf_selection_service/utils.py
Normal file
@ -0,0 +1,49 @@
|
||||
"""
|
||||
Utility functions for ETF selection service
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
def calculate_volatility(returns: pd.Series) -> float:
|
||||
"""Calculate annualized volatility from returns series."""
|
||||
return returns.std() * np.sqrt(252)
|
||||
|
||||
def calculate_max_drawdown(prices: pd.Series) -> float:
|
||||
"""Calculate maximum drawdown from price series."""
|
||||
return (prices / prices.cummax() - 1).min()
|
||||
|
||||
def calculate_sharpe_ratio(returns: pd.Series, risk_free_rate: float = 0.02) -> float:
|
||||
"""Calculate Sharpe ratio from returns series."""
|
||||
excess_returns = returns - risk_free_rate/252
|
||||
if len(excess_returns) < 2:
|
||||
return 0.0
|
||||
return np.sqrt(252) * excess_returns.mean() / excess_returns.std()
|
||||
|
||||
def calculate_sortino_ratio(returns: pd.Series, risk_free_rate: float = 0.02) -> float:
|
||||
"""Calculate Sortino ratio from returns series."""
|
||||
excess_returns = returns - risk_free_rate/252
|
||||
downside_returns = excess_returns[excess_returns < 0]
|
||||
if len(downside_returns) < 2:
|
||||
return 0.0
|
||||
return np.sqrt(252) * excess_returns.mean() / downside_returns.std()
|
||||
|
||||
def calculate_portfolio_metrics(weights: np.ndarray, returns: pd.DataFrame) -> Dict[str, float]:
|
||||
"""Calculate portfolio metrics from weights and returns."""
|
||||
portfolio_returns = returns.dot(weights)
|
||||
return {
|
||||
'volatility': calculate_volatility(portfolio_returns),
|
||||
'max_drawdown': calculate_max_drawdown(portfolio_returns.cumsum()),
|
||||
'sharpe_ratio': calculate_sharpe_ratio(portfolio_returns),
|
||||
'sortino_ratio': calculate_sortino_ratio(portfolio_returns)
|
||||
}
|
||||
|
||||
def is_cache_valid(timestamp: str, ttl_seconds: int) -> bool:
|
||||
"""Check if cached data is still valid based on TTL."""
|
||||
try:
|
||||
cache_time = datetime.fromisoformat(timestamp)
|
||||
return datetime.now() - cache_time < timedelta(seconds=ttl_seconds)
|
||||
except (ValueError, TypeError):
|
||||
return False
|
||||
BIN
etf_data.db
Normal file
BIN
etf_data.db
Normal file
Binary file not shown.
@ -21,6 +21,9 @@ import traceback
|
||||
from dotenv import load_dotenv
|
||||
import re
|
||||
from ETF_Portal.services.drip_service import DRIPService, DripConfig
|
||||
from ETF_Portal.services.etf_selection_service import ETFSelectionService, InvestmentGoal, RiskTolerance
|
||||
from ETF_Portal.services.nav_erosion_service import NavErosionService
|
||||
from ETF_Portal.services.data_service import DataService
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv(override=True) # Force reload of environment variables
|
||||
@ -1373,7 +1376,7 @@ def allocate_for_income(df: pd.DataFrame, target: float, etf_allocations: List[D
|
||||
for alloc in etf_allocations:
|
||||
mask = final_alloc["Ticker"] == alloc["ticker"]
|
||||
if mask.any():
|
||||
final_alloc.loc[mask, "Allocation (%)"] = alloc["allocation"]
|
||||
final_alloc.loc[mask, "Allocation (%)"] = alloc["allocation"] # Already in percentage
|
||||
else:
|
||||
logger.warning(f"Ticker {alloc['ticker']} not found in DataFrame")
|
||||
|
||||
@ -1392,7 +1395,7 @@ def allocate_for_income(df: pd.DataFrame, target: float, etf_allocations: List[D
|
||||
logger.error("Weighted yield is zero")
|
||||
return None
|
||||
|
||||
# Calculate required capital based on weighted yield (convert percentage to decimal)
|
||||
# Calculate required capital based on weighted yield
|
||||
required_capital = annual_income / (weighted_yield / 100)
|
||||
|
||||
# Calculate capital allocation and income
|
||||
@ -1439,7 +1442,7 @@ def allocate_for_capital(df: pd.DataFrame, initial_capital: float, etf_allocatio
|
||||
for alloc in etf_allocations:
|
||||
mask = final_alloc["Ticker"] == alloc["ticker"]
|
||||
if mask.any():
|
||||
final_alloc.loc[mask, "Allocation (%)"] = alloc["allocation"]
|
||||
final_alloc.loc[mask, "Allocation (%)"] = alloc["allocation"] # Already in percentage
|
||||
else:
|
||||
logger.warning(f"Ticker {alloc['ticker']} not found in DataFrame")
|
||||
|
||||
@ -2850,7 +2853,8 @@ if st.session_state.simulation_run and st.session_state.df_data is not None:
|
||||
|
||||
# Initialize services
|
||||
from ETF_Portal.services.data_service import DataService
|
||||
from ETF_Portal.services.etf_selection_service import ETFSelectionService, InvestmentGoal, RiskTolerance
|
||||
from ETF_Portal.services.etf_selection_service import ETFSelectionService
|
||||
from ETF_Portal.services.etf_selection_service import InvestmentGoal, RiskTolerance
|
||||
|
||||
data_service = DataService()
|
||||
selection_service = ETFSelectionService(data_service)
|
||||
@ -2916,7 +2920,7 @@ if st.session_state.simulation_run and st.session_state.df_data is not None:
|
||||
st.write("A balanced approach focusing on both growth and income, suitable for most investors.")
|
||||
if balanced_portfolio:
|
||||
portfolio_df = pd.DataFrame(balanced_portfolio)
|
||||
portfolio_df['Allocation (%)'] = portfolio_df['allocation'] * 100
|
||||
portfolio_df['Allocation (%)'] = portfolio_df['allocation'].round().astype(int)
|
||||
portfolio_df['Amount ($)'] = portfolio_df['amount']
|
||||
|
||||
st.dataframe(
|
||||
@ -2947,7 +2951,7 @@ if st.session_state.simulation_run and st.session_state.df_data is not None:
|
||||
st.write("Optimized for higher dividend income with lower risk, suitable for income-focused investors.")
|
||||
if income_portfolio:
|
||||
portfolio_df = pd.DataFrame(income_portfolio)
|
||||
portfolio_df['Allocation (%)'] = portfolio_df['allocation'] * 100
|
||||
portfolio_df['Allocation (%)'] = portfolio_df['allocation'].round().astype(int)
|
||||
portfolio_df['Amount ($)'] = portfolio_df['amount']
|
||||
|
||||
st.dataframe(
|
||||
@ -2978,7 +2982,7 @@ if st.session_state.simulation_run and st.session_state.df_data is not None:
|
||||
st.write("Optimized for capital appreciation with higher risk tolerance, suitable for growth investors.")
|
||||
if growth_portfolio:
|
||||
portfolio_df = pd.DataFrame(growth_portfolio)
|
||||
portfolio_df['Allocation (%)'] = portfolio_df['allocation'] * 100
|
||||
portfolio_df['Allocation (%)'] = portfolio_df['allocation'].round().astype(int)
|
||||
portfolio_df['Amount ($)'] = portfolio_df['amount']
|
||||
|
||||
st.dataframe(
|
||||
@ -3009,7 +3013,7 @@ if st.session_state.simulation_run and st.session_state.df_data is not None:
|
||||
st.write(f"Optimized for your specific risk tolerance ({risk_tolerance}), balancing growth and income.")
|
||||
if risk_adjusted_portfolio:
|
||||
portfolio_df = pd.DataFrame(risk_adjusted_portfolio)
|
||||
portfolio_df['Allocation (%)'] = portfolio_df['allocation'] * 100
|
||||
portfolio_df['Allocation (%)'] = portfolio_df['allocation'].round().astype(int)
|
||||
portfolio_df['Amount ($)'] = portfolio_df['amount']
|
||||
|
||||
st.dataframe(
|
||||
|
||||
Loading…
Reference in New Issue
Block a user