Compare commits

...

30 Commits

SHA1 Message Date
19f713673e Adding education to DRIP/No-DRIP 2025-06-03 19:51:16 +02:00
30e1bbcbd9 fixing communication between Nav_erosion_service and drip_service 2025-06-03 18:42:25 +02:00
c6797c94ee fix: update DRIP service to use correct method and attributes for portfolio calculations 2025-06-03 12:46:32 +02:00
163bf0e93b fix: DRIP Forecast tab improvements - Fix duplicate sections in DRIP Forecast tab - Fix monthly income calculation in detailed allocation table - Add back Detailed Allocation table with improved calculations - Update DRIP vs No-DRIP comparison to use correct variable names 2025-06-02 17:22:50 +02:00
edf2ce5e9c feat: Add DRIP service support files and update related services - Add exceptions and logger - Update service models and implementations - Update portfolio builder integration 2025-06-02 16:16:57 +02:00
4ea1fe2a73 fix: Update DRIP service to use correct MonthlyData structure and fix month attribute error 2025-06-02 16:07:27 +02:00
7889608544 deleted ETF_Portal_Builder.py 2025-06-01 13:09:06 +02:00
b7febef81a updated .gitignore for saved portfolios 2025-06-01 12:49:14 +02:00
5775d25e72 refactor: remove erosion radio buttons and FMP API testing section from navbar 2025-05-30 21:09:35 +02:00
7db493893e Add dividend trend service, documentation, and requirements backup 2025-05-30 18:12:36 +02:00
156b218c94 feat: current dividend trend calculation implementation 2025-05-30 15:30:03 +02:00
8bec6cd8e8 Add nav_erosion_service implementation and fix dividend trend calculation 2025-05-30 00:14:31 +02:00
f548dec7ec fix: ensure dividend trend is always a valid number in API response and add nav_erosion_service implementation 2025-05-30 00:02:14 +02:00
300b127674 Add: Complete nav erosion service implementation with tests 2025-05-29 23:38:55 +02:00
93d93277b1 Fix: Adjusted ETF age calculation and risk scoring 2025-05-29 23:37:13 +02:00
65209331f5 feat: implement NAV erosion risk assessment service and UI integration 2025-05-29 21:49:24 +02:00
c4a7f91867 feat: add DRIP service for portfolio growth calculations with erosion support 2025-05-28 16:27:29 +02:00
40cf9aac63 feat: Update DRIP Forecast implementation with improved erosion calculations and comparison features 2025-05-28 16:12:32 +02:00
81c6a1db48 feat: improve data source tracking and FMP API fallback mechanism 2025-05-28 14:41:10 +02:00
1fc54d5ea9 revert: restore original portfolio allocation logic 2025-05-27 23:53:08 +02:00
57862a1e98 refactor: optimize portfolio allocation for risk tolerance 2025-05-27 23:33:02 +02:00
3929f2d4f0 refactor: remove duplicate ETF display from sidebar for cleaner UI 2025-05-27 20:31:05 +02:00
e0dc6e57eb feat: update API and cache management with improved logging 2025-05-27 16:27:13 +02:00
027febf7da feat: improve logging for cache and API calls tracking 2025-05-27 16:02:15 +02:00
1ff511ebe1 chore: Update project configuration and add setup scripts - Update Docker and Caddy configuration - Add VPS setup and secrets management scripts - Add test suite - Update documentation - Clean up cache files 2025-05-27 14:41:58 +02:00
38e51b4517 chore: Update Docker configuration for secure API key management - Add env_file configuration for Docker - Remove hardcoded API key references 2025-05-27 14:23:27 +02:00
c462342d44 feat: Add API management system with caching support - Add base API client and cache manager classes - Implement FMP and YFinance specific clients and cache managers - Add API factory for managing multiple data providers - Add test suite for API configuration and caching - Add logging configuration for API operations 2025-05-27 14:07:32 +02:00
4fc9452c98 refactor: improve CLI and package structure - Move CLI code to ETF_Portal package - Add proper package setup with setup.py - Update README with installation and usage instructions - Improve process management and cleanup 2025-05-26 19:46:05 +02:00
fd623ac6b9 fix: correct yield calculation in ETF metrics to use TTM dividends 2025-05-25 15:14:47 +00:00
2687b63d3f Initial commit for fixing broken code: - Fixed ETF_Analyzer page config issue - Updated ETF_Portfolio_Builder with improved error handling and data validation 2025-05-24 23:24:40 +00:00
68 changed files with 10368 additions and 3438 deletions

.env.template (Normal file, 5 lines changed)

@@ -0,0 +1,5 @@
# API Keys
FMP_API_KEY=your_api_key_here
# Cache Configuration
CACHE_DURATION_HOURS=24
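
The keys above are read from the process environment at runtime (see `APIFactory` and `api_client.py` below). A minimal sketch of wiring the template up, assuming python-dotenv is used to load a local `.env` copy (the package itself is not part of this diff):

```python
# Minimal sketch, not from the diff: load a .env copied from .env.template.
# Assumes the python-dotenv package; only FMP_API_KEY and
# CACHE_DURATION_HOURS are defined in the template above.
import os

from dotenv import load_dotenv

load_dotenv()  # reads key=value pairs from ./.env into os.environ
fmp_api_key = os.environ.get("FMP_API_KEY")
cache_hours = int(os.environ.get("CACHE_DURATION_HOURS", "24"))
print(f"cache window: {cache_hours}h, key set: {bool(fmp_api_key)}")
```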

.gitignore (vendored, 22 lines changed)

@@ -1,3 +1,11 @@
# Environment variables
.env
#.env.*
# Cache directories
cache/
**/cache/
# Python
__pycache__/
*.py[cod]
@@ -25,8 +33,8 @@ wheels/
# Virtual Environment
venv/
env/
ENV/
env/
# IDE
.idea/
@@ -35,18 +43,16 @@ ENV/
*.swo
# Logs
logs/
*.log
# Cache
.cache/
__pycache__/
logs/
# Local development
.env
.env.local
.env.*.local
# System
.DS_Store
Thumbs.db
Thumbs.db
#saved portfolio
portfolios/*

=0.2.36 (Normal file, 37 lines changed)

@@ -0,0 +1,37 @@
Collecting yfinance
Using cached yfinance-0.2.61-py2.py3-none-any.whl (117 kB)
Requirement already satisfied: pandas>=1.3.0 in ./venv/lib/python3.11/site-packages (from yfinance) (2.2.3)
Requirement already satisfied: numpy>=1.16.5 in ./venv/lib/python3.11/site-packages (from yfinance) (2.2.6)
Requirement already satisfied: requests>=2.31 in ./venv/lib/python3.11/site-packages (from yfinance) (2.32.3)
Collecting multitasking>=0.0.7
Using cached multitasking-0.0.11-py3-none-any.whl (8.5 kB)
Collecting platformdirs>=2.0.0
Using cached platformdirs-4.3.8-py3-none-any.whl (18 kB)
Requirement already satisfied: pytz>=2022.5 in ./venv/lib/python3.11/site-packages (from yfinance) (2025.2)
Collecting frozendict>=2.3.4
Using cached frozendict-2.4.6-py311-none-any.whl (16 kB)
Collecting peewee>=3.16.2
Using cached peewee-3.18.1-py3-none-any.whl
Collecting beautifulsoup4>=4.11.1
Using cached beautifulsoup4-4.13.4-py3-none-any.whl (187 kB)
Collecting curl_cffi>=0.7
Using cached curl_cffi-0.11.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.5 MB)
Requirement already satisfied: protobuf>=3.19.0 in ./venv/lib/python3.11/site-packages (from yfinance) (6.31.0)
Collecting websockets>=13.0
Using cached websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (182 kB)
Collecting soupsieve>1.2
Using cached soupsieve-2.7-py3-none-any.whl (36 kB)
Requirement already satisfied: typing-extensions>=4.0.0 in ./venv/lib/python3.11/site-packages (from beautifulsoup4>=4.11.1->yfinance) (4.13.2)
Collecting cffi>=1.12.0
Using cached cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (467 kB)
Requirement already satisfied: certifi>=2024.2.2 in ./venv/lib/python3.11/site-packages (from curl_cffi>=0.7->yfinance) (2025.4.26)
Requirement already satisfied: python-dateutil>=2.8.2 in ./venv/lib/python3.11/site-packages (from pandas>=1.3.0->yfinance) (2.9.0.post0)
Requirement already satisfied: tzdata>=2022.7 in ./venv/lib/python3.11/site-packages (from pandas>=1.3.0->yfinance) (2025.2)
Requirement already satisfied: charset-normalizer<4,>=2 in ./venv/lib/python3.11/site-packages (from requests>=2.31->yfinance) (3.4.2)
Requirement already satisfied: idna<4,>=2.5 in ./venv/lib/python3.11/site-packages (from requests>=2.31->yfinance) (3.10)
Requirement already satisfied: urllib3<3,>=1.21.1 in ./venv/lib/python3.11/site-packages (from requests>=2.31->yfinance) (2.4.0)
Collecting pycparser
Using cached pycparser-2.22-py3-none-any.whl (117 kB)
Requirement already satisfied: six>=1.5 in ./venv/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas>=1.3.0->yfinance) (1.17.0)
Installing collected packages: peewee, multitasking, websockets, soupsieve, pycparser, platformdirs, frozendict, cffi, beautifulsoup4, curl_cffi, yfinance
Successfully installed beautifulsoup4-4.13.4 cffi-1.17.1 curl_cffi-0.11.1 frozendict-2.4.6 multitasking-0.0.11 peewee-3.18.1 platformdirs-4.3.8 pycparser-2.22 soupsieve-2.7 websockets-15.0.1 yfinance-0.2.61

Caddyfile

@@ -8,7 +8,7 @@ invest.trader-lab.com {
# Main ETF Suite Launcher
handle / {
-reverse_proxy etf-launcher:8500 {
+reverse_proxy etf_portal-etf-launcher-1:8500 {
header_up Host {host}
header_up X-Real-IP {remote}
header_up X-Forwarded-For {remote}
@@ -18,7 +18,7 @@ invest.trader-lab.com {
# Static resources for Streamlit
handle /_stcore/* {
-reverse_proxy etf-launcher:8500 {
+reverse_proxy etf_portal-etf-launcher-1:8500 {
header_up Host {host}
header_up X-Real-IP {remote}
header_up X-Forwarded-For {remote}
@@ -27,7 +27,7 @@ invest.trader-lab.com {
}
handle /static/* {
-reverse_proxy etf-launcher:8500 {
+reverse_proxy etf_portal-etf-launcher-1:8500 {
header_up Host {host}
header_up X-Real-IP {remote}
header_up X-Forwarded-For {remote}

ETF_Portal/__init__.py (Normal file, 7 lines changed)

@@ -0,0 +1,7 @@
"""
ETF Portal
A comprehensive tool for ETF portfolio management and analysis.
"""
__version__ = "0.1.0"

ETF_Portal/api/__init__.py

@@ -0,0 +1,6 @@
from .factory import APIFactory
from .base import BaseAPIClient
from .fmp.client import FMPClient
from .yfinance.client import YFinanceClient
__all__ = ['APIFactory', 'BaseAPIClient', 'FMPClient', 'YFinanceClient']

ETF_Portal/api/base.py (Normal file, 113 lines changed)

@@ -0,0 +1,113 @@
from abc import ABC, abstractmethod
from typing import Dict, List, Optional, Union, Any
import pandas as pd
from datetime import datetime
import time
import logging
class BaseAPIClient(ABC):
"""Base class for all API clients."""
def __init__(self, api_key: Optional[str] = None):
"""Initialize base API client.
Args:
api_key: Optional API key
"""
self.api_key = api_key
self.last_request_time = None
self.rate_limit_delay = 1.0 # Default 1 second between requests
self.logger = logging.getLogger(self.__class__.__name__)
@abstractmethod
def get_etf_profile(self, symbol: str) -> Dict:
"""Get ETF profile data.
Args:
symbol: ETF ticker symbol
Returns:
Dictionary containing ETF profile information
"""
pass
@abstractmethod
def get_etf_holdings(self, symbol: str) -> List[Dict]:
"""Get ETF holdings data.
Args:
symbol: ETF ticker symbol
Returns:
List of dictionaries containing holding information
"""
pass
@abstractmethod
def get_historical_data(self, symbol: str, period: str = '1y') -> pd.DataFrame:
"""Get historical price data.
Args:
symbol: ETF ticker symbol
period: Time period (e.g., '1d', '1w', '1m', '1y')
Returns:
DataFrame with historical price data
"""
pass
@abstractmethod
def get_dividend_history(self, symbol: str) -> pd.DataFrame:
"""Get dividend history.
Args:
symbol: ETF ticker symbol
Returns:
DataFrame with dividend history
"""
pass
@abstractmethod
def get_sector_weightings(self, symbol: str) -> Dict:
"""Get sector weightings.
Args:
symbol: ETF ticker symbol
Returns:
Dictionary with sector weightings
"""
pass
def _check_rate_limit(self):
"""Check and enforce rate limiting."""
if self.last_request_time:
time_since_last = (datetime.now() - self.last_request_time).total_seconds()
if time_since_last < self.rate_limit_delay:
time.sleep(self.rate_limit_delay - time_since_last)
self.last_request_time = datetime.now()
@abstractmethod
def _validate_symbol(self, symbol: str) -> bool:
"""Validate a symbol.
Args:
symbol: Symbol to validate
Returns:
True if valid, False otherwise
"""
pass
def _handle_error(self, error: Exception) -> Dict:
"""Handle API errors.
Args:
error: Exception that occurred
Returns:
Error response dictionary
"""
self.logger.error(f"API error: {str(error)}")
return {"error": str(error)}

ETF_Portal/api/factory.py (Normal file, 148 lines changed)

@@ -0,0 +1,148 @@
from typing import Optional, Dict, Any
import logging
import os
from .base import BaseAPIClient
from .fmp.client import FMPClient
from .yfinance.client import YFinanceClient
from ..cache.fmp_cache import FMPCacheManager
from ..cache.yfinance_cache import YFinanceCacheManager
class APIFactory:
"""Factory for creating and managing API clients."""
def __init__(self, fmp_api_key: Optional[str] = None):
"""Initialize API factory.
Args:
fmp_api_key: Optional FMP API key. If not provided, will try to get from environment variable.
"""
# Try to get API key from environment variable if not provided
self.fmp_api_key = fmp_api_key or os.environ.get('FMP_API_KEY')
if not self.fmp_api_key:
logging.warning("No FMP API key found in environment. Some features may be limited.")
self.logger = logging.getLogger(self.__class__.__name__)
self._clients: Dict[str, BaseAPIClient] = {}
def get_client(self, provider: str = 'fmp') -> BaseAPIClient:
"""Get an API client instance.
Args:
provider: API provider ('fmp' or 'yfinance')
Returns:
API client instance
Raises:
ValueError: If provider is invalid or FMP API key is missing
"""
provider = provider.lower()
if provider not in ['fmp', 'yfinance']:
raise ValueError(f"Invalid provider: {provider}")
if provider in self._clients:
return self._clients[provider]
if provider == 'fmp':
if not self.fmp_api_key:
raise ValueError("FMP API key is required")
client = FMPClient(self.fmp_api_key)
else: # yfinance
client = YFinanceClient()
self._clients[provider] = client
return client
def get_data(self, symbol: str, data_type: str, provider: str = 'fmp', fallback: bool = True) -> Any:
"""Get data from API with fallback support.
Args:
symbol: ETF ticker symbol
data_type: Type of data to retrieve
provider: Primary API provider
fallback: Whether to fall back to yfinance if primary fails
Returns:
Requested data or error information
"""
try:
# Try primary provider
client = self.get_client(provider)
data = getattr(client, f"get_{data_type}")(symbol)
# Check if data is valid
if isinstance(data, dict) and data.get('error'):
if fallback and provider == 'fmp':
self.logger.info(f"Falling back to yfinance for {symbol}")
return self.get_data(symbol, data_type, 'yfinance', False)
return data
return data
except Exception as e:
self.logger.error(f"Error getting {data_type} for {symbol}: {str(e)}")
if fallback and provider == 'fmp':
self.logger.info(f"Falling back to yfinance for {symbol}")
return self.get_data(symbol, data_type, 'yfinance', False)
return {
'error': True,
'message': str(e),
'provider': provider,
'data_type': data_type,
'symbol': symbol
}
def clear_cache(self, provider: Optional[str] = None) -> Dict[str, int]:
"""Clear cache for specified provider or all providers.
Args:
provider: Optional provider to clear cache for
Returns:
Dictionary with number of files cleared per provider
"""
results = {}
if provider:
providers = [provider]
else:
providers = ['fmp', 'yfinance']
for prov in providers:
try:
client = self.get_client(prov)
results[prov] = client.clear_cache()
except Exception as e:
self.logger.error(f"Error clearing cache for {prov}: {str(e)}")
results[prov] = 0
return results
def get_cache_stats(self, provider: Optional[str] = None) -> Dict[str, Dict]:
"""Get cache statistics for specified provider or all providers.
Args:
provider: Optional provider to get stats for
Returns:
Dictionary with cache statistics per provider
"""
results = {}
if provider:
providers = [provider]
else:
providers = ['fmp', 'yfinance']
for prov in providers:
try:
client = self.get_client(prov)
results[prov] = client.get_cache_stats()
except Exception as e:
self.logger.error(f"Error getting cache stats for {prov}: {str(e)}")
results[prov] = {}
return results
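
`get_data` resolves `data_type` to a `get_<data_type>` method on the chosen client, so the intended call pattern looks like the sketch below. (Caveat: as committed, the clients leave some of BaseAPIClient's abstract methods unimplemented, e.g. FMPClient defines `get_etf_historical_data` rather than the declared `get_historical_data`, so instantiation would raise TypeError until the names are reconciled; the sketch assumes that is fixed.)

```python
# Sketch of the intended factory usage; "SPY" is illustrative and
# FMP_API_KEY must be set for the primary provider to be used.
from ETF_Portal.api.factory import APIFactory

factory = APIFactory()  # falls back to the FMP_API_KEY environment variable
data = factory.get_data("SPY", "etf_profile")  # calls client.get_etf_profile("SPY")
if isinstance(data, dict) and data.get("error"):
    print("FMP and the yfinance fallback both failed:", data)
else:
    print(data)
```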

ETF_Portal/api/fmp/__init__.py

@@ -0,0 +1,3 @@
from .client import FMPClient
__all__ = ['FMPClient']

ETF_Portal/api/fmp/client.py

@@ -0,0 +1,214 @@
import requests
import pandas as pd
from typing import Dict, List, Optional
from datetime import datetime
import logging
from ..base import BaseAPIClient
from ...cache.fmp_cache import FMPCacheManager
class FMPClient(BaseAPIClient):
"""Financial Modeling Prep API client."""
BASE_URL = "https://financialmodelingprep.com/api/v3"
def __init__(self, api_key: str, cache_manager: Optional[FMPCacheManager] = None):
"""Initialize FMP client.
Args:
api_key: FMP API key
cache_manager: Optional cache manager instance
"""
super().__init__(api_key)
self.cache_manager = cache_manager or FMPCacheManager()
self.logger = logging.getLogger(self.__class__.__name__)
def _validate_symbol(self, symbol: str) -> bool:
"""Validate ETF symbol format.
Args:
symbol: ETF ticker symbol
Returns:
True if valid, False otherwise
"""
return bool(symbol and isinstance(symbol, str) and symbol.isupper())
def _make_request(self, endpoint: str, params: Dict = None) -> Dict:
"""Make API request to FMP.
Args:
endpoint: API endpoint
params: Query parameters
Returns:
API response data
"""
# Prepare request
url = f"{self.BASE_URL}/{endpoint}"
params = params or {}
params['apikey'] = self.api_key
try:
response = requests.get(url, params=params)
response.raise_for_status()
return response.json()
except requests.exceptions.RequestException as e:
self.logger.error(f"FMP API request failed: {str(e)}")
return self._handle_error(e)
def get_etf_profile(self, symbol: str) -> Dict:
"""Get ETF profile data.
Args:
symbol: ETF ticker symbol
Returns:
Dictionary with ETF profile data
"""
if not self._validate_symbol(symbol):
return self._handle_error(ValueError(f"Invalid symbol: {symbol}"))
# Try cache first
if self.cache_manager:
is_valid, cached_data = self.cache_manager.load('fmp', symbol, 'profile')
if is_valid:
return cached_data
# Fetch from API
data = self._make_request(f"etf/profile/{symbol}")
# Cache the response
if self.cache_manager and data:
self.cache_manager.save('fmp', symbol, 'profile', data)
return data
def get_etf_holdings(self, symbol: str) -> List[Dict]:
"""Get ETF holdings.
Args:
symbol: ETF ticker symbol
Returns:
List of holdings
"""
if not self._validate_symbol(symbol):
return []
# Try cache first
if self.cache_manager:
is_valid, cached_data = self.cache_manager.load('fmp', symbol, 'holdings')
if is_valid:
return cached_data
# Fetch from API
data = self._make_request(f"etf/holdings/{symbol}")
# Cache the response
if self.cache_manager and data:
self.cache_manager.save('fmp', symbol, 'holdings', data)
return data
def get_etf_historical_data(self, symbol: str, timeframe: str = '1d') -> pd.DataFrame:
"""Get ETF historical data.
Args:
symbol: ETF ticker symbol
timeframe: Timeframe for historical data
Returns:
DataFrame with historical data
"""
if not self._validate_symbol(symbol):
return pd.DataFrame()
# Try cache first
if self.cache_manager:
is_valid, cached_data = self.cache_manager.load('fmp', symbol, f'historical_{timeframe}')
if is_valid:
return pd.DataFrame(cached_data)
# Fetch from API
data = self._make_request(f"etf/historical-price/{symbol}", {'timeframe': timeframe})
# Cache the response
if self.cache_manager and data:
self.cache_manager.save('fmp', symbol, f'historical_{timeframe}', data)
return pd.DataFrame(data)
def get_dividend_history(self, symbol: str) -> pd.DataFrame:
"""Get dividend history.
Args:
symbol: ETF ticker symbol
Returns:
DataFrame with dividend history
"""
if not self._validate_symbol(symbol):
return pd.DataFrame()
# Try cache first
if self.cache_manager:
is_valid, cached_data = self.cache_manager.load('fmp', symbol, 'dividend_history')
if is_valid:
return pd.DataFrame(cached_data)
# Fetch from API
data = self._make_request(f"etf/dividend/{symbol}")
# Cache the response
if self.cache_manager and data:
self.cache_manager.save('fmp', symbol, 'dividend_history', data)
return pd.DataFrame(data)
def get_sector_weightings(self, symbol: str) -> Dict:
"""Get sector weightings.
Args:
symbol: ETF ticker symbol
Returns:
Dictionary with sector weightings
"""
if not self._validate_symbol(symbol):
return self._handle_error(ValueError(f"Invalid symbol: {symbol}"))
# Try cache first
if self.cache_manager:
is_valid, cached_data = self.cache_manager.load('fmp', symbol, 'sector_weightings')
if is_valid:
return cached_data
# Fetch from API
data = self._make_request(f"etf/sector-weightings/{symbol}")
# Cache the response
if self.cache_manager and data:
self.cache_manager.save('fmp', symbol, 'sector_weightings', data)
return data
def clear_cache(self) -> int:
"""Clear expired cache entries.
Returns:
Number of files cleared
"""
if self.cache_manager:
return self.cache_manager.clear_expired()
return 0
def get_cache_stats(self) -> Dict:
"""Get cache statistics.
Returns:
Dictionary with cache statistics
"""
if self.cache_manager:
return self.cache_manager.get_stats()
return {}
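
Every getter above follows the same cache-first shape: ask the cache manager, hit the API on a miss, write the response back. Intended use from the caller's side, under the same caveat about the abstract-method names noted for the factory:

```python
# Sketch: the cache-first pattern as seen by a caller. "SPY" is
# illustrative; repeat calls inside the cache window skip the network.
import os

from ETF_Portal.api.fmp.client import FMPClient

client = FMPClient(api_key=os.environ["FMP_API_KEY"])
profile = client.get_etf_profile("SPY")  # miss: fetched from FMP, then cached
profile = client.get_etf_profile("SPY")  # hit: served by FMPCacheManager
print(client.get_cache_stats())
```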

ETF_Portal/api/yfinance/__init__.py

@@ -0,0 +1,3 @@
from .client import YFinanceClient
__all__ = ['YFinanceClient']

ETF_Portal/api/yfinance/client.py

@@ -0,0 +1,149 @@
import yfinance as yf
import pandas as pd
from typing import Dict, List, Optional
from datetime import datetime, timedelta
import logging
from ..base import BaseAPIClient
from ...cache.yfinance_cache import YFinanceCacheManager
class YFinanceClient(BaseAPIClient):
"""Yahoo Finance API client."""
def __init__(self, cache_manager: Optional[YFinanceCacheManager] = None):
"""Initialize YFinance client.
Args:
cache_manager: Optional cache manager instance
"""
super().__init__()
self.cache_manager = cache_manager or YFinanceCacheManager()
self.logger = logging.getLogger(self.__class__.__name__)
def _validate_symbol(self, symbol: str) -> bool:
"""Validate ETF symbol format.
Args:
symbol: ETF ticker symbol
Returns:
True if valid, False otherwise
"""
return bool(symbol and isinstance(symbol, str) and symbol.isupper())
def get_etf_info(self, symbol: str) -> Dict:
"""Get ETF information.
Args:
symbol: ETF ticker symbol
Returns:
Dictionary with ETF information
"""
if not self._validate_symbol(symbol):
return self._handle_error(ValueError(f"Invalid symbol: {symbol}"))
# Try cache first
if self.cache_manager:
is_valid, cached_data = self.cache_manager.load('yfinance', symbol, 'info')
if is_valid:
return cached_data
try:
etf = yf.Ticker(symbol)
info = etf.info
# Cache the response
if self.cache_manager and info:
self.cache_manager.save('yfinance', symbol, 'info', info)
return info
except Exception as e:
self.logger.error(f"Error fetching ETF info: {str(e)}")
return self._handle_error(e)
def get_historical_data(self, symbol: str, period: str = '1y', interval: str = '1d') -> pd.DataFrame:
"""Get historical price data.
Args:
symbol: ETF ticker symbol
period: Time period (e.g., '1d', '1w', '1m', '1y')
interval: Data interval (e.g., '1d', '1wk', '1mo')
Returns:
DataFrame with historical price data
"""
if not self._validate_symbol(symbol):
return pd.DataFrame()
# Try cache first
if self.cache_manager:
is_valid, cached_data = self.cache_manager.load('yfinance', symbol, f'historical_{period}_{interval}')
if is_valid:
return pd.DataFrame(cached_data)
try:
etf = yf.Ticker(symbol)
data = etf.history(period=period, interval=interval)
# Cache the response
if self.cache_manager and not data.empty:
self.cache_manager.save('yfinance', symbol, f'historical_{period}_{interval}', data.to_dict('records'))
return data
except Exception as e:
self.logger.error(f"Error fetching historical data: {str(e)}")
return pd.DataFrame()
def get_dividend_history(self, symbol: str) -> pd.DataFrame:
"""Get dividend history.
Args:
symbol: ETF ticker symbol
Returns:
DataFrame with dividend history
"""
if not self._validate_symbol(symbol):
return pd.DataFrame()
# Try cache first
if self.cache_manager:
is_valid, cached_data = self.cache_manager.load('yfinance', symbol, 'dividend_history')
if is_valid:
return pd.DataFrame(cached_data)
try:
etf = yf.Ticker(symbol)
data = etf.dividends.to_frame()
# Cache the response
if self.cache_manager and not data.empty:
self.cache_manager.save('yfinance', symbol, 'dividend_history', data.to_dict('records'))
return data
except Exception as e:
self.logger.error(f"Error fetching dividend history: {str(e)}")
return pd.DataFrame()
def clear_cache(self) -> int:
"""Clear expired cache entries.
Returns:
Number of files cleared
"""
if self.cache_manager:
return self.cache_manager.clear_expired()
return 0
def get_cache_stats(self) -> Dict:
"""Get cache statistics.
Returns:
Dictionary with cache statistics
"""
if self.cache_manager:
return self.cache_manager.get_stats()
return {}

ETF_Portal/api_client.py (Normal file, 368 lines changed)

@@ -0,0 +1,368 @@
#!/usr/bin/env python3
"""
API Client for ETF Portal
Handles API calls with caching, logging, and performance monitoring.
"""
import time
import psutil
import requests
import os
from typing import Any, Dict, Optional, Tuple, List
from datetime import datetime
from .cache_manager import cache_manager
from .logging_config import api_logger, portfolio_logger, performance_logger
class APIClient:
"""Manages API calls with caching and monitoring."""
def __init__(self, base_url: str, api_key: Optional[str] = None):
"""
Initialize API client.
Args:
base_url: Base URL for API endpoints
api_key: Optional API key for authentication
"""
self.base_url = base_url.rstrip('/')
self.api_key = api_key
self.session = requests.Session()
# Initialize performance metrics
self._init_performance_metrics()
api_logger.info(f"Initialized API client for {base_url}")
def _init_performance_metrics(self):
"""Initialize performance tracking metrics."""
self.metrics = {
'api_calls': 0,
'cache_hits': 0,
'cache_misses': 0,
'errors': 0,
'total_response_time': 0,
'start_time': time.time()
}
def _log_performance_metrics(self):
"""Log current performance metrics."""
current_time = time.time()
uptime = current_time - self.metrics['start_time']
# Calculate averages
avg_response_time = (self.metrics['total_response_time'] /
self.metrics['api_calls']) if self.metrics['api_calls'] > 0 else 0
# Calculate cache hit rate
total_cache_ops = self.metrics['cache_hits'] + self.metrics['cache_misses']
cache_hit_rate = (self.metrics['cache_hits'] / total_cache_ops * 100
if total_cache_ops > 0 else 0)
# Get memory usage
process = psutil.Process()
memory_info = process.memory_info()
metrics = {
'uptime_seconds': uptime,
'api_calls': self.metrics['api_calls'],
'cache_hits': self.metrics['cache_hits'],
'cache_misses': self.metrics['cache_misses'],
'cache_hit_rate': cache_hit_rate,
'avg_response_time': avg_response_time,
'errors': self.metrics['errors'],
'memory_usage_mb': memory_info.rss / (1024 * 1024)
}
performance_logger.log_performance_metric(
'api_performance',
time.time(),
'timestamp',
metrics
)
return metrics
def _handle_error(self, error: Exception, context: Dict[str, Any]):
"""Handle and log API errors."""
self.metrics['errors'] += 1
error_info = {
'error_type': type(error).__name__,
'error_message': str(error),
'context': context,
'timestamp': datetime.now().isoformat()
}
api_logger.error(f"API Error: {error_info}")
return error_info
def make_request(self, endpoint: str, method: str = 'GET',
params: Optional[Dict] = None, data: Optional[Dict] = None,
source: str = 'api', data_type: str = 'response') -> Tuple[bool, Any]:
"""
Make API request with caching and logging.
Args:
endpoint: API endpoint
method: HTTP method
params: Query parameters
data: Request body
source: Data source identifier
data_type: Type of data being requested
Returns:
Tuple of (success, data)
"""
start_time = time.time()
request_id = f"{source}_{endpoint}_{datetime.now().strftime('%Y%m%d%H%M%S')}"
# Log request start
api_logger.log_api_call(
endpoint=endpoint,
method=method,
params=params
)
try:
# Check cache first
cache_key = f"{source}_{endpoint}_{data_type}"
cache_hit, cached_data = cache_manager.load(source, endpoint, data_type)
if cache_hit:
self.metrics['cache_hits'] += 1
duration = time.time() - start_time
api_logger.info(f"Cache hit for {cache_key}")
performance_logger.log_performance_metric(
'cache_hit',
duration,
'seconds',
{'request_id': request_id, 'cache_key': cache_key}
)
return True, cached_data
self.metrics['cache_misses'] += 1
# Make API call
url = f"{self.base_url}/{endpoint.lstrip('/')}"
# Add API key to params if it exists
if self.api_key:
if params is None:
params = {}
params['apikey'] = self.api_key
api_logger.info(f"Added API key to request: {self.api_key[:4]}...")
else:
api_logger.warning("No API key available for request")
api_logger.info(f"Making request to {url} with params: {params}")
response = self.session.request(
method=method,
url=url,
params=params,
json=data
)
response.raise_for_status()
# Process response
response_data = response.json()
duration = time.time() - start_time
# Update metrics
self.metrics['api_calls'] += 1
self.metrics['total_response_time'] += duration
# Save to cache
cache_manager.save(source, endpoint, data_type, response_data)
# Log success
api_logger.log_api_call(
endpoint=endpoint,
method=method,
params=params,
response_time=duration,
status_code=response.status_code
)
performance_logger.log_performance_metric(
'api_response',
duration,
'seconds',
{
'request_id': request_id,
'endpoint': endpoint,
'status_code': response.status_code
}
)
return True, response_data
except requests.exceptions.RequestException as e:
error_info = self._handle_error(e, {
'endpoint': endpoint,
'method': method,
'params': params,
'request_id': request_id
})
return False, error_info
except Exception as e:
error_info = self._handle_error(e, {
'endpoint': endpoint,
'method': method,
'params': params,
'request_id': request_id
})
return False, error_info
def portfolio_operation(self, operation_type: str, input_data: Dict[str, Any]) -> Tuple[bool, Any]:
"""
Execute portfolio operation with logging and monitoring.
Args:
operation_type: Type of portfolio operation
input_data: Input parameters for the operation
Returns:
Tuple of (success, result)
"""
start_time = time.time()
operation_id = f"{operation_type}_{datetime.now().strftime('%Y%m%d%H%M%S')}"
# Log operation start
portfolio_logger.log_portfolio_calculation(
calculation_type=operation_type,
input_data=input_data
)
try:
# Track memory usage before operation
process = psutil.Process()
memory_before = process.memory_info().rss
# Execute operation steps
steps = []
current_step = 1
# Example operation steps (replace with actual implementation)
for step_name in ['validation', 'calculation', 'optimization']:
step_start = time.time()
# Log step start
portfolio_logger.info(f"Step {current_step}: {step_name}")
# Execute step (replace with actual step implementation)
time.sleep(0.1) # Simulated step execution
step_duration = time.time() - step_start
steps.append({
'step': current_step,
'name': step_name,
'duration': step_duration
})
current_step += 1
# Calculate final result
result = {
'operation_id': operation_id,
'steps': steps,
'input_data': input_data
}
# Track memory usage after operation
memory_after = process.memory_info().rss
memory_used = (memory_after - memory_before) / (1024 * 1024) # MB
# Log operation completion
duration = time.time() - start_time
portfolio_logger.log_portfolio_calculation(
calculation_type=operation_type,
input_data=input_data,
output_data=result,
duration=duration
)
# Log performance metrics
performance_logger.log_performance_metric(
'portfolio_operation',
duration,
'seconds',
{
'operation_id': operation_id,
'operation_type': operation_type,
'memory_used_mb': memory_used,
'steps': len(steps)
}
)
return True, result
except Exception as e:
error_info = self._handle_error(e, {
'operation_type': operation_type,
'input_data': input_data,
'operation_id': operation_id
})
return False, error_info
def get_performance_metrics(self) -> Dict[str, Any]:
"""Get current performance metrics."""
return self._log_performance_metrics()
def get_profile(self, ticker: str) -> Optional[List[Dict]]:
"""Get ETF profile data."""
success, data = self.make_request(
endpoint=f"profile/{ticker}",
source="fmp",
data_type="profile"
)
return data if success else None
def get_historical_data(self, ticker: str, timeframe: str = "1d") -> Optional[Dict]:
"""Get historical price data."""
success, data = self.make_request(
endpoint=f"historical-price-full/{ticker}",
params={"timeseries": timeframe},
source="fmp",
data_type="historical"
)
return data if success else None
def get_dividend_history(self, ticker: str) -> Optional[Dict]:
"""Get dividend history data."""
success, data = self.make_request(
endpoint=f"historical-price-full/stock_dividend/{ticker}",
source="fmp",
data_type="dividend_history"
)
return data if success else None
def get_holdings(self, ticker: str) -> Optional[Dict]:
"""Get ETF holdings data."""
success, data = self.make_request(
endpoint=f"etf-holdings/{ticker}",
source="fmp",
data_type="holdings"
)
return data if success else None
def get_data(self, source: str, ticker: str, data_type: str, endpoint: str,
params: Dict = None, force_refresh: bool = False) -> Any:
"""Generic method to get data from any source."""
if params is None:
params = {}
success, data = self.make_request(
endpoint=endpoint,
params=params,
source=source,
data_type=data_type
)
return data if success else None
# Create a singleton instance
api_client = APIClient(base_url="https://financialmodelingprep.com/api/v3", api_key=os.getenv('FMP_API_KEY', ''))
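
This monolithic client overlaps with the `ETF_Portal/api` package above, but it is the one the module-level singleton exposes. A short sketch of driving it (the ticker is illustrative; `quote/{symbol}` is a standard FMP v3 endpoint, and FMP_API_KEY must be set):

```python
# Sketch: using the module-level singleton. Without FMP_API_KEY in the
# environment, requests go out unauthenticated and will fail.
from ETF_Portal.api_client import api_client

profile = api_client.get_profile("SPY")  # cached for 24h after the first call
ok, quote = api_client.make_request("quote/SPY", source="fmp", data_type="quote")
print(ok, api_client.get_performance_metrics()["cache_hit_rate"])
```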

ETF_Portal/cache_manager.py (Normal file, 344 lines changed)

@@ -0,0 +1,344 @@
#!/usr/bin/env python3
"""
Cache Manager for ETF Portal
Handles caching of API responses to reduce API calls and improve response times.
Implements a time-based cache expiration system with detailed logging.
"""
import os
import json
import time
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, Optional, Tuple, List
import hashlib
import threading
from concurrent.futures import ThreadPoolExecutor
from .logging_config import cache_logger, performance_logger
from dataclasses import dataclass, asdict
# Constants
CACHE_DIR = Path("cache")
DEFAULT_CACHE_DURATION = 24 * 60 * 60 # 24 hours in seconds
@dataclass
class CacheStats:
"""Cache statistics tracking."""
hits: int = 0
misses: int = 0
total_size: int = 0
last_cleared: Optional[datetime] = None
errors: int = 0
class CacheManager:
"""Manages caching operations for the ETF Portal."""
def __init__(self, cache_dir: str = "cache", cache_duration: int = DEFAULT_CACHE_DURATION):
"""
Initialize the cache manager.
Args:
cache_dir: Directory to store cache files
cache_duration: Cache duration in seconds (24 hours by default)
"""
# Use absolute path for cache directory
self.cache_dir = Path(os.path.abspath(cache_dir))
self.cache_duration = cache_duration
self.stats = CacheStats()
self._lock = threading.Lock()
# Create cache directory if it doesn't exist
self.cache_dir.mkdir(parents=True, exist_ok=True)
cache_logger.info(f"Cache directory: {self.cache_dir}")
cache_logger.info(f"Cache duration: {cache_duration} seconds")
# Load or initialize stats
self._load_stats()
# Log initialization complete
cache_logger.info("Cache manager initialized successfully")
performance_logger.log_performance_metric(
"cache_init",
time.time(),
"timestamp",
{"cache_duration": cache_duration}
)
def _get_cache_path(self, source: str, ticker: str, data_type: str) -> Path:
"""
Generate cache file path.
Args:
source: Data source (e.g., 'fmp', 'yahoo')
ticker: Stock/ETF ticker
data_type: Type of data (e.g., 'profile', 'historical')
Returns:
Path object for the cache file
"""
# Create subdirectory based on source and data type
if source == 'fmp':
subdir = f"FMP_cache/{data_type}"
else:
subdir = f"{source}_cache"
# Create the subdirectory
subdir_path = self.cache_dir / subdir
subdir_path.mkdir(parents=True, exist_ok=True)
# Create filename in format: {ticker}.json
filename = f"{ticker}.json"
cache_path = subdir_path / filename
cache_logger.debug(f"Cache path: {cache_path}")
return cache_path
def _load_stats(self) -> None:
"""Load cache statistics from disk."""
stats_file = self.cache_dir / "cache_stats.json"
if stats_file.exists():
try:
with open(stats_file, 'r') as f:
data = json.load(f)
self.stats = CacheStats(**data)
if self.stats.last_cleared:
self.stats.last_cleared = datetime.fromisoformat(self.stats.last_cleared)
cache_logger.info(f"Loaded cache stats: {asdict(self.stats)}")
except Exception as e:
cache_logger.error(f"Error loading cache stats: {e}")
self.stats = CacheStats()
self.stats.errors += 1
def _save_stats(self) -> None:
"""Save cache statistics to disk."""
stats_file = self.cache_dir / "cache_stats.json"
try:
with open(stats_file, 'w') as f:
stats_dict = asdict(self.stats)
if stats_dict['last_cleared']:
stats_dict['last_cleared'] = stats_dict['last_cleared'].isoformat()
json.dump(stats_dict, f, indent=2)
cache_logger.debug(f"Saved cache stats: {stats_dict}")
except Exception as e:
cache_logger.error(f"Error saving cache stats: {e}")
self.stats.errors += 1
def save(self, source: str, ticker: str, data_type: str, data: Any) -> bool:
"""
Save data to cache.
Args:
source: Data source
ticker: Stock/ETF ticker
data_type: Type of data
data: Data to cache
Returns:
True if save was successful, False otherwise
"""
with self._lock:
try:
start_time = time.time()
cache_path = self._get_cache_path(source, ticker, data_type)
# Log the data being cached
cache_logger.debug(f"Caching data for {source}/{ticker}/{data_type}")
cache_logger.debug(f"Data type: {type(data)}")
if isinstance(data, (list, dict)):
cache_logger.debug(f"Data length: {len(data)}")
# Prepare cache data with timestamp
cache_data = {
'timestamp': datetime.now().isoformat(),
'source': source,
'ticker': ticker,
'type': data_type,
'data': data
}
# Save to cache file
with open(cache_path, 'w') as f:
json.dump(cache_data, f, indent=2)
# Verify the file was written correctly
if not cache_path.exists():
cache_logger.error(f"Cache file was not created: {cache_path}")
return False
file_size = os.path.getsize(cache_path)
if file_size == 0:
cache_logger.error(f"Cache file is empty: {cache_path}")
return False
# Update stats
self.stats.total_size += file_size
self._save_stats()
duration = time.time() - start_time
cache_logger.log_cache_operation(
"save",
f"{source}/{ticker}/{data_type}",
size=file_size
)
performance_logger.log_performance_metric(
"cache_save",
duration,
"seconds",
{"source": source, "ticker": ticker, "type": data_type}
)
return True
except Exception as e:
cache_logger.error(f"Error saving to cache: {e}")
self.stats.errors += 1
return False
def load(self, source: str, ticker: str, data_type: str) -> Tuple[bool, Optional[Any]]:
"""
Load data from cache if valid.
Args:
source: Data source
ticker: Stock/ETF ticker
data_type: Type of data
Returns:
Tuple of (is_valid, data)
"""
with self._lock:
start_time = time.time()
cache_path = self._get_cache_path(source, ticker, data_type)
cache_logger.debug(f"Attempting to load cache for {source}/{ticker}/{data_type}")
cache_logger.debug(f"Cache path: {cache_path}")
if not cache_path.exists():
cache_logger.debug(f"Cache file does not exist: {cache_path}")
cache_logger.log_cache_operation(
"load",
f"{source}/{ticker}/{data_type}",
hit=False
)
self.stats.misses += 1
self._save_stats()
return False, None
try:
with open(cache_path, 'r') as f:
cache_data = json.load(f)
# Check if cache is still valid
timestamp = datetime.fromisoformat(cache_data['timestamp'])
age = datetime.now() - timestamp
cache_logger.debug(f"Cache age: {age.total_seconds()} seconds")
cache_logger.debug(f"Cache duration: {self.cache_duration} seconds")
if age.total_seconds() > self.cache_duration:
cache_logger.debug(f"Cache expired for {source}/{ticker}/{data_type}")
cache_logger.log_cache_operation(
"load",
f"{source}/{ticker}/{data_type}",
hit=False
)
self.stats.misses += 1
self._save_stats()
return False, None
duration = time.time() - start_time
cache_logger.debug(f"Cache hit for {source}/{ticker}/{data_type}")
cache_logger.log_cache_operation(
"load",
f"{source}/{ticker}/{data_type}",
hit=True
)
performance_logger.log_performance_metric(
"cache_load",
duration,
"seconds",
{"source": source, "ticker": ticker, "type": data_type}
)
self.stats.hits += 1
self._save_stats()
return True, cache_data['data']
except Exception as e:
cache_logger.error(f"Error loading from cache: {e}")
self.stats.misses += 1
self.stats.errors += 1
self._save_stats()
return False, None
def clear_expired(self) -> None:
"""Remove expired cache files."""
with self._lock:
try:
cleared_count = 0
for cache_file in self.cache_dir.glob("*.json"):
if cache_file.name == "cache_stats.json":
continue
try:
with open(cache_file, 'r') as f:
cache_data = json.load(f)
timestamp = datetime.fromisoformat(cache_data['timestamp'])
age = datetime.now() - timestamp
if age.total_seconds() > self.cache_duration:
self.stats.total_size -= os.path.getsize(cache_file)
cache_file.unlink()
cleared_count += 1
cache_logger.debug(f"Removed expired cache: {cache_file}")
except Exception as e:
cache_logger.error(f"Error processing cache file {cache_file}: {e}")
self.stats.errors += 1
if cleared_count > 0:
cache_logger.info(f"Cleared {cleared_count} expired cache files")
self.stats.last_cleared = datetime.now()
self._save_stats()
except Exception as e:
cache_logger.error(f"Error clearing expired cache: {e}")
self.stats.errors += 1
def get_stats(self) -> Dict[str, Any]:
"""
Get cache statistics.
Returns:
Dictionary containing cache statistics
"""
with self._lock:
stats = asdict(self.stats)
if stats['last_cleared']:
stats['last_cleared'] = stats['last_cleared'].isoformat()
# Add additional stats
stats['cache_files'] = len(list(self.cache_dir.glob("*.json"))) - 1 # Exclude stats file
stats['hit_rate'] = (self.stats.hits / (self.stats.hits + self.stats.misses)) if (self.stats.hits + self.stats.misses) > 0 else 0
stats['total_size_mb'] = self.stats.total_size / (1024 * 1024)
return stats
def clear_all(self) -> None:
"""Clear all cache files."""
with self._lock:
try:
for cache_file in self.cache_dir.glob("*.json"):
if cache_file.name == "cache_stats.json":
continue
cache_file.unlink()
self.stats = CacheStats()
self._save_stats()
cache_logger.info("Cleared all cache files")
except Exception as e:
cache_logger.error(f"Error clearing all cache: {e}")
self.stats.errors += 1
# Create a singleton instance
cache_manager = CacheManager()


@@ -0,0 +1,92 @@
#!/usr/bin/env python3
"""
Cache System Simulation
Demonstrates how the cache system works with API calls and cached data.
"""
import time
from ETF_Portal.cache_manager import cache_manager
import logging
# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def simulate_api_call(source: str, ticker: str, data_type: str) -> dict:
"""
Simulate an API call with a delay.
Args:
source: Data source (e.g., 'fmp', 'yahoo')
ticker: Stock/ETF ticker
data_type: Type of data (e.g., 'profile', 'historical')
Returns:
Simulated API response data
"""
logger.info(f"Making API call to {source} for {ticker} {data_type}")
time.sleep(1) # Simulate API delay
return {
"ticker": ticker,
"source": source,
"type": data_type,
"data": f"Simulated data for {ticker} from {source}",
"timestamp": time.time()
}
def get_data(source: str, ticker: str, data_type: str) -> dict:
"""
Get data either from cache or API.
Args:
source: Data source
ticker: Stock/ETF ticker
data_type: Type of data
Returns:
Data from either cache or API
"""
# Try to load from cache first
is_valid, cached_data = cache_manager.load(source, ticker, data_type)
if is_valid:
logger.info(f"Cache HIT: Found valid data for {ticker} in cache")
return cached_data
# If not in cache or expired, fetch from API
logger.info(f"Cache MISS: Fetching data for {ticker} from API")
data = simulate_api_call(source, ticker, data_type)
# Save to cache
cache_manager.save(source, ticker, data_type, data)
return data
def run_simulation():
"""Run a simulation of the cache system."""
# First request - should be a cache miss
logger.info("\n=== First Request ===")
data1 = get_data('fmp', 'SPY', 'profile')
print(f"Data received: {data1}")
# Second request - should be a cache hit
logger.info("\n=== Second Request ===")
data2 = get_data('fmp', 'SPY', 'profile')
print(f"Data received: {data2}")
# Request different data - should be a cache miss
logger.info("\n=== Different Data Request ===")
data3 = get_data('fmp', 'QQQ', 'profile')
print(f"Data received: {data3}")
# Show cache statistics
logger.info("\n=== Cache Statistics ===")
stats = cache_manager.get_stats()
print(f"Cache hits: {stats['hits']}")
print(f"Cache misses: {stats['misses']}")
print(f"Hit rate: {stats['hit_rate']:.2%}")
print(f"Total cache size: {stats['total_size']} bytes")
print(f"Number of cache files: {stats['cache_files']}")
if __name__ == "__main__":
run_simulation()

ETF_Portal/cli.py (Normal file, 547 lines changed)

@@ -0,0 +1,547 @@
#!/usr/bin/env python3
"""
ETF Portal CLI
A command-line interface for managing the ETF Portal application.
"""
import click
import subprocess
import webbrowser
import time
import threading
import os
import sys
import socket
import signal
import json
import psutil
import logging
from pathlib import Path
from typing import List, Dict, Optional, Tuple
from datetime import datetime
# Path configurations
WORKSPACE_PATH = Path(__file__).parent.parent
LAUNCHER_PATH = WORKSPACE_PATH / "ETF_Suite_Launcher.py"
PORTFOLIO_BUILDER_PATH = WORKSPACE_PATH / "pages" / "ETF_Portfolio_Builder.py"
ANALYZER_PATH = WORKSPACE_PATH / "pages" / "ETF_Analyzer.py"
CONFIG_DIR = WORKSPACE_PATH / "config"
CONFIG_FILE = CONFIG_DIR / "etf_suite_config.json"
LOGS_DIR = WORKSPACE_PATH / "logs"
# Default port settings
DEFAULT_PORTS = {
"launcher": 8500,
"portfolio_builder": 8501,
"analyzer": 8502
}
# Process tracking
active_processes = {}
# Setup logging
def setup_logging():
"""Set up logging configuration."""
LOGS_DIR.mkdir(exist_ok=True)
log_file = LOGS_DIR / f"etf_portal_{datetime.now().strftime('%Y%m%d')}.log"
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler(log_file),
logging.StreamHandler()
]
)
return logging.getLogger("etf_portal")
logger = setup_logging()
def ensure_config_exists():
"""Ensure config directory and file exist."""
CONFIG_DIR.mkdir(exist_ok=True)
if not CONFIG_FILE.exists():
with open(CONFIG_FILE, 'w') as f:
json.dump({
"ports": DEFAULT_PORTS,
"streamlit_path": os.path.join(sys.prefix, "bin", "streamlit")
}, f, indent=2)
def get_config():
"""Get the configuration from the config file."""
ensure_config_exists()
with open(CONFIG_FILE, 'r') as f:
return json.load(f)
def update_config(key, value):
"""Update a specific configuration value."""
config = get_config()
# Handle nested configuration like ports.launcher
if '.' in key:
main_key, sub_key = key.split('.', 1)
if main_key not in config:
config[main_key] = {}
config[main_key][sub_key] = value
else:
config[key] = value
with open(CONFIG_FILE, 'w') as f:
json.dump(config, f, indent=2)
def cleanup_streamlit_processes():
"""Kill any existing Streamlit processes to prevent conflicts."""
click.echo("Cleaning up existing Streamlit processes...")
logger.info("Cleaning up existing Streamlit processes")
try:
config = get_config()
ports = config["ports"]
processed_pids = set() # Track PIDs we've already handled
# First, find and kill processes using our target ports
for port in ports.values():
try:
# Find process using the port
cmd = f"lsof -i :{port} | grep LISTEN | awk '{{print $2}}'"
result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
if result.stdout.strip():
pids = result.stdout.strip().split('\n')
for pid in pids:
pid = int(pid)
if pid not in processed_pids:
try:
os.kill(pid, signal.SIGTERM)
logger.info(f"Terminated process {pid} using port {port}")
processed_pids.add(pid)
except ProcessLookupError:
pass
except Exception as e:
logger.error(f"Error cleaning up port {port}: {e}")
# Then find and kill any remaining Streamlit processes
for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
try:
if proc.info['pid'] not in processed_pids and 'streamlit' in ' '.join(proc.info['cmdline'] or []).lower():
proc.terminate()
logger.info(f"Terminated Streamlit process {proc.info['pid']}")
processed_pids.add(proc.info['pid'])
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
pass
# Give processes time to shut down
time.sleep(2)
# Force kill any remaining processes
for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
try:
if proc.info['pid'] not in processed_pids and 'streamlit' in ' '.join(proc.info['cmdline'] or []).lower():
proc.kill()
logger.info(f"Force killed Streamlit process {proc.info['pid']}")
processed_pids.add(proc.info['pid'])
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
pass
except Exception as e:
logger.error(f"Error during cleanup: {e}")
click.echo(f"Warning during cleanup: {e}")
def port_is_available(port):
"""Check if a port is available."""
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(("127.0.0.1", port))
return True
except socket.error:
return False
def open_browser(url, delay=3):
"""Open browser after a delay to ensure app is running."""
time.sleep(delay)
click.echo(f"Opening browser to {url}")
webbrowser.open(url)
def start_component(component: str, open_browser_tab=True, background=False) -> Optional[subprocess.Popen]:
"""Start a specific component of the ETF Portal."""
config = get_config()
streamlit_path = config.get("streamlit_path", os.path.join(sys.prefix, "bin", "streamlit"))
ports = config["ports"]
# Check if streamlit exists at the specified path
if not os.path.exists(streamlit_path):
error_msg = f"ERROR: Streamlit not found at {streamlit_path}"
logger.error(error_msg)
click.echo(error_msg)
click.echo("Please install streamlit or update the streamlit_path in config")
return None
# Component-specific configurations
component_configs = {
"launcher": {
"path": LAUNCHER_PATH,
"port": ports["launcher"],
},
"portfolio_builder": {
"path": PORTFOLIO_BUILDER_PATH,
"port": ports["portfolio_builder"],
},
"analyzer": {
"path": ANALYZER_PATH,
"port": ports["analyzer"],
}
}
if component not in component_configs:
error_msg = f"Unknown component: {component}"
logger.error(error_msg)
click.echo(error_msg)
return None
component_config = component_configs[component]
port = component_config["port"]
if not port_is_available(port):
error_msg = f"ERROR: Port {port} is in use."
logger.error(error_msg)
click.echo(error_msg)
click.echo(f"Please stop existing service on port {port} or configure a different port.")
return None
log_file = None
if background:
# Create log file for background process
LOGS_DIR.mkdir(exist_ok=True)
log_file = LOGS_DIR / f"{component}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
logger.info(f"Starting {component} in background mode. Logs will be written to {log_file}")
click.echo(f"Starting {component} on port {port}..." + (" (background mode)" if background else ""))
# Prepare command
cmd = [
streamlit_path, "run",
str(component_config["path"]),
"--server.port", str(port),
"--server.fileWatcherType", "none" # Disable file watcher to prevent inotify issues
]
# Launch the component
if background:
# In background mode, redirect output to log file
with open(log_file, 'w') as log:
process = subprocess.Popen(
cmd,
stdout=log,
stderr=log,
# Make the process independent of the parent
start_new_session=True
)
else:
# In foreground mode, just run normally
process = subprocess.Popen(cmd)
# Store process for tracking
active_processes[component] = {
"process": process,
"port": port,
"pid": process.pid,
"background": background
}
# Open browser pointing to the component
if open_browser_tab:
thread = threading.Thread(
target=open_browser,
args=(f"http://localhost:{port}",)
)
thread.daemon = True
thread.start()
# If running in background, we don't need to wait
if background:
logger.info(f"Started {component} in background mode (PID: {process.pid})")
# Give it a moment to start
time.sleep(1)
# Check if the process is still running
if process.poll() is not None:
error_msg = f"Error: {component} failed to start in background mode"
logger.error(error_msg)
click.echo(error_msg)
return None
return process
def get_streamlit_processes() -> List[Dict]:
"""Get a list of running Streamlit processes."""
processes = []
seen_ports = set() # Track ports we've seen to avoid duplicates
for proc in psutil.process_iter(['pid', 'name', 'cmdline', 'create_time']):
try:
cmdline = proc.info['cmdline']
if not cmdline or 'streamlit' not in ' '.join(cmdline).lower():
continue
port = None
component = None
# Extract port from command line
for i, arg in enumerate(cmdline):
if arg == '--server.port' and i + 1 < len(cmdline):
port = cmdline[i + 1]
# Skip if we've already seen this port (likely a duplicate)
if port in seen_ports:
continue
seen_ports.add(port)
# Identify which component based on file path
for i, arg in enumerate(cmdline):
if arg == 'run' and i + 1 < len(cmdline):
path = cmdline[i + 1]
if 'ETF_Suite_Launcher.py' in path:
component = 'launcher'
elif 'ETF_Portfolio_Builder.py' in path:
component = 'portfolio_builder'
elif 'ETF_Analyzer.py' in path:
component = 'analyzer'
# Only add processes that have a valid port
if port:
processes.append({
'pid': proc.info['pid'],
'port': port,
'component': component,
'cmdline': ' '.join(cmdline),
'create_time': proc.info['create_time']
})
except (psutil.NoSuchProcess, psutil.AccessDenied):
pass
# Sort by creation time (newest first)
processes.sort(key=lambda x: x['create_time'], reverse=True)
return processes
def stop_component(component=None, pid=None):
"""Stop a specific component or Streamlit process."""
if pid:
try:
process = psutil.Process(pid)
process.terminate()
try:
process.wait(timeout=5)
except psutil.TimeoutExpired:
process.kill()
click.echo(f"Stopped process with PID {pid}")
logger.info(f"Stopped process with PID {pid}")
return True
except psutil.NoSuchProcess:
click.echo(f"No process found with PID {pid}")
return False
elif component:
# Check active tracked processes first
if component in active_processes:
process_info = active_processes[component]
try:
process = process_info["process"]
process.terminate()
try:
process.wait(timeout=5)
except subprocess.TimeoutExpired:
process.kill()
click.echo(f"Stopped {component}")
logger.info(f"Stopped {component}")
del active_processes[component]
return True
except Exception:
pass
# Fall back to finding by component name in running processes
processes = get_streamlit_processes()
for proc in processes:
if proc['component'] == component:
return stop_component(pid=proc['pid'])
click.echo(f"No running {component} process found")
return False
@click.group()
def cli():
"""ETF Portal CLI - Manage the ETF Portal application."""
pass
@cli.command()
@click.option('--component', type=click.Choice(['launcher', 'portfolio_builder', 'analyzer', 'all']),
default='launcher', help='Component to start')
@click.option('--no-browser', is_flag=True, help="Don't open browser automatically")
@click.option('--background', is_flag=True, help="Run in background mode (daemon)")
def start(component, no_browser, background):
"""Start ETF Portal components."""
if component == 'all':
# Start launcher first, it will manage the other components
process = start_component('launcher', not no_browser, background)
if not process:
return
else:
process = start_component(component, not no_browser, background)
if not process:
return
click.echo(f"Started {component}" + (" in background mode" if background else ""))
# In background mode, we just return immediately
if background:
return
# In foreground mode, wait for the process
click.echo("Press Ctrl+C to exit")
# Keep running until interrupted
try:
if component == 'all' or component == 'launcher':
process.wait()
else:
# For individual components, we'll just exit
return
except KeyboardInterrupt:
click.echo("\nShutting down...")
if component == 'all':
stop_component('launcher')
else:
stop_component(component)
@cli.command()
@click.option('--component', type=click.Choice(['launcher', 'portfolio_builder', 'analyzer', 'all']),
default='all', help='Component to stop')
@click.option('--pid', type=int, help='Process ID to stop')
def stop(component, pid):
"""Stop ETF Portal components."""
if pid:
stop_component(pid=pid)
elif component == 'all':
cleanup_streamlit_processes()
click.echo("Stopped all ETF Portal components")
logger.info("Stopped all ETF Portal components")
else:
stop_component(component)
@cli.command()
@click.option('--component', type=click.Choice(['launcher', 'portfolio_builder', 'analyzer', 'all']),
default='all', help='Component to restart')
@click.option('--no-browser', is_flag=True, help="Don't open browser automatically")
@click.option('--background', is_flag=True, help="Run in background mode (daemon)")
def restart(component, no_browser, background):
"""Restart ETF Portal components (stop and then start)."""
# First stop the components
if component == 'all':
cleanup_streamlit_processes()
click.echo("Stopped all ETF Portal components")
logger.info("Stopped all ETF Portal components")
else:
stop_component(component)
# Give processes time to fully shut down
time.sleep(2)
# Then start them again
if component == 'all':
# Start launcher first, it will manage the other components
process = start_component('launcher', not no_browser, background)
if not process:
return
else:
process = start_component(component, not no_browser, background)
if not process:
return
click.echo(f"Restarted {component}" + (" in background mode" if background else ""))
# In background mode, we just return immediately
if background:
return
# In foreground mode, wait for the process
click.echo("Press Ctrl+C to exit")
# Keep running until interrupted
try:
if component == 'all' or component == 'launcher':
process.wait()
else:
# For individual components, we'll just exit
return
except KeyboardInterrupt:
click.echo("\nShutting down...")
if component == 'all':
stop_component('launcher')
else:
stop_component(component)
@cli.command()
def status():
"""Check status of ETF Portal components."""
processes = get_streamlit_processes()
if not processes:
click.echo("No ETF Portal processes are currently running.")
return
click.echo("Running ETF Portal processes:")
for i, proc in enumerate(processes):
component = proc['component'] or 'unknown'
port = proc['port'] or 'unknown'
click.echo(f"{i+1}. {component.upper()} (PID: {proc['pid']}, Port: {port})")
@cli.command()
@click.option('--key', required=True, help='Configuration key to update (e.g., ports.launcher)')
@click.option('--value', required=True, help='New value')
def config(key, value):
"""View or update configuration."""
try:
# Convert value to integer if possible
try:
value = int(value)
except ValueError:
pass
update_config(key, value)
click.echo(f"Updated {key} to {value}")
logger.info(f"Updated configuration: {key}={value}")
except Exception as e:
error_msg = f"Error updating configuration: {e}"
logger.error(error_msg)
click.echo(error_msg)
@cli.command()
def logs():
"""Show recent logs from ETF Portal components."""
LOGS_DIR.mkdir(exist_ok=True)
log_files = sorted(LOGS_DIR.glob("*.log"), key=os.path.getmtime, reverse=True)
if not log_files:
click.echo("No log files found.")
return
click.echo("Recent log files:")
for i, log_file in enumerate(log_files[:5]): # Show 5 most recent logs
size = os.path.getsize(log_file) / 1024 # Size in KB
modified = datetime.fromtimestamp(os.path.getmtime(log_file)).strftime('%Y-%m-%d %H:%M:%S')
click.echo(f"{i+1}. {log_file.name} ({size:.1f} KB, last modified: {modified})")
# Show most recent log contents
if log_files:
most_recent = log_files[0]
click.echo(f"\nMost recent log ({most_recent.name}):")
try:
# Show last 20 lines of the most recent log
with open(most_recent, 'r') as f:
lines = f.readlines()
for line in lines[-20:]:
click.echo(line.strip())
except Exception as e:
click.echo(f"Error reading log file: {e}")
def main():
"""Main entry point for the CLI."""
cli()
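A minimal sketch of exercising these commands in-process with click's test runner; the module name etf_cli is an assumption, not taken from this diff:

from click.testing import CliRunner
from etf_cli import cli  # assumed module name

runner = CliRunner()
result = runner.invoke(cli, ['status'])
print(result.exit_code)  # 0 when the command ran cleanly
print(result.output)     # e.g. "No ETF Portal processes are currently running."

result = runner.invoke(cli, ['config', '--key', 'ports.launcher', '--value', '8502'])
print(result.output)     # "Updated ports.launcher to 8502"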

View File

@@ -0,0 +1,199 @@
#!/usr/bin/env python3
"""
Enhanced Logging Configuration for ETF Portal
Provides centralized logging configuration with component-specific logging,
log rotation, and structured logging formats.
"""
import logging
import logging.handlers
from pathlib import Path
from datetime import datetime
from typing import Optional, Dict, Any
import json
import sys
import os
# Constants
LOGS_DIR = Path("logs")
MAX_LOG_SIZE = 10 * 1024 * 1024 # 10MB
BACKUP_COUNT = 5
# Log formats
DETAILED_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s'
SIMPLE_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
# Component configurations
COMPONENTS = {
'api': {
'name': 'API',
'log_level': logging.INFO,
'file_prefix': 'api',
'categories': ['requests', 'responses', 'errors']
},
'cache': {
'name': 'Cache',
'log_level': logging.INFO,
'file_prefix': 'cache',
'categories': ['hits', 'misses', 'cleanup']
},
'portfolio': {
'name': 'Portfolio',
'log_level': logging.INFO,
'file_prefix': 'portfolio',
'categories': ['calculations', 'allocations', 'optimizations']
},
'performance': {
'name': 'Performance',
'log_level': logging.INFO,
'file_prefix': 'performance',
'categories': ['response_times', 'resource_usage']
}
}
class ComponentLogger:
"""Manages logging for a specific component."""
def __init__(self, component: str):
"""
Initialize component logger.
Args:
component: Component name (must be in COMPONENTS)
"""
if component not in COMPONENTS:
raise ValueError(f"Unknown component: {component}")
self.component = component
self.config = COMPONENTS[component]
self.logger = logging.getLogger(f"etf_portal.{component}")
self.logger.setLevel(self.config['log_level'])
# Create component-specific log directory
self.log_dir = LOGS_DIR / component
self.log_dir.mkdir(parents=True, exist_ok=True)
# Setup handlers
self._setup_handlers()
# Log initialization
self.logger.info(f"Initialized {self.config['name']} logger")
def _setup_handlers(self):
"""Setup logging handlers for the component."""
# Remove any existing handlers
self.logger.handlers = []
# Create formatters
detailed_formatter = logging.Formatter(DETAILED_FORMAT)
simple_formatter = logging.Formatter(SIMPLE_FORMAT)
# Main log file with rotation
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
log_file = self.log_dir / f"{self.config['file_prefix']}_{timestamp}.log"
file_handler = logging.handlers.RotatingFileHandler(
log_file,
maxBytes=MAX_LOG_SIZE,
backupCount=BACKUP_COUNT
)
file_handler.setFormatter(detailed_formatter)
file_handler.setLevel(logging.DEBUG)
self.logger.addHandler(file_handler)
# Error log file
error_file = self.log_dir / f"{self.config['file_prefix']}_error_{timestamp}.log"
error_handler = logging.handlers.RotatingFileHandler(
error_file,
maxBytes=MAX_LOG_SIZE,
backupCount=BACKUP_COUNT
)
error_handler.setFormatter(detailed_formatter)
error_handler.setLevel(logging.ERROR)
self.logger.addHandler(error_handler)
# Console handler
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setFormatter(simple_formatter)
console_handler.setLevel(logging.INFO)
self.logger.addHandler(console_handler)
def log_api_call(self, endpoint: str, method: str, params: Dict = None,
response_time: float = None, status_code: int = None):
"""Log API call details."""
log_data = {
'endpoint': endpoint,
'method': method,
'params': params,
'response_time': response_time,
'status_code': status_code
}
self.logger.info(f"API Call: {json.dumps(log_data)}")
def log_cache_operation(self, operation: str, key: str, hit: bool = None,
size: int = None, ttl: int = None):
"""Log cache operation details."""
log_data = {
'operation': operation,
'key': key,
'hit': hit,
'size': size,
'ttl': ttl
}
self.logger.info(f"Cache Operation: {json.dumps(log_data)}")
def log_portfolio_calculation(self, calculation_type: str,
input_data: Dict = None,
output_data: Dict = None,
duration: float = None):
"""Log portfolio calculation details."""
log_data = {
'type': calculation_type,
'input': input_data,
'output': output_data,
'duration': duration
}
self.logger.info(f"Portfolio Calculation: {json.dumps(log_data)}")
def log_performance_metric(self, metric_name: str, value: float,
unit: str = None, context: Dict = None):
"""Log performance metric details."""
log_data = {
'metric': metric_name,
'value': value,
'unit': unit,
'context': context,
'timestamp': datetime.now().isoformat()
}
self.logger.info(f"Performance Metric: {json.dumps(log_data)}")
def setup_logging():
"""Initialize the logging system."""
# Create logs directory
LOGS_DIR.mkdir(parents=True, exist_ok=True)
# Create component loggers
loggers = {}
for component in COMPONENTS:
loggers[component] = ComponentLogger(component)
return loggers
# Create loggers for all components
loggers = setup_logging()
# Export component loggers
api_logger = loggers['api'].logger
cache_logger = loggers['cache'].logger
portfolio_logger = loggers['portfolio'].logger
performance_logger = loggers['performance'].logger
# Add performance metric logging methods to loggers
def add_performance_logging(logger):
"""Add performance metric logging methods to a logger."""
logger.log_performance_metric = lambda metric_name, value, unit=None, context=None: \
loggers['performance'].log_performance_metric(metric_name, value, unit, context)
# Add performance logging to all loggers
for logger in [api_logger, cache_logger, portfolio_logger, performance_logger]:
add_performance_logging(logger)
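A short usage sketch for the component loggers above; the import path logging_config is an assumption:

from logging_config import loggers, api_logger  # module path assumed

# Structured API-call logging through the component wrapper
loggers['api'].log_api_call(
    endpoint='/v3/etf/profile', method='GET',
    params={'symbol': 'JEPI'}, response_time=0.42, status_code=200
)

# Plain logging through the exported stdlib logger
api_logger.info("FMP client initialized")

# Performance-metric shim attached by add_performance_logging above
api_logger.log_performance_metric('cache_hit_ratio', 0.93, unit='ratio')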

View File

@@ -0,0 +1,337 @@
"""
Data Service for ETF data retrieval
"""
import os
import json
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from typing import Dict, Optional
import yfinance as yf
import logging
from pathlib import Path
logger = logging.getLogger(__name__)
class DataService:
"""Service for retrieving ETF data with fallback logic"""
def __init__(self):
        # Use existing cache structure (repo root is three levels up from this file)
        self.base_dir = Path(__file__).resolve().parents[2]
self.cache_dir = self.base_dir / 'cache'
self.yf_cache_dir = self.cache_dir / 'yfinance_cache'
self.fmp_cache_dir = self.cache_dir / 'FMP_cache'
self.fmp_profiles_dir = self.fmp_cache_dir / 'etf_profiles'
self.fmp_historical_dir = self.fmp_cache_dir / 'historical_data'
self.fmp_holdings_dir = self.fmp_cache_dir / 'etf_holdings'
        self.cache_timeout = timedelta(hours=1)
        # Make sure the cache directories exist before the first read/write
        for cache_path in (self.yf_cache_dir, self.fmp_profiles_dir,
                           self.fmp_historical_dir, self.fmp_holdings_dir):
            cache_path.mkdir(parents=True, exist_ok=True)
def get_etf_data(self, ticker: str) -> Dict:
"""Get ETF data using fallback logic:
1. Try FMP cache
2. Try FMP API
3. Try yfinance cache
4. Try yfinance
5. Use high yield estimates
"""
try:
# Try FMP cache first
fmp_cached_data = self._get_from_fmp_cache(ticker)
if fmp_cached_data:
logger.info(f"Using FMP cached data for {ticker}")
return fmp_cached_data
# Try FMP API
fmp_data = self._get_from_fmp(ticker)
if fmp_data:
logger.info(f"Using FMP data for {ticker}")
self._save_to_fmp_cache(ticker, fmp_data)
return fmp_data
# Try yfinance cache
yf_cached_data = self._get_from_yf_cache(ticker)
if yf_cached_data:
logger.info(f"Using yfinance cached data for {ticker}")
return yf_cached_data
# Try yfinance
yf_data = self._get_from_yfinance(ticker)
if yf_data:
logger.info(f"Using yfinance data for {ticker}")
self._save_to_yf_cache(ticker, yf_data)
return yf_data
# Use high yield estimates
logger.warning(f"Using high yield estimates for {ticker}")
return self._get_high_yield_estimates(ticker)
except Exception as e:
logger.error(f"Error fetching data for {ticker}: {str(e)}")
return self._get_high_yield_estimates(ticker)
def _get_from_fmp_cache(self, ticker: str) -> Optional[Dict]:
"""Get data from FMP cache if available and not expired"""
# Check profile cache
profile_file = self.fmp_profiles_dir / f"{ticker}.json"
if not profile_file.exists():
return None
try:
with open(profile_file, 'r') as f:
profile_data = json.load(f)
# Check if cache is expired
cache_time = datetime.fromisoformat(profile_data['timestamp'])
if datetime.now() - cache_time > self.cache_timeout:
return None
# Get historical data
hist_file = self.fmp_historical_dir / f"{ticker}.json"
if hist_file.exists():
with open(hist_file, 'r') as f:
hist_data = json.load(f)
else:
hist_data = {}
# Get holdings data
holdings_file = self.fmp_holdings_dir / f"{ticker}.json"
if holdings_file.exists():
with open(holdings_file, 'r') as f:
holdings_data = json.load(f)
else:
holdings_data = {}
# Combine all data
return {
'info': profile_data['data'],
'hist': hist_data.get('data', {}),
'holdings': holdings_data.get('data', {}),
'volatility': profile_data['data'].get('volatility', 0.0),
'max_drawdown': profile_data['data'].get('maxDrawdown', 0.0),
'sharpe_ratio': profile_data['data'].get('sharpeRatio', 0.0),
'sortino_ratio': profile_data['data'].get('sortinoRatio', 0.0),
'dividend_trend': profile_data['data'].get('dividendTrend', 0.0),
'age_years': profile_data['data'].get('ageYears', 0.0),
'is_new': profile_data['data'].get('ageYears', 0.0) < 2
}
except Exception as e:
logger.warning(f"Error reading FMP cache for {ticker}: {str(e)}")
return None
def _get_from_yf_cache(self, ticker: str) -> Optional[Dict]:
"""Get data from yfinance cache if available and not expired"""
cache_file = self.yf_cache_dir / f"{ticker}_data.json"
if not cache_file.exists():
return None
try:
with open(cache_file, 'r') as f:
data = json.load(f)
# Check if cache is expired
cache_time = datetime.fromisoformat(data['timestamp'])
if datetime.now() - cache_time > self.cache_timeout:
return None
return data['data']
except Exception as e:
logger.warning(f"Error reading yfinance cache for {ticker}: {str(e)}")
return None
def _save_to_fmp_cache(self, ticker: str, data: Dict):
"""Save data to FMP cache"""
try:
# Save profile data
profile_data = {
'timestamp': datetime.now().isoformat(),
'data': data['info']
}
profile_file = self.fmp_profiles_dir / f"{ticker}.json"
with open(profile_file, 'w') as f:
json.dump(profile_data, f)
# Save historical data
if 'hist' in data:
hist_data = {
'timestamp': datetime.now().isoformat(),
'data': data['hist']
}
hist_file = self.fmp_historical_dir / f"{ticker}.json"
with open(hist_file, 'w') as f:
json.dump(hist_data, f)
# Save holdings data
if 'holdings' in data:
holdings_data = {
'timestamp': datetime.now().isoformat(),
'data': data['holdings']
}
holdings_file = self.fmp_holdings_dir / f"{ticker}.json"
with open(holdings_file, 'w') as f:
json.dump(holdings_data, f)
except Exception as e:
logger.warning(f"Error saving FMP cache for {ticker}: {str(e)}")
def _save_to_yf_cache(self, ticker: str, data: Dict):
"""Save data to yfinance cache"""
try:
cache_data = {
'timestamp': datetime.now().isoformat(),
'data': data
}
cache_file = self.yf_cache_dir / f"{ticker}_data.json"
with open(cache_file, 'w') as f:
json.dump(cache_data, f)
except Exception as e:
logger.warning(f"Error saving yfinance cache for {ticker}: {str(e)}")
def _get_from_fmp(self, ticker: str) -> Optional[Dict]:
"""Get data from FMP API"""
# TODO: Implement FMP API integration
return None
def _get_from_yfinance(self, ticker: str) -> Optional[Dict]:
"""Get data from yfinance"""
try:
yf_ticker = yf.Ticker(ticker)
# Get basic info
info = yf_ticker.info
if not info:
return None
# Get historical data - use 5 years for better calculations
hist = yf_ticker.history(period="5y")
if hist.empty:
return None
# Get current price
current_price = info.get('regularMarketPrice', hist['Close'].iloc[-1])
            # Get dividend yield (yfinance can return None here)
            dividend_yield = (info.get('dividendYield') or 0) * 100  # Convert to percentage
# Get dividends with proper handling
try:
dividends = yf_ticker.dividends
if dividends is None or dividends.empty:
# Try to get dividend info from info
dividend_rate = info.get('dividendRate', 0)
if dividend_rate > 0:
# Create a synthetic dividend series
annual_dividend = dividend_rate
monthly_dividend = annual_dividend / 12
dividends = pd.Series(monthly_dividend, index=hist.index)
else:
dividends = pd.Series(0, index=hist.index)
except Exception as e:
logger.warning(f"Error getting dividends for {ticker}: {str(e)}")
dividends = pd.Series(0, index=hist.index)
# Calculate metrics with proper annualization
hist['log_returns'] = np.log(hist['Close'] / hist['Close'].shift(1))
returns = hist['log_returns'].dropna()
# Calculate annualized volatility using daily log returns
volatility = returns.std() * np.sqrt(252)
# Calculate max drawdown using rolling window
rolling_max = hist['Close'].rolling(window=252, min_periods=1).max()
daily_drawdown = hist['Close'] / rolling_max - 1.0
max_drawdown = abs(daily_drawdown.min())
# Calculate annualized return
annual_return = returns.mean() * 252
# Calculate Sharpe and Sortino ratios with proper risk-free rate
            risk_free_rate = 0.05  # Assumed annual risk-free rate (approximately the 3-month Treasury yield)
# Sharpe Ratio
if volatility > 0:
sharpe_ratio = (annual_return - risk_free_rate) / volatility
else:
sharpe_ratio = 0
# Sortino Ratio
downside_returns = returns[returns < 0]
if len(downside_returns) > 0:
downside_volatility = downside_returns.std() * np.sqrt(252)
if downside_volatility > 0:
sortino_ratio = (annual_return - risk_free_rate) / downside_volatility
else:
sortino_ratio = 0
else:
sortino_ratio = 0
# Calculate dividend trend
if not dividends.empty:
dividend_trend = (dividends.iloc[-1] / dividends.iloc[0]) - 1 if dividends.iloc[0] > 0 else 0
else:
dividend_trend = 0
# Calculate ETF age
if 'firstTradeDateEpochUtc' in info:
age_years = (datetime.now() - datetime.fromtimestamp(info['firstTradeDateEpochUtc'])).days / 365.25
else:
age_years = 0
# Return formatted data
return {
'price': current_price,
'dividend_yield': dividend_yield,
'volatility': volatility,
'max_drawdown': max_drawdown,
'sharpe_ratio': sharpe_ratio,
'sortino_ratio': sortino_ratio,
'dividend_trend': dividend_trend,
'age_years': age_years,
'is_new': age_years < 2,
'info': info,
'hist': hist.to_dict('records'),
'dividends': dividends.to_dict()
}
except Exception as e:
logger.error(f"Error fetching yfinance data for {ticker}: {str(e)}")
return None
def _get_high_yield_estimates(self, ticker: str) -> Dict:
"""Get conservative high yield estimates when no data is available"""
# Determine ETF type based on ticker
if ticker in ['JEPI', 'FEPI', 'MSTY']: # Income ETFs
max_drawdown = 0.10 # 10% for income ETFs
volatility = 0.15 # 15% volatility
sharpe_ratio = 0.8 # Lower Sharpe for income ETFs
sortino_ratio = 1.2 # Higher Sortino for income ETFs
dividend_trend = 0.05 # 5% dividend growth for income ETFs
elif ticker in ['VTI', 'VOO']: # Growth ETFs
max_drawdown = 0.25 # 25% for growth ETFs
volatility = 0.20 # 20% volatility
sharpe_ratio = 1.2 # Higher Sharpe for growth ETFs
sortino_ratio = 1.5 # Higher Sortino for growth ETFs
dividend_trend = 0.10 # 10% dividend growth for growth ETFs
else: # Balanced ETFs
max_drawdown = 0.20 # 20% for balanced ETFs
volatility = 0.18 # 18% volatility
sharpe_ratio = 1.0 # Moderate Sharpe for balanced ETFs
sortino_ratio = 1.3 # Moderate Sortino for balanced ETFs
dividend_trend = 0.07 # 7% dividend growth for balanced ETFs
return {
'info': {},
'hist': {},
'dividends': {},
'volatility': volatility,
'max_drawdown': max_drawdown,
'sharpe_ratio': sharpe_ratio,
'sortino_ratio': sortino_ratio,
'dividend_trend': dividend_trend,
'age_years': 3.0, # Conservative estimate
'is_new': False,
'is_estimated': True # Flag to indicate these are estimates
}
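A minimal usage sketch of the fallback chain above; the ticker and printed fields are illustrative:

service = DataService()
data = service.get_etf_data("JEPI")
if data.get('is_estimated'):
    print("No live or cached data; using conservative estimates")
print(f"volatility={data['volatility']:.2%}, max_drawdown={data['max_drawdown']:.2%}")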

View File

@@ -0,0 +1,260 @@
from typing import Dict, Tuple, List, Optional
import pandas as pd
import numpy as np
from scipy import stats
from sklearn.ensemble import RandomForestRegressor
from statsmodels.tsa.arima.model import ARIMA
import logging
logger = logging.getLogger(__name__)
class DividendTrendService:
"""Service for calculating comprehensive dividend trend analysis"""
def __init__(self):
self.min_history_years = 5
self.required_data_points = 60 # 5 years of monthly data
def calculate_dividend_trend(self, etf_data: Dict) -> Dict:
"""
Calculate comprehensive dividend trend analysis
Args:
etf_data: Dictionary containing ETF data including:
- dividends: List of historical dividend payments
- prices: List of historical prices
- metadata: ETF metadata (age, type, etc.)
Returns:
Dictionary containing comprehensive dividend trend analysis
"""
try:
# 1. Basic Data Validation
if not self._validate_data(etf_data):
return self._create_error_response("Insufficient historical data")
# 2. Calculate Theoretical Models
gordon_growth = self._calculate_gordon_growth(etf_data)
ddm_value = self._calculate_ddm(etf_data)
# 3. Calculate Empirical Metrics
empirical_metrics = self._calculate_empirical_metrics(etf_data)
# 4. Calculate Risk Metrics
risk_metrics = self._calculate_risk_metrics(etf_data)
# 5. Generate ML Predictions
ml_predictions = self._generate_ml_predictions(etf_data)
return {
'gordon_growth': gordon_growth,
'ddm_value': ddm_value,
'empirical_metrics': empirical_metrics,
'risk_metrics': risk_metrics,
'ml_predictions': ml_predictions
}
except Exception as e:
logger.error(f"Error calculating dividend trend: {str(e)}")
return self._create_error_response(str(e))
def _validate_data(self, etf_data: Dict) -> bool:
"""Validate that we have sufficient historical data"""
if not etf_data.get('dividends') or not etf_data.get('prices'):
return False
dividends = pd.Series(etf_data['dividends'])
if len(dividends) < self.required_data_points:
return False
return True
def _calculate_gordon_growth(self, etf_data: Dict) -> float:
"""Calculate growth rate using Gordon Growth Model"""
try:
dividends = pd.Series(etf_data['dividends'])
prices = pd.Series(etf_data['prices'])
# Calculate required rate of return (r)
returns = prices.pct_change().dropna()
r = returns.mean() * 12 # Annualized
# Calculate current dividend (D)
D = dividends.iloc[-1]
# Calculate current price (P)
P = prices.iloc[-1]
# Solve for g: P = D/(r-g)
g = r - (D/P)
return g
except Exception as e:
logger.error(f"Error in Gordon Growth calculation: {str(e)}")
return 0.0
def _calculate_ddm(self, etf_data: Dict) -> float:
"""Calculate value using Dividend Discount Model"""
try:
dividends = pd.Series(etf_data['dividends'])
# Calculate growth rate
growth_rate = self._calculate_growth_rate(dividends)
# Calculate discount rate (using CAPM)
discount_rate = self._calculate_discount_rate(etf_data)
# Calculate terminal value
terminal_value = self._calculate_terminal_value(dividends, growth_rate, discount_rate)
# Calculate present value of dividends
present_value = self._calculate_present_value(dividends, discount_rate)
return present_value + terminal_value
except Exception as e:
logger.error(f"Error in DDM calculation: {str(e)}")
return 0.0
def _calculate_empirical_metrics(self, etf_data: Dict) -> Dict:
"""Calculate empirical metrics from historical data"""
try:
dividends = pd.Series(etf_data['dividends'])
# Calculate rolling growth
rolling_growth = self._calculate_rolling_growth(dividends)
# Calculate volatility
volatility = self._calculate_volatility(dividends)
# Calculate autocorrelation
autocorrelation = self._calculate_autocorrelation(dividends)
return {
'rolling_growth': rolling_growth,
'volatility': volatility,
'autocorrelation': autocorrelation
}
except Exception as e:
logger.error(f"Error calculating empirical metrics: {str(e)}")
return {}
def _calculate_risk_metrics(self, etf_data: Dict) -> Dict:
"""Calculate risk metrics"""
try:
dividends = pd.Series(etf_data['dividends'])
prices = pd.Series(etf_data['prices'])
# Calculate coverage ratio
coverage_ratio = self._calculate_coverage_ratio(etf_data)
# Calculate payout ratio
payout_ratio = self._calculate_payout_ratio(etf_data)
# Calculate market correlation
market_correlation = self._calculate_market_correlation(etf_data)
return {
'coverage_ratio': coverage_ratio,
'payout_ratio': payout_ratio,
'market_correlation': market_correlation
}
except Exception as e:
logger.error(f"Error calculating risk metrics: {str(e)}")
return {}
def _generate_ml_predictions(self, etf_data: Dict) -> Dict:
"""Generate machine learning predictions"""
try:
dividends = pd.Series(etf_data['dividends'])
# Generate time series forecast
forecast = self._generate_time_series_forecast(dividends)
# Calculate confidence interval
confidence_interval = self._calculate_confidence_interval(forecast)
return {
'next_year_growth': forecast,
'confidence_interval': confidence_interval
}
except Exception as e:
logger.error(f"Error generating ML predictions: {str(e)}")
return {}
def _create_error_response(self, error_message: str) -> Dict:
"""Create a standardized error response"""
return {
'error': error_message,
'gordon_growth': 0.0,
'ddm_value': 0.0,
'empirical_metrics': {},
'risk_metrics': {},
'ml_predictions': {}
}
# Helper methods for specific calculations
def _calculate_growth_rate(self, dividends: pd.Series) -> float:
"""Calculate dividend growth rate"""
return dividends.pct_change().mean() * 12 # Annualized
def _calculate_discount_rate(self, etf_data: Dict) -> float:
"""Calculate discount rate using CAPM"""
# Implementation needed
return 0.1 # Placeholder
def _calculate_terminal_value(self, dividends: pd.Series, growth_rate: float, discount_rate: float) -> float:
"""Calculate terminal value for DDM"""
# Implementation needed
return 0.0 # Placeholder
def _calculate_present_value(self, dividends: pd.Series, discount_rate: float) -> float:
"""Calculate present value of dividends"""
# Implementation needed
return 0.0 # Placeholder
def _calculate_rolling_growth(self, dividends: pd.Series) -> float:
"""Calculate rolling 12-month growth rate"""
return dividends.pct_change(12).mean()
def _calculate_volatility(self, dividends: pd.Series) -> float:
"""Calculate dividend volatility"""
return dividends.pct_change().std() * np.sqrt(12) # Annualized
def _calculate_autocorrelation(self, dividends: pd.Series) -> float:
"""Calculate autocorrelation of dividend payments"""
return dividends.autocorr()
def _calculate_coverage_ratio(self, etf_data: Dict) -> float:
"""Calculate dividend coverage ratio"""
# Implementation needed
return 0.0 # Placeholder
def _calculate_payout_ratio(self, etf_data: Dict) -> float:
"""Calculate payout ratio"""
# Implementation needed
return 0.0 # Placeholder
def _calculate_market_correlation(self, etf_data: Dict) -> float:
"""Calculate correlation with market returns"""
# Implementation needed
return 0.0 # Placeholder
def _generate_time_series_forecast(self, dividends: pd.Series) -> float:
"""Generate time series forecast using ARIMA"""
try:
model = ARIMA(dividends, order=(1,1,1))
model_fit = model.fit()
forecast = model_fit.forecast(steps=12)
return forecast.mean()
        except Exception:
            logger.warning("ARIMA forecast failed; returning 0.0")
            return 0.0
def _calculate_confidence_interval(self, forecast: float) -> Tuple[float, float]:
"""Calculate confidence interval for forecast"""
# Implementation needed
return (0.0, 0.0) # Placeholder
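A worked instance of the Gordon Growth rearrangement used in _calculate_gordon_growth (P = D/(r-g), solved for g); the inputs are illustrative:

r, D, P = 0.08, 0.50, 25.0         # required return, current dividend, current price
g = r - (D / P)                    # 0.08 - 0.02
print(f"implied growth: {g:.1%}")  # -> implied growth: 6.0%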

View File

@@ -0,0 +1,20 @@
from .service import DRIPService
from .no_drip_service import NoDRIPService, NoDRIPMonthlyData, NoDRIPResult
from .models import DRIPMetrics, DRIPForecastResult, DRIPPortfolioResult, DripConfig
from .exceptions import DRIPError, DataFetchError, CalculationError, ValidationError, CacheError
__all__ = [
'DRIPService',
'NoDRIPService',
'NoDRIPMonthlyData',
'NoDRIPResult',
'DRIPMetrics',
'DRIPForecastResult',
'DRIPPortfolioResult',
'DRIPError',
'DataFetchError',
'CalculationError',
'ValidationError',
'CacheError',
'DripConfig'
]
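Typical consumption of this public surface; the services.drip package path is an assumption:

from services.drip import DRIPService, NoDRIPService, DripConfig, DRIPError  # path assumed

config = DripConfig(months=12, erosion_type="None", erosion_level={})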

View File

@@ -0,0 +1,19 @@
class DRIPError(Exception):
"""Base exception for DRIP service errors"""
pass
class DataFetchError(DRIPError):
"""Raised when ETF data cannot be fetched"""
pass
class CalculationError(DRIPError):
"""Raised when DRIP calculations fail"""
pass
class ValidationError(DRIPError):
"""Raised when input validation fails"""
pass
class CacheError(DRIPError):
"""Raised when cache operations fail"""
pass
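A sketch of how callers can use the hierarchy: catching the DRIPError base covers every service-specific failure.

try:
    raise DataFetchError("no price data for ticker 'XYZ'")  # illustrative
except DRIPError as exc:
    print(f"DRIP service failed: {exc}")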

View File

@@ -0,0 +1,35 @@
import logging
import os
from pathlib import Path
def get_logger(name: str) -> logging.Logger:
"""Configure and return a logger for the DRIP service"""
logger = logging.getLogger(name)
if not logger.handlers:
logger.setLevel(logging.INFO)
# Create logs directory if it doesn't exist
log_dir = Path("logs")
log_dir.mkdir(parents=True, exist_ok=True)
# File handler
file_handler = logging.FileHandler(log_dir / "drip_service.log")
file_handler.setLevel(logging.INFO)
# Console handler
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
# Formatter
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
file_handler.setFormatter(formatter)
console_handler.setFormatter(formatter)
# Add handlers
logger.addHandler(file_handler)
logger.addHandler(console_handler)
return logger
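Usage is a one-liner; records go to both the console and logs/drip_service.log:

logger = get_logger("drip_service")
logger.info("starting DRIP forecast")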

View File

@@ -0,0 +1,116 @@
from dataclasses import dataclass, asdict
from typing import Dict, List, Optional
from datetime import datetime
import json
@dataclass
class MonthlyData:
"""Data for a single month in the DRIP simulation"""
month: int
total_value: float
monthly_income: float
cumulative_income: float
shares: Dict[str, float]
prices: Dict[str, float]
yields: Dict[str, float]
@dataclass
class DripConfig:
"""Configuration for DRIP calculations"""
months: int
erosion_type: str
erosion_level: Dict
    dividend_frequency: Optional[Dict[str, int]] = None
def __post_init__(self):
if self.dividend_frequency is None:
self.dividend_frequency = {
"Monthly": 12,
"Quarterly": 4,
"Semi-Annually": 2,
"Annually": 1,
"Unknown": 12 # Default to monthly if unknown
}
@dataclass
class DripResult:
"""Results of a DRIP calculation"""
monthly_data: List[MonthlyData]
final_portfolio_value: float
total_income: float
total_shares: Dict[str, float]
@dataclass
class DRIPMetrics:
"""Metrics for a single ETF's DRIP calculation"""
ticker: str
date: datetime
shares: float
price: float
dividend_yield: float
monthly_dividend: float
new_shares: float
portfolio_value: float
monthly_income: float
yield_on_cost: float
def to_dict(self) -> Dict:
"""Convert the metrics to a dictionary for JSON serialization"""
data = asdict(self)
data['date'] = self.date.isoformat()
return data
@classmethod
def from_dict(cls, data: Dict) -> 'DRIPMetrics':
"""Create a DRIPMetrics instance from a dictionary"""
data = data.copy()
data['date'] = datetime.fromisoformat(data['date'])
return cls(**data)
@dataclass
class DRIPForecastResult:
"""Results of a DRIP forecast for a single ETF"""
ticker: str
initial_shares: float
final_shares: float
initial_value: float
final_value: float
total_income: float
average_yield: float
monthly_metrics: List[DRIPMetrics]
accumulated_cash: float = 0.0 # Added for No-DRIP scenarios
def to_dict(self) -> Dict:
"""Convert the forecast result to a dictionary for JSON serialization"""
data = asdict(self)
data['monthly_metrics'] = [m.to_dict() for m in self.monthly_metrics]
return data
@classmethod
def from_dict(cls, data: Dict) -> 'DRIPForecastResult':
"""Create a DRIPForecastResult instance from a dictionary"""
data = data.copy()
data['monthly_metrics'] = [DRIPMetrics.from_dict(m) for m in data['monthly_metrics']]
return cls(**data)
@dataclass
class DRIPPortfolioResult:
"""Results of a DRIP forecast for an entire portfolio"""
total_value: float
monthly_income: float
total_income: float
etf_results: Dict[str, DRIPForecastResult]
accumulated_cash: float = 0.0 # Added for No-DRIP scenarios
def to_dict(self) -> Dict:
"""Convert the portfolio result to a dictionary for JSON serialization"""
data = asdict(self)
data['etf_results'] = {k: v.to_dict() for k, v in self.etf_results.items()}
return data
@classmethod
def from_dict(cls, data: Dict) -> 'DRIPPortfolioResult':
"""Create a DRIPPortfolioResult instance from a dictionary"""
data = data.copy()
data['etf_results'] = {k: DRIPForecastResult.from_dict(v) for k, v in data['etf_results'].items()}
return cls(**data)
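A round-trip sketch for the to_dict/from_dict pair on DRIPMetrics (all values illustrative); the date field survives JSON via isoformat():

from datetime import datetime
import json

m = DRIPMetrics(
    ticker="JEPI", date=datetime(2025, 6, 1), shares=100.0, price=55.0,
    dividend_yield=0.07, monthly_dividend=32.08, new_shares=0.58,
    portfolio_value=5500.0, monthly_income=32.08, yield_on_cost=0.07,
)
restored = DRIPMetrics.from_dict(json.loads(json.dumps(m.to_dict())))
assert restored == m  # dataclass equality holds after the round-trip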

View File

@@ -0,0 +1,408 @@
from typing import Dict, List, Optional, Tuple, Any
import pandas as pd
import numpy as np
import logging
import traceback
from dataclasses import dataclass, field
from enum import Enum
from .models import (
MonthlyData,
DripConfig,
DripResult,
DRIPMetrics,
DRIPForecastResult,
DRIPPortfolioResult
)
from ..nav_erosion_service import NavErosionService
# Duplicate necessary classes to avoid circular import
class DistributionFrequency(Enum):
"""Enum for distribution frequencies"""
MONTHLY = ("Monthly", 12)
QUARTERLY = ("Quarterly", 4)
SEMI_ANNUALLY = ("Semi-Annually", 2)
ANNUALLY = ("Annually", 1)
UNKNOWN = ("Unknown", 12)
def __init__(self, name: str, payments_per_year: int):
self.display_name = name
self.payments_per_year = payments_per_year
@dataclass
class TickerData:
"""Data structure for individual ticker information"""
ticker: str
price: float
annual_yield: float
shares: float
allocation_pct: float
distribution_freq: DistributionFrequency
@property
def market_value(self) -> float:
return self.price * self.shares
@property
def monthly_yield(self) -> float:
return self.annual_yield / 12
@property
def distribution_yield(self) -> float:
return self.annual_yield / self.distribution_freq.payments_per_year
@dataclass
class ErosionConfig:
"""Configuration for erosion calculations"""
erosion_type: str
erosion_level: Dict[str, Dict[str, float]] # Changed to match NavErosionService output
# Configure logging
logger = logging.getLogger(__name__)
__all__ = ['NoDRIPService', 'NoDRIPMonthlyData', 'NoDRIPResult']
@dataclass
class NoDRIPMonthlyData:
"""Data for a single month in the No-DRIP simulation"""
month: int
portfolio_value: float # Original shares * current prices
monthly_income: float # Dividends received as cash
cumulative_income: float # Total cash accumulated
prices: Dict[str, float] # Current (eroded) prices
yields: Dict[str, float] # Current (eroded) yields
original_shares: Dict[str, float] # Original shares (constant)
@dataclass
class NoDRIPResult:
"""Results of a No-DRIP calculation"""
monthly_data: List[NoDRIPMonthlyData]
final_portfolio_value: float # Original shares * final prices
total_cash_income: float # All dividends as cash
total_value: float # Portfolio value + cash
original_shares: Dict[str, float] # Original share counts
class NoDRIPService:
"""No-DRIP calculation service - dividends are kept as cash, not reinvested"""
def __init__(self) -> None:
self.MAX_EROSION_LEVEL = 9
self.MAX_MONTHLY_EROSION = 0.05 # 5% monthly max erosion
self.DISTRIBUTION_FREQUENCIES = {freq.display_name: freq for freq in DistributionFrequency}
self.nav_erosion_service = NavErosionService()
def calculate_no_drip_growth(self, portfolio_df: pd.DataFrame, config: DripConfig) -> NoDRIPResult:
"""
Calculate No-DRIP growth for a portfolio over a specified period.
In No-DRIP strategy, dividends are kept as cash and not reinvested.
Args:
portfolio_df: DataFrame containing portfolio allocation
config: DripConfig object with simulation parameters
Returns:
NoDRIPResult object containing the simulation results
"""
try:
# Validate inputs (reuse from DRIP service)
self._validate_inputs(portfolio_df, config)
# Initialize portfolio data
ticker_data = self._initialize_ticker_data(portfolio_df)
# Handle erosion configuration
erosion_config = self._parse_erosion_config(config)
# If erosion is requested but no proper erosion_level is provided, calculate it
if (config.erosion_type != "None" and
(not hasattr(config, 'erosion_level') or
not isinstance(config.erosion_level, dict) or
"per_ticker" not in config.erosion_level)):
logger.info(f"Calculating erosion rates for No-DRIP with erosion type: {config.erosion_type}")
tickers = list(ticker_data.keys())
calculated_erosion = self._calculate_erosion_from_analysis(tickers)
erosion_config = ErosionConfig(
erosion_type=config.erosion_type,
erosion_level=calculated_erosion
)
# Pre-calculate distribution schedule for performance
distribution_schedule = self._create_distribution_schedule(ticker_data, config.months)
# Initialize simulation state (shares remain constant in No-DRIP)
simulation_state = self._initialize_simulation_state(ticker_data)
monthly_data: List[NoDRIPMonthlyData] = []
# Create monthly tracking table
monthly_tracking = []
# Run monthly simulation
for month in range(1, config.months + 1):
# Calculate monthly income from distributions (keep as cash)
monthly_income = self._calculate_monthly_distributions(
month, simulation_state, ticker_data, distribution_schedule
)
# Update cumulative cash income
simulation_state['cumulative_cash'] += monthly_income
# Apply erosion to prices and yields (but NOT to shares)
if erosion_config.erosion_type != "None":
self._apply_monthly_erosion(simulation_state, erosion_config, ticker_data.keys())
# Calculate portfolio value (original shares * current eroded prices)
portfolio_value = sum(
simulation_state['original_shares'][ticker] * simulation_state['current_prices'][ticker]
for ticker in ticker_data.keys()
)
# Total value = portfolio + cash
total_value = portfolio_value + simulation_state['cumulative_cash']
# Add to monthly tracking
monthly_tracking.append({
'Month': month,
'Portfolio Value': portfolio_value,
'Monthly Income': monthly_income,
'Cumulative Income': simulation_state['cumulative_cash'],
'Total Value': total_value,
'Prices': {ticker: simulation_state['current_prices'][ticker] for ticker in ticker_data.keys()},
'Yields': {ticker: simulation_state['current_yields'][ticker] for ticker in ticker_data.keys()}
})
# Create monthly data
monthly_data.append(NoDRIPMonthlyData(
month=month,
portfolio_value=portfolio_value,
monthly_income=monthly_income,
cumulative_income=simulation_state['cumulative_cash'],
prices=simulation_state['current_prices'].copy(),
yields=simulation_state['current_yields'].copy(),
original_shares=simulation_state['original_shares'].copy()
))
# Print monthly tracking table
print("\nMonthly No-DRIP Simulation Results:")
print("=" * 100)
print(f"{'Month':<6} {'Portfolio Value':<15} {'Monthly Income':<15} {'Cumulative Income':<18} {'Total Value':<15}")
print("-" * 100)
for month_data in monthly_tracking:
print(f"{month_data['Month']:<6} ${month_data['Portfolio Value']:<14.2f} ${month_data['Monthly Income']:<14.2f} ${month_data['Cumulative Income']:<17.2f} ${month_data['Total Value']:<14.2f}")
print("=" * 100)
# Calculate final results
return self._create_no_drip_result(monthly_data, simulation_state)
except Exception as e:
logger.error(f"Error calculating No-DRIP growth: {str(e)}")
logger.error(traceback.format_exc())
raise
def _validate_inputs(self, portfolio_df: pd.DataFrame, config: DripConfig) -> None:
"""Validate input parameters (reuse from DRIP service)"""
required_columns = ["Ticker", "Price", "Yield (%)", "Shares"]
missing_columns = [col for col in required_columns if col not in portfolio_df.columns]
if missing_columns:
raise ValueError(f"Missing required columns: {missing_columns}")
if config.months <= 0:
raise ValueError("Months must be positive")
if portfolio_df.empty:
raise ValueError("Portfolio DataFrame is empty")
def _initialize_ticker_data(self, portfolio_df: pd.DataFrame) -> Dict[str, TickerData]:
"""Initialize ticker data with validation (reuse from DRIP service)"""
ticker_data = {}
for _, row in portfolio_df.iterrows():
ticker = row["Ticker"]
# Handle distribution frequency
dist_period = row.get("Distribution Period", "Monthly")
dist_freq = self.DISTRIBUTION_FREQUENCIES.get(dist_period, DistributionFrequency.MONTHLY)
ticker_data[ticker] = TickerData(
ticker=ticker,
price=max(0.01, float(row["Price"])), # Prevent zero/negative prices
annual_yield=max(0.0, float(row["Yield (%)"] / 100)), # Convert to decimal
shares=max(0.0, float(row["Shares"])),
allocation_pct=float(row.get("Allocation (%)", 0) / 100),
distribution_freq=dist_freq
)
return ticker_data
def _parse_erosion_config(self, config: DripConfig) -> ErosionConfig:
"""Parse and validate erosion configuration (reuse from DRIP service)"""
if not hasattr(config, 'erosion_level') or config.erosion_type == "None":
return ErosionConfig(erosion_type="None", erosion_level={})
# Check if erosion_level is already in the correct format
if isinstance(config.erosion_level, dict) and "per_ticker" in config.erosion_level:
return ErosionConfig(
erosion_type=config.erosion_type,
erosion_level=config.erosion_level
)
return ErosionConfig(
erosion_type=config.erosion_type,
erosion_level=config.erosion_level
)
def _calculate_erosion_from_analysis(self, tickers: List[str]) -> Dict:
"""Calculate erosion rates using NavErosionService (reuse from DRIP service)"""
try:
# Use NavErosionService to analyze the tickers
analysis = self.nav_erosion_service.analyze_etf_erosion_risk(tickers)
# Convert to format expected by No-DRIP service
erosion_config = self.nav_erosion_service.convert_to_drip_erosion_config(analysis)
logger.info(f"Calculated erosion rates for No-DRIP tickers: {tickers}")
logger.info(f"Erosion configuration: {erosion_config}")
return erosion_config
except Exception as e:
logger.error(f"Error calculating erosion rates for No-DRIP: {str(e)}")
logger.warning("Falling back to no erosion")
return {"per_ticker": {ticker: {"nav": 0.0, "yield": 0.0} for ticker in tickers}}
def _create_distribution_schedule(self, ticker_data: Dict[str, TickerData], total_months: int) -> Dict[str, List[int]]:
"""Pre-calculate which months each ticker pays distributions (reuse from DRIP service)"""
schedule = {}
for ticker, data in ticker_data.items():
distribution_months = []
freq = data.distribution_freq
for month in range(1, total_months + 1):
if self._is_distribution_month(month, freq):
distribution_months.append(month)
schedule[ticker] = distribution_months
return schedule
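    # Example (illustrative): over a 12-month horizon, a quarterly payer is
    # scheduled for months [3, 6, 9, 12] and a monthly payer for [1, 2, ..., 12],
    # per _is_distribution_month below.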
def _initialize_simulation_state(self, ticker_data: Dict[str, TickerData]) -> Dict[str, Any]:
"""Initialize simulation state variables"""
return {
'original_shares': {ticker: data.shares for ticker, data in ticker_data.items()}, # Constant
'current_prices': {ticker: data.price for ticker, data in ticker_data.items()},
'current_yields': {ticker: data.annual_yield for ticker, data in ticker_data.items()},
'cumulative_cash': 0.0 # Cash accumulated from dividends
}
def _calculate_monthly_distributions(
self,
month: int,
state: Dict[str, Any],
ticker_data: Dict[str, TickerData],
distribution_schedule: Dict[str, List[int]]
) -> float:
"""Calculate distributions for the current month (reuse logic from DRIP service)"""
monthly_income = 0.0
for ticker, data in ticker_data.items():
if month in distribution_schedule[ticker]:
shares = state['original_shares'][ticker] # Original shares (constant)
price = state['current_prices'][ticker]
yield_rate = state['current_yields'][ticker]
# Calculate distribution amount using current (eroded) values
distribution_yield = yield_rate / data.distribution_freq.payments_per_year
distribution_amount = shares * price * distribution_yield
monthly_income += distribution_amount
# Log distribution calculation
logger.info(f"Month {month} No-DRIP distribution for {ticker}:")
logger.info(f" Shares: {shares:.4f} (constant)")
logger.info(f" Price: ${price:.2f}")
logger.info(f" Yield: {yield_rate:.2%}")
logger.info(f" Distribution: ${distribution_amount:.2f}")
return monthly_income
def _apply_monthly_erosion(
self,
state: Dict[str, Any],
erosion_config: ErosionConfig,
tickers: List[str]
) -> None:
"""Apply monthly erosion to prices and yields (reuse from DRIP service)"""
if erosion_config.erosion_type == "None":
return
# Validate erosion configuration structure
if not isinstance(erosion_config.erosion_level, dict):
logger.warning(f"Invalid erosion_level format: {type(erosion_config.erosion_level)}")
return
per_ticker_data = erosion_config.erosion_level.get("per_ticker", {})
if not per_ticker_data:
logger.warning("No per_ticker erosion data found in erosion_level")
return
for ticker in tickers:
# Get per-ticker erosion rates with fallback
ticker_rates = per_ticker_data.get(ticker, {})
if not ticker_rates:
logger.warning(f"No erosion rates found for ticker {ticker}, skipping erosion")
continue
nav_rate = ticker_rates.get("nav", 0.0) # Monthly rate in decimal form
yield_rate = ticker_rates.get("yield", 0.0) # Monthly rate in decimal form
# Validate rates are reasonable (0 to 5% monthly max)
nav_rate = max(0.0, min(nav_rate, self.MAX_MONTHLY_EROSION))
yield_rate = max(0.0, min(yield_rate, self.MAX_MONTHLY_EROSION))
# Store original values for logging
original_price = state['current_prices'][ticker]
original_yield = state['current_yields'][ticker]
# Apply erosion directly (rates are already monthly)
state['current_prices'][ticker] *= (1 - nav_rate)
state['current_yields'][ticker] *= (1 - yield_rate)
# Ensure prices and yields don't go below reasonable minimums
state['current_prices'][ticker] = max(state['current_prices'][ticker], 0.01)
state['current_yields'][ticker] = max(state['current_yields'][ticker], 0.0)
# Log erosion application
logger.info(f"Applied monthly erosion to {ticker} (No-DRIP):")
logger.info(f" NAV: {nav_rate:.4%} -> Price: ${original_price:.2f} -> ${state['current_prices'][ticker]:.2f}")
logger.info(f" Yield: {yield_rate:.4%} -> Yield: {original_yield:.2%} -> {state['current_yields'][ticker]:.2%}")
def _is_distribution_month(self, month: int, frequency: DistributionFrequency) -> bool:
"""Check if current month is a distribution month (reuse from DRIP service)"""
if frequency == DistributionFrequency.MONTHLY:
return True
elif frequency == DistributionFrequency.QUARTERLY:
return month % 3 == 0
elif frequency == DistributionFrequency.SEMI_ANNUALLY:
return month % 6 == 0
elif frequency == DistributionFrequency.ANNUALLY:
return month % 12 == 0
else:
return True # Default to monthly for unknown
def _create_no_drip_result(self, monthly_data: List[NoDRIPMonthlyData], state: Dict[str, Any]) -> NoDRIPResult:
"""Create final No-DRIP result object"""
if not monthly_data:
raise ValueError("No monthly data generated")
final_data = monthly_data[-1]
return NoDRIPResult(
monthly_data=monthly_data,
final_portfolio_value=final_data.portfolio_value,
total_cash_income=state['cumulative_cash'],
total_value=final_data.portfolio_value + state['cumulative_cash'],
original_shares=state['original_shares'].copy()
)
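A minimal end-to-end sketch of the No-DRIP path (values illustrative; with erosion_type "None" the NavErosionService is never consulted):

import pandas as pd

portfolio = pd.DataFrame({
    "Ticker": ["JEPI"], "Price": [55.0], "Yield (%)": [7.0],
    "Shares": [100.0], "Distribution Period": ["Monthly"],
})
config = DripConfig(months=12, erosion_type="None", erosion_level={})
result = NoDRIPService().calculate_no_drip_growth(portfolio, config)
print(result.total_cash_income)  # ~385: 12 months of 100 * 55 * (0.07 / 12)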

View File

@@ -0,0 +1,735 @@
from typing import Dict, List, Optional, Tuple, Any
import pandas as pd
import numpy as np
import logging
import traceback
from dataclasses import dataclass, field
from enum import Enum
from .models import (
MonthlyData,
DripConfig,
DripResult,
DRIPMetrics,
DRIPForecastResult,
DRIPPortfolioResult
)
from ..nav_erosion_service import NavErosionService
# Configure logging
logger = logging.getLogger(__name__)
__all__ = ['DRIPService', 'DistributionFrequency', 'TickerData', 'ErosionConfig']
class DistributionFrequency(Enum):
"""Enum for distribution frequencies"""
MONTHLY = ("Monthly", 12)
QUARTERLY = ("Quarterly", 4)
SEMI_ANNUALLY = ("Semi-Annually", 2)
ANNUALLY = ("Annually", 1)
UNKNOWN = ("Unknown", 12)
def __init__(self, name: str, payments_per_year: int):
self.display_name = name
self.payments_per_year = payments_per_year
@dataclass
class TickerData:
"""Data structure for individual ticker information"""
ticker: str
price: float
annual_yield: float
shares: float
allocation_pct: float
distribution_freq: DistributionFrequency
@property
def market_value(self) -> float:
return self.price * self.shares
@property
def monthly_yield(self) -> float:
return self.annual_yield / 12
@property
def distribution_yield(self) -> float:
return self.annual_yield / self.distribution_freq.payments_per_year
@dataclass
class ErosionConfig:
"""Configuration for erosion calculations"""
erosion_type: str
erosion_level: Dict[str, Dict[str, float]] # Changed to match NavErosionService output
class DRIPService:
"""Enhanced DRIP calculation service with improved performance and accuracy"""
def __init__(self) -> None:
self.MAX_EROSION_LEVEL = 9
self.MAX_MONTHLY_EROSION = 0.05 # 5% monthly max erosion
self.DISTRIBUTION_FREQUENCIES = {freq.display_name: freq for freq in DistributionFrequency}
self.nav_erosion_service = NavErosionService()
self.no_drip_service = None # Will be initialized when needed to avoid circular import
def calculate_drip_growth(self, portfolio_df: pd.DataFrame, config: DripConfig) -> DripResult:
"""
Calculate DRIP growth for a portfolio over a specified period with enhanced accuracy.
Args:
portfolio_df: DataFrame containing portfolio allocation
config: DripConfig object with simulation parameters
Returns:
DripResult object containing the simulation results
"""
try:
# Validate inputs
self._validate_inputs(portfolio_df, config)
# Initialize portfolio data
ticker_data = self._initialize_ticker_data(portfolio_df)
# Handle erosion configuration
erosion_config = self._parse_erosion_config(config)
# If erosion is requested but no proper erosion_level is provided, calculate it
if (config.erosion_type != "None" and
(not hasattr(config, 'erosion_level') or
not isinstance(config.erosion_level, dict) or
"per_ticker" not in config.erosion_level)):
logger.info(f"Calculating erosion rates for erosion type: {config.erosion_type}")
tickers = list(ticker_data.keys())
calculated_erosion = self.calculate_erosion_from_analysis(tickers)
erosion_config = ErosionConfig(
erosion_type=config.erosion_type,
erosion_level=calculated_erosion
)
# Pre-calculate distribution schedule for performance
distribution_schedule = self._create_distribution_schedule(ticker_data, config.months)
# Initialize simulation state
simulation_state = self._initialize_simulation_state(ticker_data)
monthly_data: List[MonthlyData] = []
# Create monthly tracking table
monthly_tracking = []
# Run monthly simulation
for month in range(1, config.months + 1):
# Calculate monthly income from distributions
monthly_income = self._calculate_monthly_distributions(
month, simulation_state, ticker_data, distribution_schedule
)
# Update cumulative income
simulation_state['cumulative_income'] += monthly_income
# Apply erosion to prices and yields
if erosion_config.erosion_type != "None":
self._apply_monthly_erosion(simulation_state, erosion_config, ticker_data.keys())
# Reinvest dividends (DRIP)
                self._reinvest_dividends(month, simulation_state, ticker_data, distribution_schedule)
# Calculate total portfolio value
total_value = sum(
simulation_state['current_shares'][ticker] * simulation_state['current_prices'][ticker]
for ticker in ticker_data.keys()
)
# Add to monthly tracking
monthly_tracking.append({
'Month': month,
'Portfolio Value': total_value,
'Monthly Income': monthly_income,
'Cumulative Income': simulation_state['cumulative_income'],
'Shares': {ticker: simulation_state['current_shares'][ticker] for ticker in ticker_data.keys()},
'Prices': {ticker: simulation_state['current_prices'][ticker] for ticker in ticker_data.keys()},
'Yields': {ticker: simulation_state['current_yields'][ticker] for ticker in ticker_data.keys()}
})
# Create monthly data
monthly_data.append(MonthlyData(
month=month,
total_value=total_value,
monthly_income=monthly_income,
cumulative_income=simulation_state['cumulative_income'],
shares=simulation_state['current_shares'].copy(),
prices=simulation_state['current_prices'].copy(),
yields=simulation_state['current_yields'].copy()
))
# Print monthly tracking table
print("\nMonthly DRIP Simulation Results:")
print("=" * 100)
print(f"{'Month':<6} {'Portfolio Value':<15} {'Monthly Income':<15} {'Cumulative Income':<15} {'Shares':<15}")
print("-" * 100)
for month_data in monthly_tracking:
shares_str = ", ".join([f"{ticker}: {shares:.4f}" for ticker, shares in month_data['Shares'].items()])
print(f"{month_data['Month']:<6} ${month_data['Portfolio Value']:<14.2f} ${month_data['Monthly Income']:<14.2f} ${month_data['Cumulative Income']:<14.2f} {shares_str}")
print("=" * 100)
# Calculate final results
return self._create_drip_result(monthly_data, simulation_state)
except Exception as e:
logger.error(f"Error calculating DRIP growth: {str(e)}")
logger.error(traceback.format_exc())
raise
def _validate_inputs(self, portfolio_df: pd.DataFrame, config: DripConfig) -> None:
"""Validate input parameters"""
required_columns = ["Ticker", "Price", "Yield (%)", "Shares"]
missing_columns = [col for col in required_columns if col not in portfolio_df.columns]
if missing_columns:
raise ValueError(f"Missing required columns: {missing_columns}")
if config.months <= 0:
raise ValueError("Months must be positive")
if portfolio_df.empty:
raise ValueError("Portfolio DataFrame is empty")
def _initialize_ticker_data(self, portfolio_df: pd.DataFrame) -> Dict[str, TickerData]:
"""Initialize ticker data with validation"""
ticker_data = {}
for _, row in portfolio_df.iterrows():
ticker = row["Ticker"]
# Handle distribution frequency
dist_period = row.get("Distribution Period", "Monthly")
dist_freq = self.DISTRIBUTION_FREQUENCIES.get(dist_period, DistributionFrequency.MONTHLY)
ticker_data[ticker] = TickerData(
ticker=ticker,
price=max(0.01, float(row["Price"])), # Prevent zero/negative prices
annual_yield=max(0.0, float(row["Yield (%)"] / 100)), # Convert to decimal
shares=max(0.0, float(row["Shares"])),
allocation_pct=float(row.get("Allocation (%)", 0) / 100),
distribution_freq=dist_freq
)
return ticker_data
def _parse_erosion_config(self, config: DripConfig) -> ErosionConfig:
"""Parse and validate erosion configuration"""
if not hasattr(config, 'erosion_level') or config.erosion_type == "None":
return ErosionConfig(erosion_type="None", erosion_level={})
# Check if erosion_level is already in the correct format
if isinstance(config.erosion_level, dict) and "per_ticker" in config.erosion_level:
return ErosionConfig(
erosion_type=config.erosion_type,
erosion_level=config.erosion_level
)
# If erosion_level is not in the correct format, it might be a NavErosionAnalysis
# or we need to calculate it from scratch
return ErosionConfig(
erosion_type=config.erosion_type,
erosion_level=config.erosion_level
)
def calculate_erosion_from_analysis(self, tickers: List[str]) -> Dict:
"""
Calculate erosion rates using NavErosionService
Args:
tickers: List of ticker symbols to analyze
Returns:
Dict in format expected by _apply_monthly_erosion
"""
try:
# Use NavErosionService to analyze the tickers
analysis = self.nav_erosion_service.analyze_etf_erosion_risk(tickers)
# Convert to format expected by DRIP service
erosion_config = self.nav_erosion_service.convert_to_drip_erosion_config(analysis)
logger.info(f"Calculated erosion rates for tickers: {tickers}")
logger.info(f"Erosion configuration: {erosion_config}")
return erosion_config
except Exception as e:
logger.error(f"Error calculating erosion rates: {str(e)}")
logger.warning("Falling back to no erosion")
return {"per_ticker": {ticker: {"nav": 0.0, "yield": 0.0} for ticker in tickers}}
def _normalize_erosion_rate(self, erosion_level: float) -> float:
"""Convert erosion level (0-9) to monthly rate with validation"""
rate = (erosion_level / self.MAX_EROSION_LEVEL) * self.MAX_MONTHLY_EROSION
return min(max(0.0, rate), self.MAX_MONTHLY_EROSION)
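    # Worked examples of the 0-9 scale (MAX_MONTHLY_EROSION = 0.05):
    #   level 9 -> (9/9) * 0.05 = 0.0500  (5.00% per month, the cap)
    #   level 3 -> (3/9) * 0.05 ~= 0.0167 (about 1.67% per month)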
def _create_distribution_schedule(self, ticker_data: Dict[str, TickerData], total_months: int) -> Dict[str, List[int]]:
"""Pre-calculate which months each ticker pays distributions"""
schedule = {}
for ticker, data in ticker_data.items():
distribution_months = []
freq = data.distribution_freq
for month in range(1, total_months + 1):
if self._is_distribution_month(month, freq):
distribution_months.append(month)
schedule[ticker] = distribution_months
return schedule
def _initialize_simulation_state(self, ticker_data: Dict[str, TickerData]) -> Dict[str, Any]:
"""Initialize simulation state variables"""
return {
'current_shares': {ticker: data.shares for ticker, data in ticker_data.items()},
'current_prices': {ticker: data.price for ticker, data in ticker_data.items()},
'current_yields': {ticker: data.annual_yield for ticker, data in ticker_data.items()},
'cumulative_income': 0.0
}
def _calculate_monthly_distributions(
self,
month: int,
state: Dict[str, Any],
ticker_data: Dict[str, TickerData],
distribution_schedule: Dict[str, List[int]]
) -> float:
"""Calculate distributions for the current month"""
monthly_income = 0.0
for ticker, data in ticker_data.items():
if month in distribution_schedule[ticker]:
shares = state['current_shares'][ticker]
price = state['current_prices'][ticker]
yield_rate = state['current_yields'][ticker]
# Calculate distribution amount using current (eroded) values
distribution_yield = yield_rate / data.distribution_freq.payments_per_year
distribution_amount = shares * price * distribution_yield
monthly_income += distribution_amount
# Log distribution calculation
logger.info(f"Month {month} distribution for {ticker}:")
logger.info(f" Shares: {shares:.4f}")
logger.info(f" Price: ${price:.2f}")
logger.info(f" Yield: {yield_rate:.2%}")
logger.info(f" Distribution: ${distribution_amount:.2f}")
return monthly_income
def _apply_monthly_erosion(
self,
state: Dict[str, Any],
erosion_config: ErosionConfig,
tickers: List[str]
) -> None:
"""Apply monthly erosion to prices and yields"""
if erosion_config.erosion_type == "None":
return
# Validate erosion configuration structure
if not isinstance(erosion_config.erosion_level, dict):
logger.warning(f"Invalid erosion_level format: {type(erosion_config.erosion_level)}")
return
per_ticker_data = erosion_config.erosion_level.get("per_ticker", {})
if not per_ticker_data:
logger.warning("No per_ticker erosion data found in erosion_level")
return
for ticker in tickers:
# Get per-ticker erosion rates with fallback
ticker_rates = per_ticker_data.get(ticker, {})
if not ticker_rates:
logger.warning(f"No erosion rates found for ticker {ticker}, skipping erosion")
continue
nav_rate = ticker_rates.get("nav", 0.0) # Monthly rate in decimal form
yield_rate = ticker_rates.get("yield", 0.0) # Monthly rate in decimal form
# Validate rates are reasonable (0 to 5% monthly max)
nav_rate = max(0.0, min(nav_rate, self.MAX_MONTHLY_EROSION))
yield_rate = max(0.0, min(yield_rate, self.MAX_MONTHLY_EROSION))
# Store original values for logging
original_price = state['current_prices'][ticker]
original_yield = state['current_yields'][ticker]
# Apply erosion directly (rates are already monthly)
state['current_prices'][ticker] *= (1 - nav_rate)
state['current_yields'][ticker] *= (1 - yield_rate)
# Ensure prices and yields don't go below reasonable minimums
state['current_prices'][ticker] = max(state['current_prices'][ticker], 0.01)
state['current_yields'][ticker] = max(state['current_yields'][ticker], 0.0)
# Log erosion application
logger.info(f"Applied monthly erosion to {ticker}:")
logger.info(f" NAV: {nav_rate:.4%} -> Price: ${original_price:.2f} -> ${state['current_prices'][ticker]:.2f}")
logger.info(f" Yield: {yield_rate:.4%} -> Yield: {original_yield:.2%} -> {state['current_yields'][ticker]:.2%}")
    def _reinvest_dividends(
        self,
        month: int,
        state: Dict[str, Any],
        ticker_data: Dict[str, TickerData],
        distribution_schedule: Dict[str, List[int]]
    ) -> None:
"""Reinvest dividends for tickers that distributed in this month"""
for ticker, distribution_months in distribution_schedule.items():
if month in distribution_months:
shares = state['current_shares'][ticker]
price = state['current_prices'][ticker]
yield_rate = state['current_yields'][ticker]
                # Reinvest the amount distributed this month, using current (eroded)
                # values and the ticker's actual payout frequency
                payments_per_year = ticker_data[ticker].distribution_freq.payments_per_year
                dividend_income = shares * price * yield_rate / payments_per_year
# Purchase additional shares at current price
if price > 0:
new_shares = dividend_income / price
state['current_shares'][ticker] += new_shares
# Log reinvestment
logger.info(f"Month {month} reinvestment for {ticker}:")
logger.info(f" Dividend Income: ${dividend_income:.2f}")
logger.info(f" New Shares: {new_shares:.4f}")
logger.info(f" Total Shares: {state['current_shares'][ticker]:.4f}")
def _is_distribution_month(self, month: int, frequency: DistributionFrequency) -> bool:
"""Check if current month is a distribution month"""
if frequency == DistributionFrequency.MONTHLY:
return True
elif frequency == DistributionFrequency.QUARTERLY:
return month % 3 == 0
elif frequency == DistributionFrequency.SEMI_ANNUALLY:
return month % 6 == 0
elif frequency == DistributionFrequency.ANNUALLY:
return month % 12 == 0
else:
return True # Default to monthly for unknown
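Because simulation months are 1-indexed, the modulo checks above land quarterly payouts in months 3, 6, 9, and 12 of each simulated year. A minimal sketch, assuming a hypothetical `service` instance of this class:

```python
# 'service' is a hypothetical instance of the DRIP service class.
quarterly = [m for m in range(1, 13)
             if service._is_distribution_month(m, DistributionFrequency.QUARTERLY)]
# quarterly == [3, 6, 9, 12]; SEMI_ANNUALLY gives [6, 12], ANNUALLY gives [12].
```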
def _create_drip_result(self, monthly_data: List[MonthlyData], state: Dict[str, Any]) -> DripResult:
"""Create final DRIP result object"""
if not monthly_data:
raise ValueError("No monthly data generated")
final_data = monthly_data[-1]
return DripResult(
monthly_data=monthly_data,
final_portfolio_value=final_data.total_value,
total_income=state['cumulative_income'],
total_shares=state['current_shares'].copy()
)
# Utility methods for analysis and comparison - duplicate method removed
# The main calculate_drip_vs_no_drip_comparison method is defined below
def forecast_portfolio(
self,
portfolio_df: pd.DataFrame,
config: DripConfig,
tickers: Optional[List[str]] = None
) -> DRIPPortfolioResult:
"""
Forecast DRIP growth for an entire portfolio.
Args:
portfolio_df: DataFrame containing portfolio allocation with columns:
- Ticker: ETF ticker symbol
- Price: Current price
- Yield (%): Annual dividend yield
- Shares: Number of shares
- Allocation (%): Portfolio allocation percentage
config: DripConfig object with simulation parameters
tickers: Optional list of tickers to include in the forecast. If None, all tickers are included.
Returns:
DRIPPortfolioResult object containing the forecast results
"""
try:
# Filter portfolio_df if tickers are specified
if tickers is not None:
portfolio_df = portfolio_df[portfolio_df['Ticker'].isin(tickers)].copy()
if portfolio_df.empty:
raise ValueError(f"No matching tickers found in portfolio: {tickers}")
# Calculate DRIP growth for the portfolio
result = self.calculate_drip_growth(portfolio_df, config)
# Convert the result to DRIPPortfolioResult format
etf_results = {}
for ticker in portfolio_df['Ticker'].unique():
ticker_data = portfolio_df[portfolio_df['Ticker'] == ticker].iloc[0]
initial_shares = float(ticker_data['Shares'])
initial_value = initial_shares * float(ticker_data['Price'])
# Get final values from the result
final_shares = result.total_shares.get(ticker, initial_shares)
final_value = final_shares * float(ticker_data['Price'])
# Calculate metrics. Income is apportioned by share count, which is only a
# rough proxy: tickers with different prices contribute unequal value per
# share, so value-based weighting (shares * price) would be more accurate.
total_income = sum(
month_data.monthly_income * (initial_shares / sum(portfolio_df['Shares']))
for month_data in result.monthly_data
)
average_yield = float(ticker_data['Yield (%)']) / 100
# Create monthly metrics
monthly_metrics = []
for month_data in result.monthly_data:
metrics = DRIPMetrics(
ticker=ticker,
date=pd.Timestamp.now() + pd.DateOffset(months=month_data.month),
shares=month_data.shares.get(ticker, initial_shares),
price=month_data.prices.get(ticker, float(ticker_data['Price'])),
dividend_yield=month_data.yields.get(ticker, average_yield),
monthly_dividend=month_data.monthly_income * (initial_shares / sum(portfolio_df['Shares'])),
new_shares=month_data.shares.get(ticker, initial_shares) - initial_shares,
portfolio_value=month_data.total_value * (initial_shares / sum(portfolio_df['Shares'])),
monthly_income=month_data.monthly_income * (initial_shares / sum(portfolio_df['Shares'])),
yield_on_cost=average_yield
)
monthly_metrics.append(metrics)
# Create forecast result for this ETF
etf_results[ticker] = DRIPForecastResult(
ticker=ticker,
initial_shares=initial_shares,
final_shares=final_shares,
initial_value=initial_value,
final_value=final_value,
total_income=total_income,
average_yield=average_yield,
monthly_metrics=monthly_metrics
)
# Create and return the portfolio result
return DRIPPortfolioResult(
total_value=result.final_portfolio_value,
monthly_income=result.monthly_data[-1].monthly_income,
total_income=result.total_income,
etf_results=etf_results
)
except Exception as e:
logger.error(f"Error forecasting portfolio: {str(e)}")
logger.error(traceback.format_exc())
raise
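A minimal usage sketch for `forecast_portfolio`, assuming a service instance named `drip_service` and an already-built `DripConfig` named `config` (both hypothetical names); the DataFrame columns follow the docstring above:

```python
import pandas as pd

# Hypothetical inputs matching the documented portfolio_df schema.
portfolio_df = pd.DataFrame({
    "Ticker": ["JEPI", "FEPI"],
    "Price": [55.0, 43.25],
    "Yield (%)": [7.5, 25.0],
    "Shares": [100.0, 50.0],
    "Allocation (%)": [60.0, 40.0],
})
result = drip_service.forecast_portfolio(portfolio_df, config)
print(result.total_value, result.total_income)
```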
def calculate_drip_vs_no_drip_comparison(
self,
portfolio_df: pd.DataFrame,
config: DripConfig
) -> Dict[str, Any]:
"""
Calculate and compare DRIP vs No-DRIP strategies with detailed analysis.
This method runs both simulations and displays comparison tables.
Args:
portfolio_df: DataFrame containing portfolio allocation
config: DripConfig object with simulation parameters
Returns:
Dict containing both results and comparison analysis
"""
try:
# Initialize No-DRIP service if needed
if self.no_drip_service is None:
from .no_drip_service import NoDRIPService
self.no_drip_service = NoDRIPService()
# Calculate initial investment
initial_investment = (portfolio_df['Price'] * portfolio_df['Shares']).sum()
# Run DRIP simulation (this will print the DRIP table)
logger.info("Running DRIP simulation...")
drip_result = self.calculate_drip_growth(portfolio_df, config)
# Run No-DRIP simulation (this will print the No-DRIP table)
logger.info("Running No-DRIP simulation...")
no_drip_result = self.no_drip_service.calculate_no_drip_growth(portfolio_df, config)
# Calculate break-even analysis
drip_break_even = self._calculate_break_even_analysis(
"DRIP", drip_result.monthly_data, initial_investment,
lambda md: md.total_value
)
no_drip_break_even = self._calculate_break_even_analysis(
"No-DRIP", no_drip_result.monthly_data, initial_investment,
lambda md: md.portfolio_value + md.cumulative_income
)
# Determine winner
drip_final = drip_result.final_portfolio_value
no_drip_final = no_drip_result.total_value
if drip_final > no_drip_final:
winner = "DRIP"
advantage_amount = drip_final - no_drip_final
advantage_percentage = (advantage_amount / no_drip_final) * 100
elif no_drip_final > drip_final:
winner = "No-DRIP"
advantage_amount = no_drip_final - drip_final
advantage_percentage = (advantage_amount / drip_final) * 100
else:
winner = "Tie"
advantage_amount = 0.0
advantage_percentage = 0.0
# Print comparison table
self._print_strategy_comparison(
drip_result, no_drip_result, initial_investment,
winner, advantage_amount, advantage_percentage,
drip_break_even, no_drip_break_even
)
return {
'drip_result': drip_result,
'no_drip_result': no_drip_result,
'initial_investment': initial_investment,
'drip_final_value': drip_final,
'no_drip_final_value': no_drip_final,
'winner': winner,
'advantage_amount': advantage_amount,
'advantage_percentage': advantage_percentage,
'drip_break_even': drip_break_even,
'no_drip_break_even': no_drip_break_even,
'comparison_summary': self._generate_comparison_summary(
drip_final, no_drip_final, initial_investment, winner, advantage_percentage
)
}
except Exception as e:
logger.error(f"Error in DRIP vs No-DRIP comparison: {str(e)}")
logger.error(traceback.format_exc())
raise
def _calculate_break_even_analysis(
self,
strategy_name: str,
monthly_data: List,
initial_investment: float,
value_extractor: callable
) -> Dict[str, Any]:
"""Calculate break-even analysis for a strategy"""
break_even_month = None
profit_at_break_even = 0.0
for month_data in monthly_data:
total_value = value_extractor(month_data)
profit = total_value - initial_investment
if profit > 0 and break_even_month is None:
break_even_month = month_data.month
profit_at_break_even = profit
break
# Format break-even time
if break_even_month is None:
months_to_break_even = "Never (within simulation period)"
else:
years = break_even_month // 12
months = break_even_month % 12
if years > 0:
months_to_break_even = f"{years} year(s) and {months} month(s)"
else:
months_to_break_even = f"{months} month(s)"
return {
'strategy_name': strategy_name,
'break_even_month': break_even_month,
'profit_at_break_even': profit_at_break_even,
'months_to_break_even': months_to_break_even,
'initial_investment': initial_investment
}
def _print_strategy_comparison(
self,
drip_result: DripResult,
no_drip_result: Any, # NoDRIPResult
initial_investment: float,
winner: str,
advantage_amount: float,
advantage_percentage: float,
drip_break_even: Dict[str, Any],
no_drip_break_even: Dict[str, Any]
) -> None:
"""Print detailed strategy comparison table"""
print("\n" + "="*100)
print("DRIP vs No-DRIP STRATEGY COMPARISON")
print("="*100)
print(f"{'Metric':<35} {'DRIP Strategy':<25} {'No-DRIP Strategy':<25}")
print("-"*100)
print(f"{'Initial Investment':<35} ${initial_investment:<24,.2f} ${initial_investment:<24,.2f}")
print(f"{'Final Portfolio Value':<35} ${drip_result.final_portfolio_value:<24,.2f} ${no_drip_result.final_portfolio_value:<24,.2f}")
print(f"{'Total Cash Income':<35} ${drip_result.total_income:<24,.2f} ${no_drip_result.total_cash_income:<24,.2f}")
print(f"{'Total Final Value':<35} ${drip_result.final_portfolio_value:<24,.2f} ${no_drip_result.total_value:<24,.2f}")
drip_return = ((drip_result.final_portfolio_value / initial_investment) - 1) * 100
no_drip_return = ((no_drip_result.total_value / initial_investment) - 1) * 100
print(f"{'Total Return %':<35} {drip_return:<24.1f}% {no_drip_return:<24.1f}%")
# Break-even analysis
print(f"{'Break-even Time':<35} {drip_break_even['months_to_break_even']:<25} {no_drip_break_even['months_to_break_even']:<25}")
print("-"*100)
print(f"WINNER: {winner}")
if winner != "Tie":
print(f"ADVANTAGE: ${advantage_amount:,.2f} ({advantage_percentage:.1f}%)")
# Investment recommendation
recommendation = self._generate_investment_recommendation(winner, advantage_percentage)
print(f"RECOMMENDATION: {recommendation}")
print("="*100)
def _generate_investment_recommendation(self, winner: str, advantage_percentage: float) -> str:
"""Generate investment recommendation based on comparison results"""
if winner == "Tie":
return "Both strategies perform equally. Choose based on your liquidity needs."
if advantage_percentage < 1.0:
return f"{winner} wins by a small margin ({advantage_percentage:.1f}%). Choose based on liquidity preferences."
elif advantage_percentage < 5.0:
return f"{winner} strategy is recommended with a {advantage_percentage:.1f}% advantage."
else:
return f"{winner} strategy is strongly recommended with a {advantage_percentage:.1f}% advantage."
def _generate_comparison_summary(
self,
drip_final: float,
no_drip_final: float,
initial_investment: float,
winner: str,
advantage_percentage: float
) -> str:
"""Generate comparison summary"""
drip_total_return = ((drip_final / initial_investment) - 1) * 100
no_drip_total_return = ((no_drip_final / initial_investment) - 1) * 100
summary = f"Initial Investment: ${initial_investment:,.2f}\n"
summary += f"DRIP Final Value: ${drip_final:,.2f} (Total Return: {drip_total_return:.1f}%)\n"
summary += f"No-DRIP Final Value: ${no_drip_final:,.2f} (Total Return: {no_drip_total_return:.1f}%)\n"
if winner != "Tie":
summary += f"Winner: {winner} strategy ({advantage_percentage:.1f}% advantage)"
else:
summary += "Result: Both strategies perform equally"
return summary
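A hedged end-to-end sketch of the comparison entry point, assuming the enclosing class is `DRIPService` with a zero-argument constructor and reusing the hypothetical `portfolio_df` and `config` from the earlier sketch:

```python
service = DRIPService()  # assumed zero-arg constructor
comparison = service.calculate_drip_vs_no_drip_comparison(portfolio_df, config)
print(comparison["winner"], f"{comparison['advantage_percentage']:.1f}%")
print(comparison["comparison_summary"])
```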

View File

@ -0,0 +1,218 @@
"""
NAV Erosion Service for analyzing ETF erosion risk
"""
import pandas as pd
import numpy as np
from typing import List, Dict, Optional
from dataclasses import dataclass
import logging
from .data_service import DataService
logger = logging.getLogger(__name__)
@dataclass
class ErosionRiskResult:
"""Result of erosion risk analysis for a single ETF"""
ticker: str
nav_erosion_risk: float # 0-9 scale
yield_erosion_risk: float # 0-9 scale
estimated_nav_erosion: float # Annual percentage
estimated_yield_erosion: float # Annual percentage
nav_risk_explanation: str
yield_risk_explanation: str
etf_age_years: Optional[float] = None
max_drawdown: Optional[float] = None
volatility: Optional[float] = None
sharpe_ratio: Optional[float] = None
sortino_ratio: Optional[float] = None
dividend_trend: Optional[float] = None
@dataclass
class ErosionRiskAnalysis:
"""Complete erosion risk analysis results"""
results: List[ErosionRiskResult]
timestamp: pd.Timestamp
class NavErosionService:
"""Service for analyzing ETF NAV and yield erosion risk"""
def __init__(self):
self.data_service = DataService()
def analyze_etf_erosion_risk(self, tickers: List[str]) -> ErosionRiskAnalysis:
"""
Analyze erosion risk for a list of ETFs
Args:
tickers: List of ETF tickers to analyze
Returns:
ErosionRiskAnalysis object containing results for each ETF
"""
results = []
for ticker in tickers:
try:
# Get ETF data
etf_data = self.data_service.get_etf_data(ticker)
if not etf_data:
logger.warning(f"No data available for {ticker}")
continue
# Calculate NAV erosion risk
nav_risk = self._calculate_nav_erosion_risk(etf_data)
# Calculate yield erosion risk
yield_risk = self._calculate_yield_erosion_risk(etf_data)
# Create result object
result = ErosionRiskResult(
ticker=ticker,
nav_erosion_risk=nav_risk['score'],
yield_erosion_risk=yield_risk['score'],
estimated_nav_erosion=nav_risk['estimated_erosion'],
estimated_yield_erosion=yield_risk['estimated_erosion'],
nav_risk_explanation=nav_risk['explanation'],
yield_risk_explanation=yield_risk['explanation'],
etf_age_years=etf_data.get('age_years'),
max_drawdown=etf_data.get('max_drawdown'),
volatility=etf_data.get('volatility'),
sharpe_ratio=etf_data.get('sharpe_ratio'),
sortino_ratio=etf_data.get('sortino_ratio'),
dividend_trend=etf_data.get('dividend_trend')
)
results.append(result)
except Exception as e:
logger.error(f"Error analyzing {ticker}: {str(e)}")
continue
return ErosionRiskAnalysis(
results=results,
timestamp=pd.Timestamp.now()
)
def _calculate_nav_erosion_risk(self, etf_data: Dict) -> Dict:
"""
Calculate NAV erosion risk score and explanation
Args:
etf_data: Dictionary containing ETF data
Returns:
Dictionary with risk score, estimated erosion, and explanation
"""
try:
# Get relevant metrics
volatility = float(etf_data.get('volatility', 0))
max_drawdown = float(etf_data.get('max_drawdown', 0))
sharpe_ratio = float(etf_data.get('sharpe_ratio', 0))
sortino_ratio = float(etf_data.get('sortino_ratio', 0))
age_years = float(etf_data.get('age_years', 0))
# Calculate risk score components
volatility_score = min(9, int(volatility * 20)) # Scale volatility to 0-9
drawdown_score = min(9, int(max_drawdown * 20)) # Scale drawdown to 0-9
risk_adjusted_score = min(9, int((2 - min(2, max(0, sharpe_ratio))) * 4.5)) # Scale Sharpe to 0-9
age_score = min(9, int((5 - min(5, age_years)) * 1.8)) # Scale age to 0-9
# Calculate final risk score (weighted average)
risk_score = (
volatility_score * 0.3 +
drawdown_score * 0.3 +
risk_adjusted_score * 0.2 +
age_score * 0.2
)
# Estimate annual erosion based on risk score
estimated_erosion = risk_score * 0.01 # 1% per risk point
# Generate explanation
explanation = []
if volatility_score > 6:
explanation.append(f"High volatility ({volatility:.1%})")
if drawdown_score > 6:
explanation.append(f"Large drawdowns ({max_drawdown:.1%})")
if risk_adjusted_score > 6:
explanation.append(f"Poor risk-adjusted returns (Sharpe: {sharpe_ratio:.2f})")
if age_score > 6:
explanation.append(f"New ETF ({age_years:.1f} years)")
explanation = ", ".join(explanation) if explanation else "Moderate risk profile"
return {
'score': float(risk_score),
'estimated_erosion': float(estimated_erosion),
'explanation': explanation
}
except Exception as e:
logger.error(f"Error calculating NAV erosion risk: {str(e)}")
return {
'score': 5.0, # Default to middle risk
'estimated_erosion': 0.05, # Default to 5%
'explanation': "Unable to calculate precise risk"
}
def _calculate_yield_erosion_risk(self, etf_data: Dict) -> Dict:
"""
Calculate yield erosion risk score and explanation
Args:
etf_data: Dictionary containing ETF data
Returns:
Dictionary with risk score, estimated erosion, and explanation
"""
try:
# Get relevant metrics
dividend_trend = float(etf_data.get('dividend_trend', 0))
volatility = float(etf_data.get('volatility', 0))
max_drawdown = float(etf_data.get('max_drawdown', 0))
age_years = float(etf_data.get('age_years', 0))
# Calculate risk score components
trend_score = min(9, int((1 - min(1, max(-1, dividend_trend))) * 4.5)) # Scale trend to 0-9
volatility_score = min(9, int(volatility * 20)) # Scale volatility to 0-9
drawdown_score = min(9, int(max_drawdown * 20)) # Scale drawdown to 0-9
age_score = min(9, int((5 - min(5, age_years)) * 1.8)) # Scale age to 0-9
# Calculate final risk score (weighted average)
risk_score = (
trend_score * 0.3 +
volatility_score * 0.3 +
drawdown_score * 0.2 +
age_score * 0.2
)
# Estimate annual erosion based on risk score
estimated_erosion = risk_score * 0.01 # 1% per risk point
# Generate explanation
explanation = []
if trend_score > 6:
explanation.append(f"Declining dividends ({dividend_trend:.1%})")
if volatility_score > 6:
explanation.append(f"High volatility ({volatility:.1%})")
if drawdown_score > 6:
explanation.append(f"Large drawdowns ({max_drawdown:.1%})")
if age_score > 6:
explanation.append(f"New ETF ({age_years:.1f} years)")
explanation = ", ".join(explanation) if explanation else "Moderate risk profile"
return {
'score': float(risk_score),
'estimated_erosion': float(estimated_erosion),
'explanation': explanation
}
except Exception as e:
logger.error(f"Error calculating yield erosion risk: {str(e)}")
return {
'score': 5.0, # Default to middle risk
'estimated_erosion': 0.05, # Default to 5%
'explanation': "Unable to calculate precise risk"
}
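A minimal driving sketch for this service (tickers are illustrative; field names come from `ErosionRiskResult` above):

```python
service = NavErosionService()
analysis = service.analyze_etf_erosion_risk(["JEPI", "MSTY"])
for r in analysis.results:
    print(f"{r.ticker}: NAV risk {r.nav_erosion_risk:.1f}/9 "
          f"(~{r.estimated_nav_erosion:.1%}/yr) - {r.nav_risk_explanation}")
```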

View File

@ -0,0 +1,16 @@
"""
NAV Erosion Service package
"""
from .service import NavErosionService
from .models import NavErosionResult, NavErosionAnalysis
from .exceptions import NavErosionError, DataFetchError, CalculationError
__all__ = [
'NavErosionService',
'NavErosionResult',
'NavErosionAnalysis',
'NavErosionError',
'DataFetchError',
'CalculationError'
]

View File

@ -0,0 +1,19 @@
"""
Custom exceptions for NAV Erosion Service
"""
class NavErosionError(Exception):
"""Base exception for NAV Erosion Service"""
pass
class DataFetchError(NavErosionError):
"""Raised when ETF data cannot be fetched"""
pass
class CalculationError(NavErosionError):
"""Raised when risk calculation fails"""
pass
class ValidationError(NavErosionError):
"""Raised when risk validation fails"""
pass
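A short sketch of the hierarchy in use; the base class lets callers catch everything the service raises while still handling fetch failures specially (caller code is illustrative):

```python
try:
    data = service._fetch_etf_data("JEPI")  # 'service' is hypothetical; may raise DataFetchError
except DataFetchError as e:
    logger.warning(f"Falling back to estimated data: {e}")
except NavErosionError as e:
    # Also catches CalculationError and ValidationError via the base class.
    logger.error(f"Erosion analysis failed: {e}")
```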

View File

@ -0,0 +1,121 @@
"""
Logger module for NAV Erosion Service
"""
import logging
import os
from datetime import datetime
from pathlib import Path  # required by ErosionRiskLogger.setup_logger below
def get_logger(name: str) -> logging.Logger:
"""Get a logger instance with proper configuration."""
logger = logging.getLogger(name)
# Create logs directory if it doesn't exist
log_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'logs')
os.makedirs(log_dir, exist_ok=True)
# Set up file handler
log_file = os.path.join(log_dir, f'nav_erosion_{datetime.now().strftime("%Y%m%d")}.log')
file_handler = logging.FileHandler(log_file)
file_handler.setLevel(logging.INFO)
# Set up console handler
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
# Create formatter
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)
console_handler.setFormatter(formatter)
# Add handlers to logger
logger.addHandler(file_handler)
logger.addHandler(console_handler)
logger.setLevel(logging.INFO)
return logger
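Typical usage (module name is illustrative). Note that `get_logger` appends fresh handlers on every call, so calling it repeatedly for the same name will stack duplicate handlers and log each message more than once:

```python
logger = get_logger("nav_erosion_service.service")
logger.info("NAV erosion analysis started")  # console + logs/nav_erosion_YYYYMMDD.log
```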
class ErosionRiskLogger:
"""Logger for NAV Erosion Service"""
def __init__(self):
self.logger = logging.getLogger('erosion_risk')
self.setup_logger()
def setup_logger(self):
"""Configure logger with file and console handlers"""
# Create logs directory if it doesn't exist
log_dir = Path('logs')
log_dir.mkdir(exist_ok=True)
# Set base logging level
self.logger.setLevel(logging.INFO)
# File handler for errors
error_handler = logging.FileHandler(
log_dir / f'erosion_risk_errors_{datetime.now().strftime("%Y%m%d")}.log'
)
error_handler.setLevel(logging.ERROR)
error_handler.setFormatter(
logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
)
# File handler for flow tracking
flow_handler = logging.FileHandler(
log_dir / f'erosion_risk_flow_{datetime.now().strftime("%Y%m%d")}.log'
)
flow_handler.setLevel(logging.INFO)
flow_handler.setFormatter(
logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
)
# Console handler for immediate feedback
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.WARNING)
console_handler.setFormatter(
logging.Formatter('%(levelname)s: %(message)s')
)
# Add handlers to logger
self.logger.addHandler(error_handler)
self.logger.addHandler(flow_handler)
self.logger.addHandler(console_handler)
def log_risk_calculation(self, ticker: str, component_risks: dict, final_risk: float):
"""Log risk calculation details"""
self.logger.info(f"Risk calculation for {ticker}:")
# Log NAV Risk Components
self.logger.info("NAV Risk Components:")
for component, risk in component_risks.get('nav', {}).items():
self.logger.info(f" {component}: {risk}")
# Log Yield Risk Components
self.logger.info("Yield Risk Components:")
for component, risk in component_risks.get('yield', {}).items():
self.logger.info(f" {component}: {risk}")
# Log Structural Risk Components
self.logger.info("Structural Risk Components:")
for component, risk in component_risks.get('structural', {}).items():
self.logger.info(f" {component}: {risk}")
self.logger.info(f"Final Risk Score: {final_risk}")
def log_error(self, ticker: str, error: Exception, context: dict = None):
"""Log error with context"""
self.logger.error(f"Error processing {ticker}: {str(error)}")
if context:
self.logger.error(f"Context: {context}")
def log_warning(self, ticker: str, message: str, context: dict = None):
"""Log warning with context"""
self.logger.warning(f"Warning for {ticker}: {message}")
if context:
self.logger.warning(f"Context: {context}")
def log_info(self, message: str, context: dict = None):
"""Log info message with context"""
self.logger.info(message)
if context:
self.logger.info(f"Context: {context}")

View File

@ -0,0 +1,36 @@
"""
Data models for NAV Erosion Service
"""
from dataclasses import dataclass
from typing import List, Optional, Dict
from datetime import datetime
@dataclass
class NavErosionResult:
"""Result of NAV erosion risk analysis for a single ETF"""
ticker: str
nav_erosion_risk: float # 0-9 scale
yield_erosion_risk: float # 0-9 scale
estimated_nav_erosion: float # Annual percentage
estimated_yield_erosion: float # Annual percentage
nav_risk_explanation: str
yield_risk_explanation: str
component_risks: Dict[str, float] # Detailed risk components
etf_age_years: Optional[float] = None
is_new_etf: bool = False
max_drawdown: Optional[float] = None
volatility: Optional[float] = None
sharpe_ratio: Optional[float] = None
sortino_ratio: Optional[float] = None
dividend_trend: Optional[float] = None
@dataclass
class NavErosionAnalysis:
"""Complete NAV erosion analysis for a portfolio"""
results: List[NavErosionResult]
portfolio_nav_risk: float # Weighted average
portfolio_yield_risk: float # Weighted average
risk_summary: str
timestamp: datetime
component_weights: Dict[str, float] # Weights used in calculation

View File

@ -0,0 +1,591 @@
"""
NAV Erosion Service implementation
"""
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from typing import List, Dict, Optional, Tuple
import yfinance as yf
from .models import NavErosionResult, NavErosionAnalysis
from .exceptions import NavErosionError, DataFetchError, CalculationError
from .logger import get_logger
from enum import Enum
from ETF_Portal.services.data_service import DataService # Our existing data service
logger = get_logger(__name__)
class ETFType(Enum):
INDEX = "index" # e.g., VTI, VOO
COVERED_CALL = "covered_call" # e.g., JEPI
HIGH_YIELD = "high_yield" # e.g., FEPI
SECTOR = "sector" # e.g., MSTY
class NavErosionService:
"""Service for calculating NAV erosion risk"""
# Risk weights
NAV_RISK_WEIGHT = 0.45
YIELD_RISK_WEIGHT = 0.35
STRUCTURAL_RISK_WEIGHT = 0.20
# Component weights within each risk category
NAV_COMPONENT_WEIGHTS = {
'drawdown': 0.333, # 33.3% of NAV risk
'volatility': 0.222, # 22.2% of NAV risk
'sharpe': 0.222, # 22.2% of NAV risk
'sortino': 0.222 # 22.2% of NAV risk
}
YIELD_COMPONENT_WEIGHTS = {
'stability': 0.429, # 42.9% of yield risk
'growth': 0.286, # 28.6% of yield risk
'payout': 0.285 # 28.5% of yield risk
}
STRUCTURAL_COMPONENT_WEIGHTS = {
'age': 0.25, # 25% of structural risk
'aum': 0.25, # 25% of structural risk
'liquidity': 0.25, # 25% of structural risk
'expense': 0.25 # 25% of structural risk
}
def __init__(self):
self.data_service = DataService() # Use our existing data service
self.etf_types = {
'VTI': ETFType.INDEX,
'VOO': ETFType.INDEX,
'JEPI': ETFType.COVERED_CALL,
'FEPI': ETFType.HIGH_YIELD,
'MSTY': ETFType.SECTOR,
# Add more ETFs as needed
}
# ETF-type specific weights for risk calculation
self.risk_weights = {
ETFType.INDEX: {'stability': 0.4, 'growth': 0.4, 'payout': 0.2},
ETFType.COVERED_CALL: {'stability': 0.3, 'growth': 0.2, 'payout': 0.5},
ETFType.HIGH_YIELD: {'stability': 0.5, 'growth': 0.3, 'payout': 0.2},
ETFType.SECTOR: {'stability': 0.4, 'growth': 0.3, 'payout': 0.3}
}
# Maximum annual erosion by ETF type
self.max_erosion = {
ETFType.INDEX: 0.15, # 15% max for index ETFs
ETFType.COVERED_CALL: 0.30, # 30% max for covered call
ETFType.HIGH_YIELD: 0.25, # 25% max for high yield
ETFType.SECTOR: 0.20 # 20% max for sector
}
def get_etf_type(self, ticker: str) -> ETFType:
"""Get ETF type from our mapping or default to INDEX."""
return self.etf_types.get(ticker, ETFType.INDEX)
def calculate_stability(self, etf_data: Dict) -> float:
"""Calculate dividend stability (0-1)."""
if not etf_data.get('dividends'):
return 0.0
# Convert dividends dict to DataFrame
dividends = pd.DataFrame.from_dict(etf_data['dividends'], orient='index', columns=['Dividends'])
dividends.index = pd.to_datetime(dividends.index)
# Calculate coefficient of variation
mean = dividends['Dividends'].mean()
std = dividends['Dividends'].std()
cv = std / mean if mean > 0 else 1.0
# Convert to stability score (0-1)
stability = 1 / (1 + cv)
return min(max(stability, 0), 1)
def calculate_growth(self, etf_data: Dict) -> float:
"""Calculate dividend growth (0-1)."""
if not etf_data.get('dividends'):
return 0.0
# Convert dividends dict to DataFrame
dividends = pd.DataFrame.from_dict(etf_data['dividends'], orient='index', columns=['Dividends'])
dividends.index = pd.to_datetime(dividends.index)
# Calculate year-over-year growth
yearly_divs = dividends.resample('Y')['Dividends'].sum()
if len(yearly_divs) < 2:
return 0.5 # Neutral if not enough data
growth_rates = yearly_divs.pct_change().dropna()
avg_growth = growth_rates.mean()
# Convert to growth score (0-1)
# 0% growth = 0.5, 10% growth = 1.0, -10% growth = 0.0
growth_score = 0.5 + (avg_growth * 5)
return min(max(growth_score, 0), 1)
def calculate_payout_ratio(self, etf_data: Dict) -> float:
"""Calculate payout ratio (0-1)."""
if not etf_data.get('dividends') or not etf_data.get('hist'):
return 0.0
# Convert dividends dict to DataFrame
dividends = pd.DataFrame.from_dict(etf_data['dividends'], orient='index', columns=['Dividends'])
dividends.index = pd.to_datetime(dividends.index)
# Convert historical data to DataFrame
hist = pd.DataFrame.from_dict(etf_data['hist'])
hist.index = pd.to_datetime(hist.index)
# Calculate annual dividend yield
annual_div = dividends['Dividends'].sum()
avg_price = hist['Close'].mean()
yield_ratio = annual_div / avg_price if avg_price > 0 else 0
# Normalize to 0-1 range (assuming max yield of 20%)
payout_ratio = min(yield_ratio / 0.20, 1.0)
return payout_ratio
def _calculate_dividend_trend(self, etf_data: Dict) -> Tuple[float, str]:
"""
Calculate dividend trend score and direction.
Returns a tuple of (trend_score, trend_direction)
trend_score: float between -1 and 1
trend_direction: str ('Increasing', 'Decreasing', 'Stable', 'Unknown')
"""
try:
if not etf_data.get('dividends'):
logger.warning("No dividend data available for trend calculation")
return 0.0, "Unknown"
# Convert dividends to DataFrame
dividends = pd.DataFrame.from_dict(etf_data['dividends'], orient='index', columns=['Dividends'])
dividends.index = pd.to_datetime(dividends.index)
# Resample to monthly and calculate rolling averages
monthly_divs = dividends.resample('M')['Dividends'].sum()
if len(monthly_divs) < 6: # Need at least 6 months of data
return 0.0, "Unknown"
# Calculate 3-month and 6-month moving averages
ma3 = monthly_divs.rolling(window=3).mean()
ma6 = monthly_divs.rolling(window=6).mean()
# Calculate trend metrics
recent_ma3 = ma3.iloc[-3:].mean()
recent_ma6 = ma6.iloc[-6:].mean()
# Calculate year-over-year growth
yearly_divs = dividends.resample('Y')['Dividends'].sum()
if len(yearly_divs) >= 2:
yoy_growth = (yearly_divs.iloc[-1] / yearly_divs.iloc[-2]) - 1
else:
yoy_growth = 0
# Calculate trend score (-1 to 1)
ma_trend = (recent_ma3 / recent_ma6) - 1 if recent_ma6 > 0 else 0
trend_score = (ma_trend * 0.7 + yoy_growth * 0.3) # Weighted combination
# Normalize trend score to -1 to 1 range
trend_score = max(min(trend_score, 1), -1)
# Determine trend direction
if abs(trend_score) < 0.05:
direction = "Stable"
elif trend_score > 0:
direction = "Increasing"
else:
direction = "Decreasing"
return trend_score, direction
except Exception as e:
logger.error(f"Error calculating dividend trend: {str(e)}")
return 0.0, "Unknown"
def analyze_etf_erosion_risk(self, tickers: List[str]) -> NavErosionAnalysis:
"""Analyze erosion risk for a list of ETFs."""
results = []
errors = []
for ticker in tickers:
try:
# Get ETF type
etf_type = self.get_etf_type(ticker)
# Get ETF data using our existing pipeline
etf_data = self._fetch_etf_data(ticker)
if etf_data.get('is_estimated', False):
logger.warning(f"Using estimated data for {ticker}")
# Calculate all risk components with ETF-type specific adjustments
nav_risk, nav_components = self._calculate_nav_risk(etf_data, etf_type)
yield_risk, yield_components = self._calculate_yield_risk(etf_data, etf_type)
structural_risk, structural_components = self._calculate_structural_risk(etf_data)
# Calculate dividend trend
trend_score, trend_direction = self._calculate_dividend_trend(etf_data)
# Calculate final risk scores
final_nav_risk = round(
nav_risk * self.NAV_RISK_WEIGHT +
structural_risk * self.STRUCTURAL_RISK_WEIGHT
)
final_yield_risk = round(
yield_risk * self.YIELD_RISK_WEIGHT +
structural_risk * self.STRUCTURAL_RISK_WEIGHT
)
# Create result object
result = NavErosionResult(
ticker=ticker,
nav_erosion_risk=final_nav_risk,
yield_erosion_risk=final_yield_risk,
estimated_nav_erosion=round(final_nav_risk / 9 * self.max_erosion[etf_type], 3),
estimated_yield_erosion=round(final_yield_risk / 9 * self.max_erosion[etf_type], 3),
nav_risk_explanation=self._generate_nav_explanation(nav_components),
yield_risk_explanation=(
f"Dividend stability: {yield_components['stability']:.1%}, "
f"Growth: {yield_components['growth']:.1%}, "
f"Payout ratio: {yield_components['payout']:.1%}, "
f"Trend: {trend_direction}"
),
etf_age_years=etf_data.get('age_years', 3),
max_drawdown=round(etf_data.get('max_drawdown', 0.0), 3),
volatility=round(etf_data.get('volatility', 0.0), 3),
sharpe_ratio=round(etf_data.get('sharpe_ratio', 0.0), 2),
sortino_ratio=round(etf_data.get('sortino_ratio', 0.0), 2),
dividend_trend=trend_score,
component_risks={
'nav': nav_components,
'yield': yield_components,
'structural': structural_components
}
)
results.append(result)
except Exception as e:
logger.error(f"Error analyzing {ticker}: {str(e)}")
errors.append((ticker, str(e)))
# Add a result with error info
results.append(NavErosionResult(
ticker=ticker,
nav_erosion_risk=0,
yield_erosion_risk=0,
estimated_nav_erosion=0.0,
estimated_yield_erosion=0.0,
nav_risk_explanation=f"Error: {str(e)}",
yield_risk_explanation=f"Error: {str(e)}",
component_risks={}
))
if not results:
raise CalculationError(f"No valid results generated. Errors: {errors}")
# Calculate portfolio averages
portfolio_nav_risk = round(sum(r.nav_erosion_risk for r in results) / len(results))
portfolio_yield_risk = round(sum(r.yield_erosion_risk for r in results) / len(results))
return NavErosionAnalysis(
results=results,
portfolio_nav_risk=portfolio_nav_risk,
portfolio_yield_risk=portfolio_yield_risk,
risk_summary=f"Portfolio average NAV risk: {portfolio_nav_risk}/9, Yield risk: {portfolio_yield_risk}/9",
timestamp=datetime.now(),
component_weights=self.risk_weights
)
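One detail worth noting in the final blend above: each headline score combines only its own weight with the structural weight (0.45 + 0.20 = 0.65 for NAV, 0.35 + 0.20 = 0.55 for yield), so even a maximum-risk ETF cannot reach 9. An illustrative calculation:

```python
nav_risk, structural_risk = 7.0, 4.0  # illustrative component scores
final_nav_risk = round(nav_risk * 0.45 + structural_risk * 0.20)  # round(3.95) -> 4
# Ceiling of the scale: round(9 * 0.45 + 9 * 0.20) == round(5.85) == 6, not 9.
```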
def _fetch_etf_data(self, ticker: str) -> Dict:
"""Fetch ETF data using our existing data pipeline."""
try:
# Use our existing data service
data = self.data_service.get_etf_data(ticker)
if not data:
raise DataFetchError(f"No data available for {ticker}")
# Calculate actual ETF age
if 'info' in data and 'firstTradeDateEpochUtc' in data['info']:
inception_date = datetime.fromtimestamp(data['info']['firstTradeDateEpochUtc'])
age_years = (datetime.now() - inception_date).days / 365.25
data['age_years'] = round(age_years, 1)  # keep one decimal so the "< 1 year" age check can still trigger
else:
# Known ETF inception dates as fallback
known_ages = {
'VTI': 23, # Inception: 2001
'VOO': 13, # Inception: 2010
'JEPI': 4, # Inception: 2020
'FEPI': 3, # Inception: 2023 (per fundInceptionDate in the cached profile)
'MSTY': 1 # Inception: 2024 (per fundInceptionDate in the cached profile)
}
data['age_years'] = known_ages.get(ticker, 3)
return data
except Exception as e:
logger.error(f"Error fetching data for {ticker}: {str(e)}")
raise DataFetchError(f"Failed to fetch data for {ticker}: {str(e)}")
def _calculate_nav_risk(self, etf_data: Dict, etf_type: ETFType) -> Tuple[float, Dict]:
"""Calculate NAV risk components with ETF-type specific adjustments"""
components = {}
# Base risk calculation
if etf_data.get('max_drawdown') is not None:
if etf_data['max_drawdown'] > 0.40:
components['drawdown'] = 7
elif etf_data['max_drawdown'] > 0.25:
components['drawdown'] = 5
elif etf_data['max_drawdown'] > 0.15:
components['drawdown'] = 3
else:
components['drawdown'] = 2
else:
components['drawdown'] = 4
if etf_data.get('volatility') is not None:
if etf_data['volatility'] > 0.40:
components['volatility'] = 7
elif etf_data['volatility'] > 0.25:
components['volatility'] = 5
elif etf_data['volatility'] > 0.15:
components['volatility'] = 3
else:
components['volatility'] = 2
else:
components['volatility'] = 4
if etf_data.get('sharpe_ratio') is not None:
if etf_data['sharpe_ratio'] >= 2.0:
components['sharpe'] = 1
elif etf_data['sharpe_ratio'] >= 1.5:
components['sharpe'] = 2
elif etf_data['sharpe_ratio'] >= 1.0:
components['sharpe'] = 3
elif etf_data['sharpe_ratio'] >= 0.5:
components['sharpe'] = 4
else:
components['sharpe'] = 5
else:
components['sharpe'] = 4
if etf_data.get('sortino_ratio') is not None:
if etf_data['sortino_ratio'] >= 2.0:
components['sortino'] = 1
elif etf_data['sortino_ratio'] >= 1.5:
components['sortino'] = 2
elif etf_data['sortino_ratio'] >= 1.0:
components['sortino'] = 3
elif etf_data['sortino_ratio'] >= 0.5:
components['sortino'] = 4
else:
components['sortino'] = 5
else:
components['sortino'] = 4
# ETF-type specific adjustments for NAV risk
if etf_type == ETFType.INDEX:
# Index ETFs are generally more stable
components = {k: max(1, v - 2) for k, v in components.items()}
elif etf_type == ETFType.SECTOR:
# Sector ETFs are more volatile
components = {k: min(9, v + 1) for k, v in components.items()}
elif etf_type == ETFType.COVERED_CALL:
# Covered call ETFs have higher NAV risk due to strategy complexity
components = {k: min(9, v + 3) for k, v in components.items()}
elif etf_type == ETFType.HIGH_YIELD:
# High yield ETFs have highest NAV risk
components = {k: min(9, v + 3) for k, v in components.items()}
# Calculate weighted NAV risk
nav_risk = sum(
components[component] * weight
for component, weight in self.NAV_COMPONENT_WEIGHTS.items()
)
return nav_risk, components
def _calculate_yield_risk(self, etf_data: Dict, etf_type: ETFType) -> Tuple[float, Dict]:
"""Calculate yield risk components with ETF-type specific adjustments"""
components = {}
# Calculate base components
stability = self.calculate_stability(etf_data)
growth = self.calculate_growth(etf_data)
payout = self.calculate_payout_ratio(etf_data)
# Convert to risk scores (1-9)
components['stability'] = int((1 - stability) * 8) + 1
components['growth'] = int((1 - growth) * 8) + 1
components['payout'] = int((1 - payout) * 8) + 1
# ETF-type specific adjustments
if etf_type == ETFType.INDEX:
# Index ETFs have lower yield risk
components = {k: max(1, v - 2) for k, v in components.items()}
elif etf_type == ETFType.SECTOR:
# Sector ETFs have moderate yield risk
components = {k: min(9, v + 1) for k, v in components.items()}
elif etf_type == ETFType.COVERED_CALL:
# Covered call ETFs have higher yield risk
components = {k: min(9, v + 2) for k, v in components.items()}
elif etf_type == ETFType.HIGH_YIELD:
# High yield ETFs have highest yield risk
components = {k: min(9, v + 3) for k, v in components.items()}
# Calculate weighted yield risk
yield_risk = sum(
components[component] * weight
for component, weight in self.YIELD_COMPONENT_WEIGHTS.items()
)
return yield_risk, components
def _calculate_structural_risk(self, etf_data: Dict) -> Tuple[float, Dict]:
"""Calculate structural risk components"""
components = {}
# Age risk - adjusted for actual ETF ages
age = etf_data.get('age_years', 3.0)
if age < 1:
components['age'] = 7 # Very new ETF
elif age < 3:
components['age'] = 6 # New ETF
elif age < 5:
components['age'] = 4 # Moderately established
elif age < 10:
components['age'] = 3 # Well established
else:
components['age'] = 2 # Long established
# AUM risk
if etf_data.get('info', {}).get('totalAssets') is not None:
aum = etf_data['info']['totalAssets']
if aum < 100_000_000: # Less than $100M
components['aum'] = 7
elif aum < 500_000_000: # Less than $500M
components['aum'] = 5
elif aum < 1_000_000_000: # Less than $1B
components['aum'] = 3
else:
components['aum'] = 2
else:
components['aum'] = 4 # Default medium
# Liquidity risk (using average volume as proxy)
if etf_data.get('info', {}).get('averageVolume') is not None:
volume = etf_data['info']['averageVolume']
if volume < 100_000:
components['liquidity'] = 7
elif volume < 500_000:
components['liquidity'] = 5
elif volume < 1_000_000:
components['liquidity'] = 3
else:
components['liquidity'] = 2
else:
components['liquidity'] = 4 # Default medium
# Expense ratio risk
if etf_data.get('info', {}).get('annualReportExpenseRatio') is not None:
expense_ratio = etf_data['info']['annualReportExpenseRatio']
if expense_ratio > 0.0075: # > 0.75%
components['expense'] = 7
elif expense_ratio > 0.005: # > 0.50%
components['expense'] = 5
elif expense_ratio > 0.0025: # > 0.25%
components['expense'] = 3
else:
components['expense'] = 2
else:
components['expense'] = 4 # Default medium
# Calculate weighted structural risk
structural_risk = sum(
components[component] * weight
for component, weight in self.STRUCTURAL_COMPONENT_WEIGHTS.items()
)
return structural_risk, components
def _generate_nav_explanation(self, components: Dict) -> str:
"""Generate explanation for NAV risk"""
explanations = []
if components.get('drawdown') is not None:
explanations.append(f"Drawdown risk level: {components['drawdown']}/9")
if components.get('volatility') is not None:
explanations.append(f"Volatility risk level: {components['volatility']}/9")
if components.get('sharpe') is not None:
explanations.append(f"Sharpe ratio risk level: {components['sharpe']}/9")
if components.get('sortino') is not None:
explanations.append(f"Sortino ratio risk level: {components['sortino']}/9")
return " | ".join(explanations)
def _generate_portfolio_summary(self, results: List[NavErosionResult]) -> str:
"""Generate portfolio-level risk summary"""
nav_risks = [r.nav_erosion_risk for r in results]
yield_risks = [r.yield_erosion_risk for r in results]
avg_nav_risk = np.mean(nav_risks)
avg_yield_risk = np.mean(yield_risks)
return (
f"Portfolio NAV Risk: {avg_nav_risk:.1f}/9 | "
f"Portfolio Yield Risk: {avg_yield_risk:.1f}/9"
)
def convert_to_drip_erosion_config(self, analysis: NavErosionAnalysis) -> Dict:
"""
Convert NavErosionAnalysis results to format expected by DRIPService.
Args:
analysis: NavErosionAnalysis object from analyze_etf_erosion_risk()
Returns:
Dict in format expected by DRIPService:
{
"per_ticker": {
"TICKER": {
"nav": monthly_nav_erosion_rate,
"yield": monthly_yield_erosion_rate
}
}
}
"""
per_ticker_erosion = {}
for result in analysis.results:
# Convert annual erosion rates to monthly rates
# Monthly rate = (1 + annual_rate)^(1/12) - 1
# For small rates, approximately annual_rate / 12
annual_nav_erosion = result.estimated_nav_erosion
annual_yield_erosion = result.estimated_yield_erosion
# Convert to monthly rates using compound formula for accuracy
if annual_nav_erosion > 0:
monthly_nav_rate = (1 + annual_nav_erosion) ** (1/12) - 1
else:
monthly_nav_rate = 0.0
if annual_yield_erosion > 0:
monthly_yield_rate = (1 + annual_yield_erosion) ** (1/12) - 1
else:
monthly_yield_rate = 0.0
# Cap maximum monthly erosion at 5% for safety
monthly_nav_rate = min(monthly_nav_rate, 0.05)
monthly_yield_rate = min(monthly_yield_rate, 0.05)
per_ticker_erosion[result.ticker] = {
"nav": monthly_nav_rate,
"yield": monthly_yield_rate
}
logger.info(f"Converted erosion rates for {result.ticker}:")
logger.info(f" Annual NAV erosion: {annual_nav_erosion:.2%} -> Monthly: {monthly_nav_rate:.4%}")
logger.info(f" Annual Yield erosion: {annual_yield_erosion:.2%} -> Monthly: {monthly_yield_rate:.4%}")
return {
"per_ticker": per_ticker_erosion
}
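A quick numeric check of the compound conversion: a 12% estimated annual erosion maps to roughly 0.95% per month, slightly below the naive 1% from dividing by 12:

```python
annual = 0.12
monthly = (1 + annual) ** (1 / 12) - 1
print(f"{monthly:.4%}")  # ~0.9489%, vs 1.0000% from annual / 12
```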

87
ETF_Portal/test_cache.py Normal file
View File

@ -0,0 +1,87 @@
#!/usr/bin/env python3
"""
Cache System Test
Tests the caching behavior with consecutive API calls.
"""
import logging
import os
import time
from api_client import APIClient
# Setup logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
def run_test(api_key: str):
"""Run cache test with consecutive API calls."""
# Initialize API client
api_client = APIClient(api_key=api_key)
# Test data
test_tickers = ["SPY", "QQQ", "VTI"]
logger.info("\n=== First Run (Should make API calls) ===")
for ticker in test_tickers:
logger.info(f"\nTesting {ticker}:")
# Get profile
start_time = time.time()
profile = api_client.get_profile(ticker)
duration = time.time() - start_time
logger.info(f"Profile data received in {duration:.2f}s")
# Get historical data
start_time = time.time()
historical = api_client.get_historical_data(ticker, timeframe="1d")
duration = time.time() - start_time
logger.info(f"Historical data received in {duration:.2f}s")
# Get holdings
start_time = time.time()
holdings = api_client.get_holdings(ticker)
duration = time.time() - start_time
logger.info(f"Holdings data received in {duration:.2f}s")
# Wait a moment between runs
time.sleep(2)
logger.info("\n=== Second Run (Should use cache) ===")
for ticker in test_tickers:
logger.info(f"\nTesting {ticker}:")
# Get profile
start_time = time.time()
profile = api_client.get_profile(ticker)
duration = time.time() - start_time
logger.info(f"Profile data received in {duration:.2f}s")
# Get historical data
start_time = time.time()
historical = api_client.get_historical_data(ticker, timeframe="1d")
duration = time.time() - start_time
logger.info(f"Historical data received in {duration:.2f}s")
# Get holdings
start_time = time.time()
holdings = api_client.get_holdings(ticker)
duration = time.time() - start_time
logger.info(f"Holdings data received in {duration:.2f}s")
# Get cache statistics
from cache_manager import cache_manager
stats = cache_manager.get_stats()
logger.info("\n=== Cache Statistics ===")
logger.info(f"Cache hits: {stats['hits']}")
logger.info(f"Cache misses: {stats['misses']}")
logger.info(f"Hit rate: {stats['hit_rate']:.2%}")
logger.info(f"Total cache size: {stats['total_size']} bytes")
logger.info(f"Number of cache files: {stats['cache_files']}")
if __name__ == "__main__":
# Use your FMP API key
API_KEY = "fmp_live_8c8c8c8c8c8c8c8c8c8c8c8c8c8c8c8c" # Replace with your actual API key
run_test(API_KEY)

View File

@ -0,0 +1,68 @@
import os
import logging
from ..api import APIFactory
import pandas as pd
def test_api_configuration():
"""Test the API configuration and secrets."""
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
try:
# Initialize API factory
api_factory = APIFactory()
# Test FMP client
logger.info("Testing FMP client...")
fmp_client = api_factory.get_client('fmp')
# Test with a known ETF
test_symbol = "SPY"
# Test profile data
logger.info(f"Getting profile data for {test_symbol}...")
profile = fmp_client.get_etf_profile(test_symbol)
# A dict without an 'error' key or a non-empty list counts as success
if (isinstance(profile, dict) and not profile.get('error')) or (isinstance(profile, list) and profile):
logger.info("✅ Profile data retrieved successfully")
else:
logger.error("❌ Failed to get profile data")
if isinstance(profile, dict):
logger.error(f"Error: {profile.get('message', 'Unknown error')}")
else:
logger.error(f"Error: Unexpected response type: {type(profile)}")
# Test historical data
logger.info(f"Getting historical data for {test_symbol}...")
historical = fmp_client.get_historical_data(test_symbol, period='1mo')
if isinstance(historical, pd.DataFrame) and not historical.empty:
logger.info("✅ Historical data retrieved successfully")
logger.info(f"Data points: {len(historical)}")
else:
logger.error("❌ Failed to get historical data")
# Test cache
logger.info("Testing cache...")
cache_stats = api_factory.get_cache_stats()
logger.info(f"Cache stats: {cache_stats}")
# Test fallback to yfinance
logger.info("Testing fallback to yfinance...")
yfinance_data = api_factory.get_data(test_symbol, 'etf_profile', provider='yfinance')
if isinstance(yfinance_data, dict) and not yfinance_data.get('error'):
logger.info("✅ YFinance fallback working")
else:
logger.error("❌ YFinance fallback failed")
logger.error(f"Error: {yfinance_data.get('message', 'Unknown error')}")
return True
except Exception as e:
logger.error(f"❌ Test failed: {str(e)}")
return False
if __name__ == "__main__":
success = test_api_configuration()
if success:
print("\n✅ All tests passed!")
else:
print("\n❌ Some tests failed. Check the logs for details.")

View File

@ -116,15 +116,6 @@ st.markdown("""
# Create sidebar for API keys
with st.sidebar:
st.markdown("### API Configuration")
fmp_key = st.text_input(
"FMP API Key",
value=st.session_state.fmp_api_key,
type="password",
key="fmp_api_key_field"
)
if fmp_key != st.session_state.fmp_api_key:
st.session_state.fmp_api_key = fmp_key
openai_key = st.text_input(
"OpenAI API Key",
value=st.session_state.openai_api_key,

152
README.md
View File

@ -1,6 +1,97 @@
# ETF Dividend Portfolio Builder
# ETF Portal
A comprehensive tool for discovering, analyzing, and building dividend-focused ETF portfolios using real-time market data.
A comprehensive tool for ETF portfolio management and analysis.
## Installation
1. Clone the repository:
```bash
git clone <repository-url>
cd ETF_Portal
```
2. Create and activate a virtual environment:
```bash
python3 -m venv venv
source venv/bin/activate # On Linux/Mac
# or
.\venv\Scripts\activate # On Windows
```
3. Install the package in development mode:
```bash
pip install -e .
```
## Environment Setup
1. Copy the environment template:
```bash
cp .env.template .env
```
2. Edit the `.env` file and add your API keys:
```
FMP_API_KEY=your_api_key_here
CACHE_DURATION_HOURS=24
```
3. Never commit the `.env` file to version control.
## Usage
The ETF Portal provides a command-line interface for managing the application:
```bash
# Start the launcher
etf-portal start
# Start a specific component
etf-portal start --component portfolio_builder
# Start in background mode
etf-portal start --component launcher --background
# Stop all components
etf-portal stop
# Stop a specific component
etf-portal stop --component analyzer
# Check status
etf-portal status
# Restart components
etf-portal restart
# View logs
etf-portal logs
# Update configuration
etf-portal config --key ports.launcher --value 8500
```
## Components
- **Launcher**: The main entry point for the ETF Portal (port 8500)
- **Portfolio Builder**: Tool for building and managing ETF portfolios (port 8501)
- **Analyzer**: Tool for analyzing ETF portfolios (port 8502)
## Configuration
The configuration file is located at `config/etf_suite_config.json`. You can modify it directly or use the `config` command:
```bash
etf-portal config --key ports.launcher --value 8500
```
## Logs
Logs are stored in the `logs` directory. You can view recent logs using:
```bash
etf-portal logs
```
## Features
@ -47,49 +138,40 @@ The ETF Suite CLI provides a convenient way to manage the different components o
### Installation
```
```bash
# Install the package
pip install -e .
# Create required directories
sudo mkdir -p /var/run/etf-portal
sudo mkdir -p /var/log/etf-portal
sudo chown -R $USER:$USER /var/run/etf-portal
sudo chown -R $USER:$USER /var/log/etf-portal
```
### Usage
```bash
# Start the ETF Portal application
etf-portal start
# Stop the ETF Portal application
etf-portal stop
# Restart the ETF Portal application
etf-portal restart
# Check the status of the ETF Portal application
etf-portal status
```
# Start the entire ETF Suite (Launcher, Portfolio Builder, Analyzer)
etf-suite start --component all
# Start just the ETF Analyzer
etf-suite start --component analyzer
## Logs
# Start without opening browser automatically
etf-suite start --component launcher --no-browser
Logs are stored in `/var/log/etf-portal/cli_manager.log`
# Start in background mode (daemon)
etf-suite start --component all --background
## PID File
# Stop all running ETF Suite components
etf-suite stop
# Stop a specific component
etf-suite stop --component portfolio_builder
# Restart all components
etf-suite restart
# Restart a specific component
etf-suite restart --component analyzer
# Restart in background mode
etf-suite restart --background
# Check status of running components
etf-suite status
# View recent logs
etf-suite logs
# Update configuration
etf-suite config --key ports.launcher --value 8600
```
The PID file is stored in `/var/run/etf-portal/etf_portal.pid`
## Usage

0
api/__init__.py Normal file
View File

View File

@ -1 +0,0 @@
{"data": {"__pandas_series__": true, "index": [

View File

@ -1 +0,0 @@
{"data": {"longBusinessSummary": "The fund is an actively managed exchange-traded fund that seeks current income while providing direct and/or indirect exposure to the share price of select U.S. listed securities, subject to a limit on potential investment gains. It uses both traditional and synthetic covered call strategies that are designed to produce higher income levels when the underlying securities experience more volatility. The fund is non-diversified.", "companyOfficers": [], "executiveTeam": [], "maxAge": 86400, "priceHint": 2, "previousClose": 6.1, "open": 6.02, "dayLow": 6.02, "dayHigh": 6.13, "regularMarketPreviousClose": 6.1, "regularMarketOpen": 6.02, "regularMarketDayLow": 6.02, "regularMarketDayHigh": 6.13, "trailingPE": 26.701918, "volume": 2869944, "regularMarketVolume": 2869944, "averageVolume": 1994822, "averageVolume10days": 3513250, "averageDailyVolume10Day": 3513250, "bid": 6.1, "ask": 6.12, "bidSize": 8, "askSize": 30, "yield": 1.6552, "totalAssets": 226379696, "fiftyTwoWeekLow": 5.23, "fiftyTwoWeekHigh": 15.22, "fiftyDayAverage": 6.0538, "twoHundredDayAverage": 8.74755, "navPrice": 6.0906, "currency": "USD", "tradeable": false, "category": "Derivative Income", "ytdReturn": -11.21162, "beta3Year": 0.0, "fundFamily": "YieldMax ETFs", "fundInceptionDate": 1709078400, "legalType": "Exchange Traded Fund", "quoteType": "ETF", "symbol": "ULTY", "language": "en-US", "region": "US", "typeDisp": "ETF", "quoteSourceName": "Delayed Quote", "triggerable": true, "customPriceAlertConfidence": "HIGH", "shortName": "Tidal Trust II YieldMax Ultra O", "longName": "YieldMax Ultra Option Income Strategy ETF", "fiftyTwoWeekLowChange": 0.8699999, "fiftyTwoWeekLowChangePercent": 0.16634797, "fiftyTwoWeekRange": "5.23 - 15.22", "fiftyTwoWeekHighChange": -9.120001, "fiftyTwoWeekHighChangePercent": -0.59921163, "fiftyTwoWeekChangePercent": -57.282913, "dividendYield": 165.52, "trailingThreeMonthReturns": -13.87, "trailingThreeMonthNavReturns": -13.87, "netAssets": 226379696.0, "epsTrailingTwelveMonths": 0.228448, "marketState": "CLOSED", "regularMarketChangePercent": 0.0, "regularMarketPrice": 6.1, "corporateActions": [], "postMarketTime": 1748044768, "regularMarketTime": 1748030400, "exchange": "PCX", "messageBoardId": "finmb_1869805004", "exchangeTimezoneName": "America/New_York", "exchangeTimezoneShortName": "EDT", "gmtOffSetMilliseconds": -14400000, "market": "us_market", "esgPopulated": false, "fiftyDayAverageChange": 0.0461998, "fiftyDayAverageChangePercent": 0.007631537, "twoHundredDayAverageChange": -2.64755, "twoHundredDayAverageChangePercent": -0.3026619, "netExpenseRatio": 1.3, "sourceInterval": 15, "exchangeDataDelayedBy": 0, "cryptoTradeable": false, "hasPrePostMarketData": true, "firstTradeDateMilliseconds": 1709217000000, "postMarketChangePercent": 0.49180672, "postMarketPrice": 6.13, "postMarketChange": 0.03000021, "regularMarketChange": 0.0, "regularMarketDayRange": "6.02 - 6.13", "fullExchangeName": "NYSEArca", "averageDailyVolume3Month": 1994822, "trailingPegRatio": null}, "timestamp": "2025-05-24T13:52:58.466664"}

View File

@ -1 +0,0 @@
{"data": {"__pandas_series__": true, "index": [

View File

@ -1 +0,0 @@
{"data": {"__pandas_series__": true, "index": [

View File

@ -1 +0,0 @@
{"data": {"longBusinessSummary": "The fund is an actively managed exchange-traded fund (\u201cETF\u201d) that seeks current income while maintaining the opportunity for exposure to the share price (i.e., the price returns) of the securities of the companies comprising the Solactive FANG Innovation Index. The fund advisor seeks to employ the fund's investment strategy regardless of whether there are periods of adverse market, economic, or other conditions and will not seek to take temporary defensive positions during such periods. It is non-diversified.", "companyOfficers": [], "executiveTeam": [], "maxAge": 86400, "priceHint": 2, "previousClose": 43.66, "open": 43.12, "dayLow": 43.02, "dayHigh": 43.55, "regularMarketPreviousClose": 43.66, "regularMarketOpen": 43.12, "regularMarketDayLow": 43.02, "regularMarketDayHigh": 43.55, "trailingPE": 36.65259, "volume": 129662, "regularMarketVolume": 129662, "averageVolume": 147330, "averageVolume10days": 111310, "averageDailyVolume10Day": 111310, "bid": 43.11, "ask": 43.35, "bidSize": 2, "askSize": 2, "yield": 0.3039, "totalAssets": 426724160, "fiftyTwoWeekLow": 35.44, "fiftyTwoWeekHigh": 56.44, "fiftyDayAverage": 41.907, "twoHundredDayAverage": 48.12085, "navPrice": 43.62, "currency": "USD", "tradeable": false, "category": "Derivative Income", "ytdReturn": -8.89758, "beta3Year": 0.0, "fundFamily": "REX Advisers, LLC", "fundInceptionDate": 1696982400, "legalType": "Exchange Traded Fund", "quoteType": "ETF", "symbol": "FEPI", "language": "en-US", "region": "US", "typeDisp": "ETF", "quoteSourceName": "Nasdaq Real Time Price", "triggerable": true, "customPriceAlertConfidence": "HIGH", "corporateActions": [], "postMarketTime": 1748040939, "regularMarketTime": 1748030400, "hasPrePostMarketData": true, "firstTradeDateMilliseconds": 1697031000000, "postMarketChangePercent": 0.716766, "postMarketPrice": 43.56, "postMarketChange": 0.310001, "regularMarketChange": -0.40999985, "regularMarketDayRange": "43.02 - 43.55", "fullExchangeName": "NasdaqGM", "averageDailyVolume3Month": 147330, "fiftyTwoWeekLowChange": 7.8100014, "fiftyTwoWeekLowChangePercent": 0.22037251, "fiftyTwoWeekRange": "35.44 - 56.44", "fiftyTwoWeekHighChange": -13.189999, "fiftyTwoWeekHighChangePercent": -0.23369949, "fiftyTwoWeekChangePercent": -20.72947, "dividendYield": 30.39, "trailingThreeMonthReturns": -9.55848, "trailingThreeMonthNavReturns": -9.55848, "netAssets": 426724160.0, "epsTrailingTwelveMonths": 1.1799984, "fiftyDayAverageChange": 1.3429985, "fiftyDayAverageChangePercent": 0.032047115, "twoHundredDayAverageChange": -4.8708496, "twoHundredDayAverageChangePercent": -0.10122119, "netExpenseRatio": 0.65, "sourceInterval": 15, "exchangeDataDelayedBy": 0, "ipoExpectedDate": "2023-10-11", "cryptoTradeable": false, "marketState": "CLOSED", "shortName": "REX FANG & Innovation Equity Pr", "longName": "REX FANG & Innovation Equity Premium Income ETF", "exchange": "NGM", "messageBoardId": "finmb_1843173608", "exchangeTimezoneName": "America/New_York", "exchangeTimezoneShortName": "EDT", "gmtOffSetMilliseconds": -14400000, "market": "us_market", "esgPopulated": false, "regularMarketChangePercent": -0.93907434, "regularMarketPrice": 43.25, "trailingPegRatio": null}, "timestamp": "2025-05-24T13:52:58.472581"}

View File

@ -1 +0,0 @@
{"data": {"longBusinessSummary": "The fund is an actively managed ETF that seeks current income while maintaining the opportunity for exposure to the share price of the common stock of MicroStrategy Incorporated, subject to a limit on potential investment gains. It will seek to employ its investment strategy as it relates to MSTR regardless of whether there are periods of adverse market, economic, or other conditions and will not seek to take temporary defensive positions during such periods. The fund is non-diversified.", "companyOfficers": [], "executiveTeam": [], "maxAge": 86400, "priceHint": 2, "previousClose": 23.08, "open": 22.68, "dayLow": 21.2901, "dayHigh": 22.7, "regularMarketPreviousClose": 23.08, "regularMarketOpen": 22.68, "regularMarketDayLow": 21.2901, "regularMarketDayHigh": 22.7, "volume": 18359202, "regularMarketVolume": 18359202, "averageVolume": 7904033, "averageVolume10days": 11083420, "averageDailyVolume10Day": 11083420, "bid": 21.5, "ask": 21.63, "bidSize": 12, "askSize": 32, "yield": 1.2471, "totalAssets": 3270944256, "fiftyTwoWeekLow": 17.1, "fiftyTwoWeekHigh": 46.5, "fiftyDayAverage": 22.1824, "twoHundredDayAverage": 26.1411, "navPrice": 23.0514, "currency": "USD", "tradeable": false, "category": "Derivative Income", "ytdReturn": 24.89181, "beta3Year": 0.0, "fundFamily": "YieldMax ETFs", "fundInceptionDate": 1708473600, "legalType": "Exchange Traded Fund", "quoteType": "ETF", "symbol": "MSTY", "language": "en-US", "region": "US", "typeDisp": "ETF", "quoteSourceName": "Delayed Quote", "triggerable": true, "customPriceAlertConfidence": "HIGH", "marketState": "CLOSED", "shortName": "Tidal Trust II YieldMax MSTR Op", "regularMarketChangePercent": -6.7591, "regularMarketPrice": 21.52, "corporateActions": [], "longName": "Yieldmax MSTR Option Income Strategy ETF", "postMarketTime": 1748044797, "regularMarketTime": 1748030400, "regularMarketDayRange": "21.2901 - 22.7", "fullExchangeName": "NYSEArca", "averageDailyVolume3Month": 7904033, "fiftyTwoWeekLowChange": 4.42, "fiftyTwoWeekLowChangePercent": 0.25847954, "fiftyTwoWeekRange": "17.1 - 46.5", "fiftyTwoWeekHighChange": -24.98, "fiftyTwoWeekHighChangePercent": -0.53720427, "fiftyTwoWeekChangePercent": -38.654507, "dividendYield": 124.71, "trailingThreeMonthReturns": 13.21012, "trailingThreeMonthNavReturns": 13.21012, "netAssets": 3270944260.0, "fiftyDayAverageChange": -0.6623993, "fiftyDayAverageChangePercent": -0.02986148, "twoHundredDayAverageChange": -4.6210995, "twoHundredDayAverageChangePercent": -0.17677525, "netExpenseRatio": 0.99, "sourceInterval": 15, "exchangeDataDelayedBy": 0, "cryptoTradeable": false, "hasPrePostMarketData": true, "firstTradeDateMilliseconds": 1708612200000, "postMarketChangePercent": -0.18587786, "postMarketPrice": 21.48, "postMarketChange": -0.040000916, "regularMarketChange": -1.56, "exchange": "PCX", "messageBoardId": "finmb_1850981069", "exchangeTimezoneName": "America/New_York", "exchangeTimezoneShortName": "EDT", "gmtOffSetMilliseconds": -14400000, "market": "us_market", "esgPopulated": false, "trailingPegRatio": null}, "timestamp": "2025-05-24T13:52:58.456018"}

cli_manager.py Executable file (265 lines)
View File

@ -0,0 +1,265 @@
#!/usr/bin/env python3
import os
import sys
import time
import signal
import logging
import argparse
import subprocess
from pathlib import Path
from typing import Optional, Dict, Any
import psutil
class CLIManager:
"""CLI Manager for ETF Portal application.
Provides commands to:
- start: Launch the application
- stop: Gracefully stop the application
- restart: Stop and start the application
- status: Check if the application is running
"""
def __init__(self):
self.app_name = "ETF_Portal"
self.pid_file = Path("/var/run/etf-portal/etf_portal.pid")
self.log_file = Path("/var/log/etf-portal/cli_manager.log")
self._setup_logging()
def _setup_logging(self):
"""Configure logging for the CLI manager."""
self.log_file.parent.mkdir(parents=True, exist_ok=True)
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler(self.log_file),
logging.StreamHandler()
]
)
self.logger = logging.getLogger("CLIManager")
def _get_pid(self) -> Optional[int]:
"""Get the PID from the PID file if it exists."""
try:
if self.pid_file.exists():
with open(self.pid_file, 'r') as f:
pid = int(f.read().strip())
return pid
except (ValueError, IOError) as e:
self.logger.error(f"Error reading PID file: {e}")
return None
def _save_pid(self, pid: int):
"""Save the PID to the PID file."""
try:
self.pid_file.parent.mkdir(parents=True, exist_ok=True)
with open(self.pid_file, 'w') as f:
f.write(str(pid))
except IOError as e:
self.logger.error(f"Error saving PID file: {e}")
def _is_process_running(self, pid: int) -> bool:
"""Check if a process with the given PID is running."""
try:
process = psutil.Process(pid)
# Check if it's a Python process running Streamlit
is_running = process.is_running()
is_streamlit = any('streamlit' in cmd.lower() for cmd in process.cmdline())
is_root = process.uids().real == 0 # Check if process is running as root
if is_running and is_streamlit:
if is_root:
self.logger.warning(f"Process {pid} is running as root")
return True
return False
except psutil.NoSuchProcess:
return False
except psutil.AccessDenied:
self.logger.warning(f"Access denied to process {pid}")
return False
def _find_streamlit_process(self) -> Optional[int]:
"""Find the main Streamlit process."""
for proc in psutil.process_iter(['pid', 'cmdline', 'uids']):
try:
                if any('streamlit' in cmd.lower() for cmd in (proc.info['cmdline'] or [])):
if proc.info['uids'].real == 0:
self.logger.warning(f"Found Streamlit process {proc.info['pid']} running as root")
return proc.info['pid']
except (psutil.NoSuchProcess, psutil.AccessDenied):
continue
return None
def start(self):
"""Start the ETF Portal application."""
# Check if Streamlit is already running
existing_pid = self._find_streamlit_process()
if existing_pid:
self.logger.info(f"{self.app_name} is already running with PID {existing_pid}")
self._save_pid(existing_pid)
return
try:
# Get the full path to streamlit in the virtual environment
venv_streamlit = os.path.join(os.path.dirname(sys.executable), "streamlit")
if not os.path.exists(venv_streamlit):
raise Exception(f"Streamlit not found at {venv_streamlit}")
# Ensure we're using the virtual environment's Python
env = os.environ.copy()
env["PATH"] = os.path.dirname(sys.executable) + ":" + env["PATH"]
# Start the application using streamlit
process = subprocess.Popen(
[venv_streamlit, "run", "ETF_Suite_Launcher.py", "--server.port=8500"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
                env=env,
                start_new_session=True  # runs setsid in the child; a separate preexec_fn would be redundant
)
# Wait a moment for Streamlit to start
time.sleep(2)
# Find the actual Streamlit process
streamlit_pid = self._find_streamlit_process()
if streamlit_pid:
# Verify the process is running with correct permissions
try:
proc = psutil.Process(streamlit_pid)
if proc.uids().real == 0:
self.logger.error("Process started as root. Stopping...")
self.stop()
raise Exception("Process started with incorrect permissions")
except (psutil.NoSuchProcess, psutil.AccessDenied):
pass
self._save_pid(streamlit_pid)
self.logger.info(f"{self.app_name} started with PID {streamlit_pid}")
else:
raise Exception("Streamlit process not found after startup")
except Exception as e:
self.logger.error(f"Failed to start {self.app_name}: {e}")
sys.exit(1)
def stop(self):
"""Stop the ETF Portal application."""
# First check the PID file
pid = self._get_pid()
if not pid:
# If no PID file, try to find running Streamlit process
pid = self._find_streamlit_process()
if not pid:
self.logger.info(f"{self.app_name} is not running")
return
try:
if self._is_process_running(pid):
# Get the process using psutil
process = psutil.Process(pid)
# Check if process is running as root
if process.uids().real == 0:
self.logger.error(f"Cannot stop process {pid} - it is running as root")
self.logger.info("Please use sudo to stop the process:")
self.logger.info(f"sudo kill {pid}")
return
# Try to terminate the process and its children
try:
# Get all child processes
children = process.children(recursive=True)
# Terminate children first
for child in children:
try:
child.terminate()
except psutil.NoSuchProcess:
pass
# Terminate the main process
process.terminate()
# Wait for processes to terminate
gone, alive = psutil.wait_procs([process] + children, timeout=10)
# If any processes are still alive, force kill them
for p in alive:
try:
p.kill()
except psutil.NoSuchProcess:
pass
self.logger.info(f"{self.app_name} stopped")
except psutil.NoSuchProcess:
self.logger.info(f"{self.app_name} is not running")
else:
self.logger.info(f"{self.app_name} is not running")
# Remove PID file
if self.pid_file.exists():
self.pid_file.unlink()
except Exception as e:
self.logger.error(f"Error stopping {self.app_name}: {e}")
# Don't exit with error, just log it
self.logger.info("Attempting to clean up PID file...")
if self.pid_file.exists():
self.pid_file.unlink()
def restart(self):
"""Restart the ETF Portal application."""
self.logger.info("Restarting ETF Portal...")
self.stop()
time.sleep(2) # Wait for processes to fully stop
self.start()
def status(self):
"""Check the status of the ETF Portal application."""
# First check the PID file
pid = self._get_pid()
if not pid:
# If no PID file, try to find running Streamlit process
pid = self._find_streamlit_process()
if not pid:
print(f"{self.app_name} is not running")
return
if self._is_process_running(pid):
print(f"{self.app_name} is running with PID {pid}")
else:
print(f"{self.app_name} is not running (stale PID file)")
if self.pid_file.exists():
self.pid_file.unlink()
def main():
"""Main entry point for the CLI."""
parser = argparse.ArgumentParser(
description="ETF Portal CLI Manager",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
etf-portal start # Start the ETF Portal application
etf-portal stop # Stop the ETF Portal application
etf-portal restart # Restart the ETF Portal application
etf-portal status # Check the status of the ETF Portal application
"""
)
parser.add_argument(
"command",
choices=["start", "stop", "restart", "status"],
help="Command to execute"
)
args = parser.parse_args()
manager = CLIManager()
# Execute the requested command
getattr(manager, args.command)()
if __name__ == "__main__":
main()

View File

@ -0,0 +1,8 @@
{
"ports": {
"launcher": 8500,
"portfolio_builder": 8501,
"analyzer": 8502
},
"streamlit_path": "/home/pascal/Dev/ETF_Portal/venv/bin/streamlit"
}

View File

@ -6,6 +6,8 @@ services:
command: streamlit run ETF_Suite_Launcher.py --server.port=8500
volumes:
- .:/app
ports:
- "8500:8500"
networks:
- etf_network
environment:
@ -16,6 +18,8 @@ services:
- STREAMLIT_SERVER_ENABLE_CORS=true
- STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION=false
- STREAMLIT_SERVER_BASE_URL_PATH=
env_file:
- .env
restart: unless-stopped
# etf-portfolio:

View File

@ -0,0 +1,162 @@
# Dividend Trend Analysis Framework
## 1. Theoretical Foundations
### 1.1 Gordon Growth Model
- Basic formula: P = D1 / (r - g), valid only when r > g
- Where:
- P = Fair price today
- D1 = Expected dividend over the next year
- r = Required rate of return
- g = Constant dividend growth rate
- Application: back out the growth rate the market price implies (g = r - D1/P) and compare it with realized dividend growth to validate sustainability, as in the sketch below
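A minimal sketch of that validation step, assuming the expected next-year dividend and a required return are known; the function name and the example numbers are illustrative only:

```python
def gordon_implied_growth(price: float, next_dividend: float, required_return: float) -> float:
    """Dividend growth rate the market price implies under the Gordon model: g = r - D1 / P."""
    if price <= 0:
        raise ValueError("price must be positive")
    return required_return - next_dividend / price

# A $20.00 ETF expected to pay $1.60 over the next year, at a 10% required return,
# prices in 10% - 8% = 2% implied growth. If realized growth runs far below this,
# the current payout looks unsustainable.
print(f"{gordon_implied_growth(20.0, 1.60, 0.10):.2%}")  # 2.00%
```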
### 1.2 Dividend Discount Model (DDM)
- Multi-stage DDM for ETFs with varying growth phases
- Terminal value calculation using industry averages
- Sensitivity analysis for different growth scenarios
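As one concrete form of the multi-stage idea, a two-stage sketch (explicit high-growth years plus a Gordon terminal value; all parameter values below are placeholders, not calibrated inputs):

```python
def two_stage_ddm(d0: float, g_high: float, years_high: int,
                  g_terminal: float, r: float) -> float:
    """Present value of dividends: explicit high-growth phase plus a Gordon terminal value."""
    if r <= g_terminal:
        raise ValueError("required return must exceed terminal growth")
    value, dividend = 0.0, d0
    for t in range(1, years_high + 1):
        dividend *= (1 + g_high)
        value += dividend / (1 + r) ** t
    # Terminal value at the end of the high-growth phase, discounted back to today
    terminal = dividend * (1 + g_terminal) / (r - g_terminal)
    return value + terminal / (1 + r) ** years_high

# Vary g_high / g_terminal over a grid to produce the sensitivity analysis above
print(round(two_stage_ddm(d0=1.0, g_high=0.08, years_high=5, g_terminal=0.02, r=0.09), 2))
```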
### 1.3 Modern Portfolio Theory (MPT)
- Dividend yield as a risk factor
- Correlation with market returns
- Beta calculation specific to dividend-paying securities
## 2. Empirical Analysis Framework
### 2.1 Historical Data Analysis
- Rolling 12-month dividend growth rates
- Year-over-year comparisons
- Seasonality analysis
- Maximum drawdown during dividend cuts
### 2.2 Statistical Measures
- Mean reversion analysis
- Volatility clustering
- Autocorrelation of dividend payments
- Skewness and kurtosis of dividend distributions
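A compact pandas sketch of the rolling-growth and autocorrelation measures from sections 2.1 and 2.2; the input is assumed to be a monthly dividend-per-share series:

```python
import pandas as pd

def dividend_trend_stats(monthly_dividends: pd.Series) -> dict:
    """Rolling 12-month growth, its volatility, and lag-1 autocorrelation of payments."""
    trailing_12m = monthly_dividends.rolling(12).sum()
    rolling_growth = trailing_12m.pct_change(12)  # year-over-year change of the trailing sum
    return {
        "latest_rolling_growth": rolling_growth.iloc[-1],
        "growth_volatility": rolling_growth.std(),
        "autocorrelation": monthly_dividends.autocorr(lag=1),
    }
```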
### 2.3 Machine Learning Components
- Time series forecasting (ARIMA/SARIMA)
- Random Forest for feature importance
- Gradient Boosting for non-linear relationships
- Clustering for similar ETF behavior
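For the time-series component, a minimal starting point might look like the following; the ARIMA(1,1,1) order is an arbitrary placeholder, and a seasonal (SARIMA) specification would be the natural next step for monthly payers:

```python
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA

def forecast_dividends(monthly_dividends: pd.Series, horizon: int = 12) -> pd.Series:
    """Fit a simple ARIMA(1,1,1) and forecast the next `horizon` monthly payments."""
    fitted = ARIMA(monthly_dividends, order=(1, 1, 1)).fit()
    return fitted.forecast(steps=horizon)
```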
## 3. Risk Assessment Framework
### 3.1 Quantitative Risk Metrics
- Dividend Coverage Ratio
- Payout Ratio
- Free Cash Flow to Dividend Ratio
- Interest Coverage Ratio
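These four ratios are plain arithmetic once the fundamentals are available; a sketch, assuming non-zero denominators and that inputs come from the fund's underlying holdings or issuer filings:

```python
def dividend_risk_ratios(net_income: float, free_cash_flow: float,
                         total_dividends: float, ebit: float,
                         interest_expense: float) -> dict:
    """Quantitative risk metrics from section 3.1; callers must ensure non-zero denominators."""
    return {
        "dividend_coverage": net_income / total_dividends,
        "payout_ratio": total_dividends / net_income,
        "fcf_to_dividends": free_cash_flow / total_dividends,
        "interest_coverage": ebit / interest_expense,
    }
```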
### 3.2 Market Risk Factors
- Interest Rate Sensitivity
- Credit Spread Impact
- Market Volatility Correlation
- Sector-Specific Risks
### 3.3 Structural Risk Analysis
- ETF Structure (Physical vs Synthetic)
- Tracking Error
- Liquidity Risk
- Counterparty Risk
## 4. Implementation Guidelines
### 4.1 Data Requirements
- Minimum 5 years of historical data
- Monthly dividend payments
- NAV/Price history
- Trading volume
- AUM (Assets Under Management)
### 4.2 Calculation Methodology
```python
from typing import Dict, Tuple

def calculate_dividend_trend(etf_data: Dict) -> Dict:
"""
Calculate comprehensive dividend trend analysis
Returns:
{
'gordon_growth': float, # Growth rate from Gordon model
'ddm_value': float, # Value from DDM
'empirical_metrics': {
'rolling_growth': float,
'volatility': float,
'autocorrelation': float
},
'risk_metrics': {
'coverage_ratio': float,
'payout_ratio': float,
'market_correlation': float
},
'ml_predictions': {
'next_year_growth': float,
'confidence_interval': Tuple[float, float]
}
}
"""
pass
```
### 4.3 Validation Framework
- Backtesting against historical data
- Cross-validation with similar ETFs
- Stress testing under market conditions
- Sensitivity analysis of parameters
## 5. Practical Considerations
### 5.1 ETF-Specific Adjustments
- New ETFs (< 2 years): Use peer comparison
- Established ETFs: Focus on historical patterns
- Sector ETFs: Consider industry cycles
- Global ETFs: Account for currency effects
### 5.2 Market Conditions
- Interest rate environment
- Economic cycle position
- Sector rotation impact
- Market sentiment indicators
### 5.3 Reporting Standards
- Clear confidence intervals
- Multiple scenario analysis
- Risk factor decomposition
- Historical comparison benchmarks
## 6. Continuous Improvement
### 6.1 Performance Monitoring
- Track prediction accuracy
- Monitor model drift
- Update parameters quarterly
- Validate against new data
### 6.2 Model Updates
- Incorporate new market data
- Adjust for structural changes
- Update peer comparisons
- Refine risk parameters
## 7. Implementation Roadmap
1. Phase 1: Basic Implementation
- Gordon Growth Model
- Historical trend analysis
- Basic risk metrics
2. Phase 2: Advanced Features
- Machine Learning components
- Market risk factors
- Structural analysis
3. Phase 3: Optimization
- Parameter tuning
- Performance validation
- Reporting improvements
4. Phase 4: Maintenance
- Regular updates
- Performance monitoring
- Model refinement

etf_suite_cli.py Normal file → Executable file (415 lines)
View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
"""
ETF Suite CLI
@ -24,7 +24,7 @@ from datetime import datetime
# Path configurations
WORKSPACE_PATH = Path(__file__).parent
LAUNCHER_PATH = WORKSPACE_PATH / "ETF_Suite_Launcher.py"
PORTFOLIO_BUILDER_PATH = WORKSPACE_PATH / "pages" / "ETF_Dividend_Portfolio_Builder.py"
PORTFOLIO_BUILDER_PATH = WORKSPACE_PATH / "pages" / "ETF_Portfolio_Builder.py"
ANALYZER_PATH = WORKSPACE_PATH / "pages" / "ETF_Analyzer.py"
CONFIG_DIR = WORKSPACE_PATH / "config"
CONFIG_FILE = CONFIG_DIR / "etf_suite_config.json"
@ -38,7 +38,7 @@ DEFAULT_PORTS = {
}
# Full path to streamlit executable - may need to be adjusted
STREAMLIT_PATH = "/home/pascal/.local/bin/streamlit"
STREAMLIT_PATH = "/home/pascal/Dev/ETF_Portal/venv/bin/streamlit"
# Process tracking
active_processes = {}
@ -72,12 +72,14 @@ def ensure_config_exists():
"streamlit_path": STREAMLIT_PATH
}, f, indent=2)
def get_config():
"""Get the configuration from the config file."""
ensure_config_exists()
with open(CONFIG_FILE, 'r') as f:
return json.load(f)
def update_config(key, value):
"""Update a specific configuration value."""
config = get_config()
@ -94,6 +96,7 @@ def update_config(key, value):
with open(CONFIG_FILE, 'w') as f:
json.dump(config, f, indent=2)
def cleanup_streamlit_processes():
"""Kill any existing Streamlit processes to prevent conflicts."""
click.echo("Cleaning up existing Streamlit processes...")
@ -101,22 +104,56 @@ def cleanup_streamlit_processes():
try:
config = get_config()
ports = config["ports"]
processed_pids = set() # Track PIDs we've already handled
# Find processes using our target ports
# First, find and kill processes using our target ports
for port in ports.values():
cmd = f"lsof -i :{port} | grep LISTEN | awk '{{print $2}}' | xargs kill -9 2>/dev/null || true"
subprocess.run(cmd, shell=True)
# Find and kill any lingering Streamlit processes
cmd = "pkill -f streamlit || true"
subprocess.run(cmd, shell=True)
try:
# Find process using the port
cmd = f"lsof -i :{port} | grep LISTEN | awk '{{print $2}}'"
result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
if result.stdout.strip():
pids = result.stdout.strip().split('\n')
for pid in pids:
pid = int(pid)
if pid not in processed_pids:
try:
os.kill(pid, signal.SIGTERM)
logger.info(f"Terminated process {pid} using port {port}")
processed_pids.add(pid)
except ProcessLookupError:
pass
except Exception as e:
logger.error(f"Error cleaning up port {port}: {e}")
# Then find and kill any remaining Streamlit processes
for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
try:
if proc.info['pid'] not in processed_pids and 'streamlit' in ' '.join(proc.info['cmdline'] or []).lower():
proc.terminate()
logger.info(f"Terminated Streamlit process {proc.info['pid']}")
processed_pids.add(proc.info['pid'])
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
pass
# Give processes time to shut down
time.sleep(1)
time.sleep(2)
# Force kill any remaining processes
for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
try:
if proc.info['pid'] not in processed_pids and 'streamlit' in ' '.join(proc.info['cmdline'] or []).lower():
proc.kill()
logger.info(f"Force killed Streamlit process {proc.info['pid']}")
processed_pids.add(proc.info['pid'])
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
pass
except Exception as e:
logger.error(f"Error during cleanup: {e}")
click.echo(f"Warning during cleanup: {e}")
def port_is_available(port):
"""Check if a port is available."""
try:
@ -126,12 +163,14 @@ def port_is_available(port):
except socket.error:
return False
def open_browser(url, delay=3):
"""Open browser after a delay to ensure app is running."""
time.sleep(delay)
click.echo(f"Opening browser to {url}")
webbrowser.open(url)
def start_component(component: str, open_browser_tab=True, background=False) -> Optional[subprocess.Popen]:
"""Start a specific component of the ETF Suite."""
config = get_config()
@ -192,95 +231,146 @@ def start_component(component: str, open_browser_tab=True, background=False) ->
streamlit_path, "run",
str(component_config["path"]),
"--server.port", str(port),
"--server.address", "0.0.0.0",
"--server.headless", "true",
"--browser.gatherUsageStats", "false"
"--server.fileWatcherType", "none" # Disable file watcher to prevent inotify issues
]
# Add component-specific options
if component == "portfolio_builder":
cmd.extend(["--server.baseUrlPath", "/portfolio"])
elif component == "analyzer":
cmd.extend(["--server.baseUrlPath", "/analyzer"])
try:
if background:
with open(log_file, 'w') as f:
process = subprocess.Popen(
cmd,
stdout=f,
stderr=f,
start_new_session=True
)
else:
# Launch the component
if background:
# In background mode, redirect output to log file
with open(log_file, 'w') as log:
process = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True
stdout=log,
stderr=log,
# Make the process independent of the parent
start_new_session=True
)
else:
# In foreground mode, just run normally
process = subprocess.Popen(cmd)
# Store process for tracking
active_processes[component] = {
"process": process,
"port": port,
"pid": process.pid,
"background": background
}
# Open browser pointing to the component
if open_browser_tab:
thread = threading.Thread(
target=open_browser,
args=(f"http://localhost:{port}",)
)
thread.daemon = True
thread.start()
# If running in background, we don't need to wait
if background:
logger.info(f"Started {component} in background mode (PID: {process.pid})")
# Give it a moment to start
time.sleep(1)
active_processes[component] = process
if open_browser_tab and not background:
url = f"http://localhost:{port}"
threading.Thread(target=open_browser, args=(url,)).start()
return process
except Exception as e:
error_msg = f"Error starting {component}: {e}"
logger.error(error_msg)
click.echo(error_msg)
return None
# Check if the process is still running
if process.poll() is not None:
error_msg = f"Error: {component} failed to start in background mode"
logger.error(error_msg)
click.echo(error_msg)
return None
return process
def get_streamlit_processes() -> List[Dict]:
"""Get information about running Streamlit processes."""
"""Get a list of running Streamlit processes."""
processes = []
for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
try:
if 'streamlit' in ' '.join(proc.info['cmdline'] or []):
cmdline = proc.info['cmdline']
if cmdline and 'streamlit' in ' '.join(cmdline):
port = None
component = None
# Extract port from command line
for i, arg in enumerate(cmdline):
if arg == '--server.port' and i + 1 < len(cmdline):
port = cmdline[i + 1]
# Identify which component based on file path
for i, arg in enumerate(cmdline):
if arg == 'run' and i + 1 < len(cmdline):
path = cmdline[i + 1]
if 'ETF_Suite_Launcher.py' in path:
component = 'launcher'
elif 'ETF_Portfolio_Builder.py' in path:
component = 'portfolio_builder'
elif 'ETF_Analyzer.py' in path:
component = 'analyzer'
processes.append({
'pid': proc.info['pid'],
'name': proc.info['name'],
'cmdline': ' '.join(proc.info['cmdline'] or [])
'port': port,
'component': component,
'cmdline': ' '.join(cmdline if cmdline else [])
})
except (psutil.NoSuchProcess, psutil.AccessDenied):
pass
return processes
def stop_component(component=None, pid=None):
"""Stop a specific component or process."""
"""Stop a specific component or Streamlit process."""
if pid:
try:
os.kill(pid, signal.SIGTERM)
click.echo(f"Stopped process {pid}")
return
except ProcessLookupError:
click.echo(f"Process {pid} not found")
return
if component == "all":
cleanup_streamlit_processes()
return
if component in active_processes:
process = active_processes[component]
try:
process = psutil.Process(pid)
process.terminate()
process.wait(timeout=5)
click.echo(f"Stopped {component}")
except subprocess.TimeoutExpired:
process.kill()
click.echo(f"Force killed {component}")
del active_processes[component]
else:
click.echo(f"No active process found for {component}")
try:
process.wait(timeout=5)
except psutil.TimeoutExpired:
process.kill()
click.echo(f"Stopped process with PID {pid}")
logger.info(f"Stopped process with PID {pid}")
return True
except psutil.NoSuchProcess:
click.echo(f"No process found with PID {pid}")
return False
elif component:
# Check active tracked processes first
if component in active_processes:
process_info = active_processes[component]
try:
process = process_info["process"]
process.terminate()
try:
process.wait(timeout=5)
except subprocess.TimeoutExpired:
process.kill()
click.echo(f"Stopped {component}")
logger.info(f"Stopped {component}")
del active_processes[component]
return True
except Exception:
pass
# Fall back to finding by component name in running processes
processes = get_streamlit_processes()
for proc in processes:
if proc['component'] == component:
return stop_component(pid=proc['pid'])
click.echo(f"No running {component} process found")
return False
@click.group()
def cli():
"""ETF Suite CLI - Manage your ETF Suite applications."""
"""ETF Suite CLI - Manage the ETF Suite application."""
pass
@cli.command()
@click.option('--component', type=click.Choice(['launcher', 'portfolio_builder', 'analyzer', 'all']),
default='launcher', help='Component to start')
@ -288,11 +378,39 @@ def cli():
@click.option('--background', is_flag=True, help="Run in background mode (daemon)")
def start(component, no_browser, background):
"""Start ETF Suite components."""
if component == "all":
for comp in ['launcher', 'portfolio_builder', 'analyzer']:
start_component(comp, not no_browser, background)
if component == 'all':
# Start launcher first, it will manage the other components
process = start_component('launcher', not no_browser, background)
if not process:
return
else:
start_component(component, not no_browser, background)
process = start_component(component, not no_browser, background)
if not process:
return
click.echo(f"Started {component}" + (" in background mode" if background else ""))
# In background mode, we just return immediately
if background:
return
# In foreground mode, wait for the process
click.echo("Press Ctrl+C to exit")
# Keep running until interrupted
try:
if component == 'all' or component == 'launcher':
process.wait()
else:
# For individual components, we'll just exit
return
except KeyboardInterrupt:
click.echo("\nShutting down...")
if component == 'all':
stop_component('launcher')
else:
stop_component(component)
@cli.command()
@click.option('--component', type=click.Choice(['launcher', 'portfolio_builder', 'analyzer', 'all']),
@ -300,7 +418,15 @@ def start(component, no_browser, background):
@click.option('--pid', type=int, help='Process ID to stop')
def stop(component, pid):
"""Stop ETF Suite components."""
stop_component(component, pid)
if pid:
stop_component(pid=pid)
elif component == 'all':
cleanup_streamlit_processes()
click.echo("Stopped all ETF Suite components")
logger.info("Stopped all ETF Suite components")
else:
stop_component(component)
@cli.command()
@click.option('--component', type=click.Choice(['launcher', 'portfolio_builder', 'analyzer', 'all']),
@ -308,67 +434,120 @@ def stop(component, pid):
@click.option('--no-browser', is_flag=True, help="Don't open browser automatically")
@click.option('--background', is_flag=True, help="Run in background mode (daemon)")
def restart(component, no_browser, background):
"""Restart ETF Suite components."""
if component == "all":
stop_component("all")
time.sleep(2)
for comp in ['launcher', 'portfolio_builder', 'analyzer']:
start_component(comp, not no_browser, background)
"""Restart ETF Suite components (stop and then start)."""
# First stop the components
if component == 'all':
cleanup_streamlit_processes()
click.echo("Stopped all ETF Suite components")
logger.info("Stopped all ETF Suite components")
else:
stop_component(component)
time.sleep(2)
start_component(component, not no_browser, background)
# Give processes time to fully shut down
time.sleep(2)
# Then start them again
if component == 'all':
# Start launcher first, it will manage the other components
process = start_component('launcher', not no_browser, background)
if not process:
return
else:
process = start_component(component, not no_browser, background)
if not process:
return
click.echo(f"Restarted {component}" + (" in background mode" if background else ""))
# In background mode, we just return immediately
if background:
return
# In foreground mode, wait for the process
click.echo("Press Ctrl+C to exit")
# Keep running until interrupted
try:
if component == 'all' or component == 'launcher':
process.wait()
else:
# For individual components, we'll just exit
return
except KeyboardInterrupt:
click.echo("\nShutting down...")
if component == 'all':
stop_component('launcher')
else:
stop_component(component)
@cli.command()
def status():
"""Show status of ETF Suite components."""
"""Check status of ETF Suite components."""
processes = get_streamlit_processes()
if not processes:
click.echo("No ETF Suite components are running")
click.echo("No ETF Suite processes are currently running.")
return
click.echo("Running components:")
for proc in processes:
click.echo(f"PID: {proc['pid']}")
click.echo(f"Command: {proc['cmdline']}")
click.echo("---")
click.echo("Running ETF Suite processes:")
for i, proc in enumerate(processes):
component = proc['component'] or 'unknown'
port = proc['port'] or 'unknown'
click.echo(f"{i+1}. {component.upper()} (PID: {proc['pid']}, Port: {port})")
@cli.command()
@click.option('--key', required=True, help='Configuration key to update (e.g., ports.launcher)')
@click.option('--value', required=True, help='New value')
def config(key, value):
"""Update ETF Suite configuration."""
"""View or update configuration."""
try:
# Convert value to appropriate type
if value.isdigit():
# Convert value to integer if possible
try:
value = int(value)
elif value.lower() in ('true', 'false'):
value = value.lower() == 'true'
except ValueError:
pass
update_config(key, value)
click.echo(f"Updated {key} to {value}")
logger.info(f"Updated configuration: {key}={value}")
except Exception as e:
click.echo(f"Error updating config: {e}")
error_msg = f"Error updating configuration: {e}"
logger.error(error_msg)
click.echo(error_msg)
@cli.command()
def logs():
"""Show recent logs."""
try:
log_files = sorted(LOGS_DIR.glob("*.log"), key=lambda x: x.stat().st_mtime, reverse=True)
if not log_files:
click.echo("No log files found")
return
latest_log = log_files[0]
click.echo(f"Showing last 20 lines of {latest_log.name}:")
click.echo("---")
with open(latest_log) as f:
lines = f.readlines()
for line in lines[-20:]:
click.echo(line.strip())
except Exception as e:
click.echo(f"Error reading logs: {e}")
"""Show recent logs from ETF Suite components."""
LOGS_DIR.mkdir(exist_ok=True)
log_files = sorted(LOGS_DIR.glob("*.log"), key=os.path.getmtime, reverse=True)
if not log_files:
click.echo("No log files found.")
return
click.echo("Recent log files:")
for i, log_file in enumerate(log_files[:5]): # Show 5 most recent logs
size = os.path.getsize(log_file) / 1024 # Size in KB
modified = datetime.fromtimestamp(os.path.getmtime(log_file)).strftime('%Y-%m-%d %H:%M:%S')
click.echo(f"{i+1}. {log_file.name} ({size:.1f} KB, last modified: {modified})")
# Show most recent log contents
if log_files:
most_recent = log_files[0]
click.echo(f"\nMost recent log ({most_recent.name}):")
try:
# Show last 20 lines of the most recent log
with open(most_recent, 'r') as f:
lines = f.readlines()
for line in lines[-20:]:
click.echo(line.strip())
except Exception as e:
click.echo(f"Error reading log file: {e}")
if __name__ == '__main__':
if __name__ == "__main__":
cli()

View File

@ -1,3 +1,11 @@
# Set page config first, before any other Streamlit commands
import streamlit as st  # imported here because set_page_config must run before the main imports below
st.set_page_config(
    page_title="ETF Analyzer",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded"
)
"""
ETF Analyzer - Comprehensive ETF Analysis Tool
@ -21,7 +29,46 @@ import time
from typing import Dict, List, Tuple, Any, Optional, Union
import sys
import yfinance as yf
from dotenv import load_dotenv
import logging
# Load environment variables
load_dotenv()
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# FMP API configuration
FMP_API_KEY = st.session_state.get('fmp_api_key', os.getenv('FMP_API_KEY', ''))
FMP_BASE_URL = "https://financialmodelingprep.com/api/v3"
def test_fmp_connection():
"""Test the FMP API connection and display status."""
try:
if not FMP_API_KEY:
return False, "No API key found"
session = get_fmp_session()
test_url = f"{FMP_BASE_URL}/profile/AAPL?apikey={FMP_API_KEY}"
response = session.get(test_url)
if response.status_code == 200:
data = response.json()
if data and isinstance(data, list) and len(data) > 0:
return True, "Connected"
return False, f"Error: {response.status_code}"
except Exception as e:
return False, f"Error: {str(e)}"
# Add FMP connection status to the navigation bar
st.sidebar.markdown("---")
st.sidebar.subheader("FMP API Status")
connection_status, message = test_fmp_connection()
if connection_status:
st.sidebar.success(f"✅ FMP API: {message}")
else:
st.sidebar.error(f"❌ FMP API: {message}")
# --- Constants and Settings ---
CACHE_DIR = Path("cache")

File diff suppressed because it is too large

pages/cache_manager.py Normal file (235 lines)
View File

@ -0,0 +1,235 @@
import json
import os
from datetime import datetime, timedelta
from pathlib import Path
import logging
from typing import Any, Dict, Optional, Tuple, Union
import hashlib
import sys
# Configure logging
log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)
log_file = log_dir / f"cache_manager_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
# Remove any existing handlers to avoid duplicate logs
for handler in logging.root.handlers[:]:
logging.root.removeHandler(handler)
# Create a formatter
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Create file handler
file_handler = logging.FileHandler(log_file, mode='a')
file_handler.setFormatter(formatter)
file_handler.setLevel(logging.INFO)
# Create console handler
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setFormatter(formatter)
console_handler.setLevel(logging.INFO)
# Configure root logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.addHandler(file_handler)
logger.addHandler(console_handler)
class CacheManager:
"""
Manages caching of ETF data to reduce API calls and improve performance.
Implements a time-based cache expiration system.
"""
def __init__(self, cache_dir: str = "cache", cache_duration_hours: int = 24):
"""
Initialize the cache manager.
Args:
cache_dir: Directory to store cache files
cache_duration_hours: Number of hours before cache expires
"""
self.cache_dir = Path(cache_dir)
self.cache_duration = timedelta(hours=cache_duration_hours)
# Create cache directory if it doesn't exist
self.cache_dir.mkdir(parents=True, exist_ok=True)
# Configure logging
self.logger = logging.getLogger(__name__)
self.logger.setLevel(logging.INFO)
# Create formatter
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Create file handler
log_file = Path("logs") / f"cache_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
log_file.parent.mkdir(exist_ok=True)
file_handler = logging.FileHandler(log_file)
file_handler.setFormatter(formatter)
self.logger.addHandler(file_handler)
self.logger.info(f"CacheManager initialized with directory: {self.cache_dir}")
self.logger.info(f"Cache duration: {cache_duration_hours} hours")
def _generate_cache_key(self, source: str, ticker: str, data_type: str) -> str:
"""
Generate a unique cache key for the data.
Args:
source: Data source (e.g., 'fmp', 'yf')
ticker: ETF ticker symbol
data_type: Type of data (e.g., 'profile', 'history')
Returns:
Cache key string
"""
return f"{source}_{ticker}_{data_type}.json"
def _get_cache_path(self, cache_key: str) -> Path:
"""
Get the full path for a cache file.
Args:
cache_key: Cache key string
Returns:
Path object for the cache file
"""
return self.cache_dir / cache_key
def _is_cache_valid(self, cache_path: Path) -> bool:
"""
Check if a cache file is still valid based on its age.
Args:
cache_path: Path to the cache file
Returns:
True if cache is valid, False otherwise
"""
if not cache_path.exists():
return False
file_age = datetime.now() - datetime.fromtimestamp(cache_path.stat().st_mtime)
is_valid = file_age < self.cache_duration
self.logger.debug(f"Cache file {cache_path} age: {file_age}, valid: {is_valid}")
return is_valid
def save_to_cache(self, cache_key: str, data: Any) -> bool:
"""
Save data to cache.
Args:
cache_key: Cache key string
data: Data to cache
Returns:
True if save was successful, False otherwise
"""
try:
cache_path = self._get_cache_path(cache_key)
# Create cache directory if it doesn't exist
cache_path.parent.mkdir(parents=True, exist_ok=True)
# Save data to JSON file
with open(cache_path, 'w') as f:
json.dump(data, f, indent=2)
self.logger.info(f"Data saved to cache: {cache_path}")
return True
except Exception as e:
self.logger.error(f"Error saving to cache: {str(e)}")
return False
def load_from_cache(self, cache_key: str) -> Tuple[Optional[Any], bool]:
"""
Load data from cache if it exists and is valid.
Args:
cache_key: Cache key string
Returns:
Tuple of (cached data, is_valid)
"""
try:
cache_path = self._get_cache_path(cache_key)
if not cache_path.exists():
self.logger.debug(f"Cache miss: {cache_path}")
return None, False
if not self._is_cache_valid(cache_path):
self.logger.info(f"Cache expired: {cache_path}")
return None, False
# Load data from JSON file
with open(cache_path, 'r') as f:
data = json.load(f)
self.logger.info(f"Data loaded from cache: {cache_path}")
return data, True
except Exception as e:
self.logger.error(f"Error loading from cache: {str(e)}")
return None, False
def clear_expired_cache(self) -> int:
"""
Clear all expired cache files.
Returns:
Number of files cleared
"""
try:
cleared_count = 0
for cache_file in self.cache_dir.glob("*.json"):
if not self._is_cache_valid(cache_file):
cache_file.unlink()
cleared_count += 1
self.logger.info(f"Cleared expired cache: {cache_file}")
self.logger.info(f"Cleared {cleared_count} expired cache files")
return cleared_count
except Exception as e:
self.logger.error(f"Error clearing expired cache: {str(e)}")
return 0
def get_cache_stats(self) -> Dict[str, Any]:
"""
Get statistics about the cache.
Returns:
Dictionary with cache statistics
"""
try:
total_files = 0
expired_files = 0
total_size = 0
for cache_file in self.cache_dir.glob("*.json"):
total_files += 1
total_size += cache_file.stat().st_size
if not self._is_cache_valid(cache_file):
expired_files += 1
stats = {
"total_files": total_files,
"expired_files": expired_files,
"total_size_bytes": total_size,
"cache_dir": str(self.cache_dir),
"cache_duration_hours": self.cache_duration.total_seconds() / 3600
}
self.logger.info(f"Cache statistics: {stats}")
return stats
except Exception as e:
self.logger.error(f"Error getting cache stats: {str(e)}")
return {}
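For reference, a minimal usage sketch of the class above; the source/ticker values are illustrative, and `_generate_cache_key` is technically an internal helper:

```python
from pages.cache_manager import CacheManager  # import path assumes the repo layout above

cache = CacheManager(cache_dir="cache", cache_duration_hours=24)
key = cache._generate_cache_key("yf", "MSTY", "profile")

data, is_valid = cache.load_from_cache(key)
if not is_valid:
    data = {"symbol": "MSTY", "price": 21.52}  # stand-in for a real API response
    cache.save_to_cache(key, data)
```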

portfolios/Testing1.json Normal file (29 lines)
View File

@ -0,0 +1,29 @@
{
"name": "Testing1",
"created_at": "2025-05-25T15:29:13.669593",
"mode": "Income Target",
"target": 1000.0,
"allocations": [
{
"ticker": "MSTY",
"allocation": 70.0,
"yield": 142.7343086361491,
"price": 21.19,
"risk_level": "High"
},
{
"ticker": "ULTY",
"allocation": 20.0,
"yield": 45.937467700258395,
"price": 19.35,
"risk_level": "Medium"
},
{
"ticker": "CONY",
"allocation": 10.0,
"yield": 67.58371269600406,
"price": 19.77,
"risk_level": "Medium"
}
]
}

View File

@ -16,4 +16,5 @@ plotly>=5.14.1
requests>=2.31.0
reportlab>=3.6.13
psutil>=5.9.0
click>=8.1.0
click>=8.1.0
yfinance>=0.2.36

requirements.txt.backup Normal file (20 lines)
View File

@ -0,0 +1,20 @@
openai
langchain
langchain_openai
chromadb
docx2txt
pypdf
streamlit>=1.28.0
tiktoken
pdfkit
pandas>=1.5.3
numpy>=1.24.3
matplotlib>=3.7.1
seaborn>=0.12.2
fmp-python>=0.1.5
plotly>=5.14.1
requests>=2.31.0
reportlab>=3.6.13
psutil>=5.9.0
click>=8.1.0
yfinance>=0.2.36

scripts/setup_secrets.py Normal file (71 lines)
View File

@ -0,0 +1,71 @@
import os
import sys
import logging
from pathlib import Path
def setup_secrets():
"""Set up secrets for the ETF Portal application."""
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
try:
# Get the FMP API key from user
fmp_api_key = input("Enter your FMP API key: ").strip()
if not fmp_api_key:
logger.error("❌ FMP API key is required")
return False
# Create .streamlit directory if it doesn't exist
streamlit_dir = Path(".streamlit")
streamlit_dir.mkdir(exist_ok=True)
# Create secrets.toml
secrets_path = streamlit_dir / "secrets.toml"
with open(secrets_path, "w") as f:
f.write(f'# FMP API Configuration\n')
f.write(f'FMP_API_KEY = "{fmp_api_key}"\n\n')
f.write(f'# Cache Configuration\n')
f.write(f'CACHE_DURATION_HOURS = 24\n')
# Set proper permissions
secrets_path.chmod(0o600) # Only owner can read/write
logger.info(f"✅ Secrets file created at {secrets_path}")
logger.info("🔒 File permissions set to 600 (owner read/write only)")
# Create cache directories
cache_dirs = [
Path("cache/FMP_cache"),
Path("cache/yfinance_cache")
]
for cache_dir in cache_dirs:
cache_dir.mkdir(parents=True, exist_ok=True)
cache_dir.chmod(0o755) # Owner can read/write/execute, others can read/execute
logger.info(f"✅ Cache directory created: {cache_dir}")
return True
except Exception as e:
logger.error(f"❌ Setup failed: {str(e)}")
return False
if __name__ == "__main__":
print("🔧 ETF Portal Secrets Setup")
print("===========================")
print("This script will help you set up the secrets for the ETF Portal application.")
print("Make sure you have your FMP API key ready.")
print()
success = setup_secrets()
if success:
print("\n✅ Setup completed successfully!")
print("\nNext steps:")
print("1. Run the test script to verify the configuration:")
print(" python -m ETF_Portal.tests.test_api_config")
print("\n2. If you're deploying to a server, make sure to:")
print(" - Set the secrets in your Streamlit dashboard")
print(" - Create the cache directories with proper permissions")
else:
print("\n❌ Setup failed. Check the logs for details.")
sys.exit(1)

scripts/setup_vps.py Normal file (102 lines)
View File

@ -0,0 +1,102 @@
import os
import sys
import logging
from pathlib import Path
import subprocess
def setup_vps_environment():
"""Set up the environment for the ETF Portal on VPS."""
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
try:
# Get the FMP API key from user
fmp_api_key = input("Enter your FMP API key: ").strip()
if not fmp_api_key:
logger.error("❌ FMP API key is required")
return False
# Add to environment file
env_file = Path("/etc/environment")
if not env_file.exists():
env_file = Path.home() / ".bashrc"
        # Check if the key is already present (match on the variable name: the stored
        # line is quoted, so it would never equal the raw f-string used before)
        with open(env_file, 'r') as f:
            content = f.read()
        if "FMP_API_KEY" not in content:
            with open(env_file, 'a') as f:
                f.write('\n# ETF Portal Configuration\n')
                if env_file.name == ".bashrc":
                    f.write(f'export FMP_API_KEY="{fmp_api_key}"\n')
                else:
                    # /etc/environment takes plain KEY="value" lines, without "export"
                    f.write(f'FMP_API_KEY="{fmp_api_key}"\n')
            logger.info(f"✅ Added FMP_API_KEY to {env_file}")
        else:
            logger.info("✅ FMP_API_KEY already exists in environment file")
# Create cache directories
cache_dirs = [
Path("cache/FMP_cache"),
Path("cache/yfinance_cache")
]
for cache_dir in cache_dirs:
cache_dir.mkdir(parents=True, exist_ok=True)
cache_dir.chmod(0o755) # Owner can read/write/execute, others can read/execute
logger.info(f"✅ Cache directory created: {cache_dir}")
# Set up systemd service (if needed)
if input("Do you want to set up a systemd service for the ETF Portal? (y/n): ").lower() == 'y':
service_content = f"""[Unit]
Description=ETF Portal Streamlit App
After=network.target
[Service]
User={os.getenv('USER')}
WorkingDirectory={Path.cwd()}
Environment="FMP_API_KEY={fmp_api_key}"
ExecStart=/usr/local/bin/streamlit run ETF_Portal/pages/ETF_Analyzer.py
Restart=always
[Install]
WantedBy=multi-user.target
"""
service_path = Path("/etc/systemd/system/etf-portal.service")
if not service_path.exists():
with open(service_path, 'w') as f:
f.write(service_content)
logger.info("✅ Created systemd service file")
# Reload systemd and enable service
subprocess.run(["sudo", "systemctl", "daemon-reload"])
subprocess.run(["sudo", "systemctl", "enable", "etf-portal"])
logger.info("✅ Enabled ETF Portal service")
else:
logger.info("✅ Service file already exists")
return True
except Exception as e:
logger.error(f"❌ Setup failed: {str(e)}")
return False
if __name__ == "__main__":
print("🔧 ETF Portal VPS Setup")
print("======================")
print("This script will help you set up the ETF Portal on your VPS.")
print("Make sure you have your FMP API key ready.")
print()
success = setup_vps_environment()
if success:
print("\n✅ Setup completed successfully!")
print("\nNext steps:")
print("1. Source your environment file:")
print(" source /etc/environment # or source ~/.bashrc")
print("\n2. Run the test script to verify the configuration:")
print(" python -m ETF_Portal.tests.test_api_config")
print("\n3. If you set up the systemd service:")
print(" sudo systemctl start etf-portal")
print(" sudo systemctl status etf-portal # Check status")
else:
print("\n❌ Setup failed. Check the logs for details.")
sys.exit(1)

View File

@ -0,0 +1,4 @@
from .service import DripService
from .models import DripConfig, DripResult, MonthlyData, PortfolioAllocation
__all__ = ['DripService', 'DripConfig', 'DripResult', 'MonthlyData', 'PortfolioAllocation']

View File

@ -0,0 +1,23 @@
import logging
import sys
def setup_logger():
# Create logger
logger = logging.getLogger('drip_service')
logger.setLevel(logging.DEBUG)
# Create console handler with formatting
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(logging.DEBUG)
# Create formatter
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
console_handler.setFormatter(formatter)
# Add handler to logger
logger.addHandler(console_handler)
return logger
# Create logger instance
logger = setup_logger()

View File

@ -0,0 +1,46 @@
from dataclasses import dataclass
from typing import Dict, List, Optional
from datetime import datetime
@dataclass
class PortfolioAllocation:
ticker: str
price: float
yield_annual: float
initial_shares: float
initial_allocation: float
distribution: str
@dataclass
class MonthlyData:
month: int
total_value: float
monthly_income: float
cumulative_income: float
shares: Dict[str, float]
prices: Dict[str, float]
yields: Dict[str, float]
@dataclass
class DripConfig:
months: int
erosion_type: str
erosion_level: Dict
    dividend_frequency: Optional[Dict[str, int]] = None
def __post_init__(self):
if self.dividend_frequency is None:
self.dividend_frequency = {
"Monthly": 12,
"Quarterly": 4,
"Semi-Annually": 2,
"Annually": 1,
"Unknown": 12 # Default to monthly if unknown
}
@dataclass
class DripResult:
monthly_data: List[MonthlyData]
final_portfolio_value: float
total_income: float
total_shares: Dict[str, float]
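A hedged end-to-end sketch of how these models feed the DripService defined below; the package path, column values, and erosion settings are illustrative, and the call will invoke NavErosionService for the ticker:

```python
import pandas as pd
from services.drip_service import DripService, DripConfig  # package path is an assumption

portfolio_df = pd.DataFrame([{
    "Ticker": "MSTY", "Price": 21.52, "Yield (%)": 124.71,
    "Shares": 100.0, "Allocation (%)": 100.0, "Distribution Period": "Monthly",
}])
config = DripConfig(months=24, erosion_type="nav", erosion_level={})
result = DripService().calculate_drip_growth(portfolio_df, config)
print(result.final_portfolio_value, result.total_income)
```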

View File

@ -0,0 +1,455 @@
from typing import Dict, List, Optional, Tuple, Any
import pandas as pd
import numpy as np
import traceback
from dataclasses import dataclass, field
from enum import Enum
from .models import PortfolioAllocation, MonthlyData, DripConfig, DripResult
from ..nav_erosion_service import NavErosionService
from .logger import logger
class DistributionFrequency(Enum):
"""Enum for distribution frequencies"""
MONTHLY = ("Monthly", 12)
QUARTERLY = ("Quarterly", 4)
SEMI_ANNUALLY = ("Semi-Annually", 2)
ANNUALLY = ("Annually", 1)
UNKNOWN = ("Unknown", 12)
def __init__(self, name: str, payments_per_year: int):
self.display_name = name
self.payments_per_year = payments_per_year
@dataclass
class TickerData:
"""Data structure for individual ticker information"""
ticker: str
price: float
annual_yield: float
shares: float
allocation_pct: float
distribution_freq: DistributionFrequency
@property
def market_value(self) -> float:
return self.price * self.shares
@property
def monthly_yield(self) -> float:
return self.annual_yield / 12
@property
def distribution_yield(self) -> float:
return self.annual_yield / self.distribution_freq.payments_per_year
class DripService:
"""Enhanced DRIP calculation service with improved performance and accuracy"""
def __init__(self) -> None:
self.DISTRIBUTION_FREQUENCIES = {freq.display_name: freq for freq in DistributionFrequency}
self.nav_erosion_service = NavErosionService()
def calculate_drip_growth(self, portfolio_df: pd.DataFrame, config: DripConfig) -> DripResult:
"""
Calculate DRIP growth for a portfolio over a specified period with enhanced accuracy.
Args:
portfolio_df: DataFrame containing portfolio allocation
config: DripConfig object with simulation parameters
Returns:
DripResult object containing the simulation results
"""
try:
# Validate inputs
self._validate_inputs(portfolio_df, config)
# Get erosion data from nav_erosion_service
erosion_data = self.nav_erosion_service.analyze_etf_erosion_risk(portfolio_df["Ticker"].tolist())
logger.info(f"Erosion data results: {erosion_data.results}")
# Initialize erosion rates dictionary
erosion_rates = {}
# Use erosion rates from nav_erosion_service
for ticker in portfolio_df["Ticker"]:
# Find the result for this ticker in erosion_data.results
result = next((r for r in erosion_data.results if r.ticker == ticker), None)
if result:
erosion_rates[ticker] = {
"nav": result.monthly_nav_erosion_rate,
"yield": result.monthly_yield_erosion_rate
}
logger.info(f"=== EROSION RATE DEBUG ===")
logger.info(f"Ticker: {ticker}")
logger.info(f"Erosion rates from nav_erosion_service:")
logger.info(f" NAV: {erosion_rates[ticker]['nav']:.4%}")
logger.info(f" Yield: {erosion_rates[ticker]['yield']:.4%}")
logger.info(f"=== END EROSION RATE DEBUG ===\n")
else:
# Use default erosion rates if not found
erosion_rates[ticker] = {
"nav": 0.05, # 5% per month (very high, for test)
"yield": 0.07 # 7% per month (very high, for test)
}
logger.info(f"=== EROSION RATE DEBUG ===")
logger.info(f"Ticker: {ticker}")
logger.info(f"Using default erosion rates:")
logger.info(f" NAV: {erosion_rates[ticker]['nav']:.4%}")
logger.info(f" Yield: {erosion_rates[ticker]['yield']:.4%}")
logger.info(f"=== END EROSION RATE DEBUG ===\n")
# Log the final erosion rates dictionary
logger.info(f"Final erosion rates dictionary: {erosion_rates}")
# Initialize portfolio data
ticker_data = self._initialize_ticker_data(portfolio_df)
# Pre-calculate distribution schedule for performance
distribution_schedule = self._create_distribution_schedule(ticker_data, config.months)
# Initialize simulation state
simulation_state = self._initialize_simulation_state(ticker_data)
monthly_data: List[MonthlyData] = []
# Run monthly simulation
for month in range(1, config.months + 1):
logger.info(f"\n=== Starting Month {month} ===")
logger.info(f"Initial state for month {month}:")
for ticker in ticker_data.keys():
logger.info(f" {ticker}:")
logger.info(f" Price: ${simulation_state['current_prices'][ticker]:.2f}")
logger.info(f" Yield: {simulation_state['current_yields'][ticker]:.2%}")
logger.info(f" Shares: {simulation_state['current_shares'][ticker]:.4f}")
month_result = self._simulate_month(
month,
simulation_state,
ticker_data,
erosion_rates,
distribution_schedule
)
monthly_data.append(month_result)
logger.info(f"Final state for month {month}:")
for ticker in ticker_data.keys():
logger.info(f" {ticker}:")
logger.info(f" Price: ${simulation_state['current_prices'][ticker]:.2f}")
logger.info(f" Yield: {simulation_state['current_yields'][ticker]:.2%}")
logger.info(f" Shares: {simulation_state['current_shares'][ticker]:.4f}")
logger.info(f"=== End Month {month} ===\n")
# Calculate final results
return self._create_drip_result(monthly_data, simulation_state)
except Exception as e:
logger.error(f"Error calculating DRIP growth: {str(e)}")
logger.error(traceback.format_exc())
raise
def _validate_inputs(self, portfolio_df: pd.DataFrame, config: DripConfig) -> None:
"""Validate input parameters"""
required_columns = ["Ticker", "Price", "Yield (%)", "Shares"]
missing_columns = [col for col in required_columns if col not in portfolio_df.columns]
if missing_columns:
raise ValueError(f"Missing required columns: {missing_columns}")
if config.months <= 0:
raise ValueError("Months must be positive")
if portfolio_df.empty:
raise ValueError("Portfolio DataFrame is empty")
def _initialize_ticker_data(self, portfolio_df: pd.DataFrame) -> Dict[str, TickerData]:
"""Initialize ticker data with validation"""
ticker_data = {}
for _, row in portfolio_df.iterrows():
ticker = row["Ticker"]
# Handle distribution frequency
dist_period = row.get("Distribution Period", "Monthly")
dist_freq = self.DISTRIBUTION_FREQUENCIES.get(dist_period, DistributionFrequency.MONTHLY)
ticker_data[ticker] = TickerData(
ticker=ticker,
price=max(0.01, float(row["Price"])), # Prevent zero/negative prices
annual_yield=max(0.0, float(row["Yield (%)"] / 100)), # Convert to decimal
shares=max(0.0, float(row["Shares"])),
allocation_pct=float(row.get("Allocation (%)", 0) / 100),
distribution_freq=dist_freq
)
return ticker_data
def _create_distribution_schedule(self, ticker_data: Dict[str, TickerData], total_months: int) -> Dict[str, List[int]]:
"""Pre-calculate which months each ticker pays distributions"""
schedule = {}
for ticker, data in ticker_data.items():
distribution_months = []
freq = data.distribution_freq
for month in range(1, total_months + 1):
if self._is_distribution_month(month, freq):
distribution_months.append(month)
schedule[ticker] = distribution_months
return schedule
def _initialize_simulation_state(self, ticker_data: Dict[str, TickerData]) -> Dict[str, Any]:
"""Initialize simulation state variables"""
return {
'current_shares': {ticker: data.shares for ticker, data in ticker_data.items()},
'current_prices': {ticker: data.price for ticker, data in ticker_data.items()},
'current_yields': {ticker: data.annual_yield for ticker, data in ticker_data.items()},
'cumulative_income': 0.0
}
def _simulate_month(
self,
month: int,
state: Dict[str, Any],
ticker_data: Dict[str, TickerData],
erosion_rates: Dict[str, Dict[str, float]],
distribution_schedule: Dict[str, List[int]]
) -> MonthlyData:
"""Simulate a single month with improved accuracy"""
# Debug logging for erosion rates
logger.info(f"\n=== EROSION RATES DEBUG ===")
logger.info(f"Erosion rates dictionary: {erosion_rates}")
for ticker, rates in erosion_rates.items():
logger.info(f" {ticker}:")
logger.info(f" nav: {rates['nav']:.4%}")
logger.info(f" yield: {rates['yield']:.4%}")
logger.info(f"=== END EROSION RATES DEBUG ===\n")
# Apply erosion first
for ticker, rates in erosion_rates.items():
if ticker in state['current_prices']:
# Get monthly erosion rates (already in decimal form)
monthly_nav_erosion = rates['nav']
monthly_yield_erosion = rates['yield']
# Get current values
old_price = state['current_prices'][ticker]
old_yield = state['current_yields'][ticker]
# Debug logging
logger.info(f"\n=== EROSION CALCULATION DEBUG ===")
logger.info(f"Ticker: {ticker}")
logger.info(f"Raw erosion rates from nav_erosion_service:")
logger.info(f" monthly_nav_erosion: {monthly_nav_erosion:.4%}")
logger.info(f" monthly_yield_erosion: {monthly_yield_erosion:.4%}")
logger.info(f"Current values:")
logger.info(f" old_price: ${old_price:.4f}")
logger.info(f" old_yield: {old_yield:.4%}")
# Calculate new values
new_price = old_price * (1 - monthly_nav_erosion)
new_yield = old_yield * (1 - monthly_yield_erosion)
logger.info(f"Calculated new values:")
logger.info(f" new_price = ${old_price:.4f} * (1 - {monthly_nav_erosion:.4%})")
logger.info(f" new_price = ${old_price:.4f} * {1 - monthly_nav_erosion:.4f}")
logger.info(f" new_price = ${new_price:.4f}")
logger.info(f" new_yield = {old_yield:.4%} * (1 - {monthly_yield_erosion:.4%})")
logger.info(f" new_yield = {old_yield:.4%} * {1 - monthly_yield_erosion:.4f}")
logger.info(f" new_yield = {new_yield:.4%}")
# Apply the new values with bounds checking
state['current_prices'][ticker] = max(0.01, new_price) # Prevent zero/negative prices
state['current_yields'][ticker] = max(0.0, new_yield) # Prevent negative yields
logger.info(f"Final values after bounds checking:")
logger.info(f" final_price: ${state['current_prices'][ticker]:.4f}")
logger.info(f" final_yield: {state['current_yields'][ticker]:.4%}")
logger.info(f"=== END EROSION CALCULATION DEBUG ===\n")
# Log the actual erosion being applied
logger.info(f"Applied erosion to {ticker}:")
logger.info(f" NAV: {monthly_nav_erosion:.4%} -> New price: ${state['current_prices'][ticker]:.2f}")
logger.info(f" Yield: {monthly_yield_erosion:.4%} -> New yield: {state['current_yields'][ticker]:.2%}")
# Calculate monthly income from distributions using eroded values
monthly_income = self._calculate_monthly_distributions(
month, state, ticker_data, distribution_schedule
)
# Update cumulative income
state['cumulative_income'] += monthly_income
# Reinvest dividends (DRIP)
        self._reinvest_dividends(month, state, ticker_data, distribution_schedule)
# Calculate total portfolio value with bounds checking
total_value = 0.0
for ticker in ticker_data.keys():
shares = state['current_shares'][ticker]
price = state['current_prices'][ticker]
if shares > 0 and price > 0:
total_value += shares * price
return MonthlyData(
month=month,
total_value=total_value,
monthly_income=monthly_income,
cumulative_income=state['cumulative_income'],
shares=state['current_shares'].copy(),
prices=state['current_prices'].copy(),
yields=state['current_yields'].copy()
)
    def _calculate_monthly_distributions(
        self,
        month: int,
        state: Dict[str, Any],
        ticker_data: Dict[str, TickerData],
        distribution_schedule: Dict[str, List[int]]
    ) -> float:
        """Calculate distributions for the current month"""
        monthly_income = 0.0
        for ticker, data in ticker_data.items():
            if month in distribution_schedule[ticker]:
                shares = state['current_shares'][ticker]
                price = state['current_prices'][ticker]
                yield_rate = state['current_yields'][ticker]
                # Calculate distribution amount using annual yield divided by payments per year
                distribution_yield = yield_rate / data.distribution_freq.payments_per_year
                distribution_amount = shares * price * distribution_yield
                monthly_income += distribution_amount
        return monthly_income
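
    # Worked example (hypothetical numbers, not part of the service): a quarterly
    # payer held as 200 shares at $25 with an 8% annual yield distributes
    #   distribution_yield  = 0.08 / 4 = 0.02 per payment
    #   distribution_amount = 200 * 25 * 0.02 = $100 in each payout month.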
    def _reinvest_dividends(
        self,
        month: int,
        state: Dict[str, Any],
        distribution_schedule: Dict[str, List[int]]
    ) -> None:
        """Reinvest dividends for tickers that distributed in this month"""
        for ticker, distribution_months in distribution_schedule.items():
            if month in distribution_months:
                shares = state['current_shares'][ticker]
                price = state['current_prices'][ticker]
                yield_rate = state['current_yields'][ticker]
                # Calculate dividend income using the correct distribution frequency.
                # Note: this must stay in sync with _calculate_monthly_distributions,
                # which reads the frequency from ticker_data; if the two sources
                # disagree, the reinvested cash will not match the reported income.
                freq = self.DISTRIBUTION_FREQUENCIES.get(ticker, DistributionFrequency.MONTHLY)
                dividend_income = shares * price * (yield_rate / freq.payments_per_year)
                # Purchase additional shares
                if price > 0:
                    new_shares = dividend_income / price
                    state['current_shares'][ticker] += new_shares
    def _is_distribution_month(self, month: int, frequency: DistributionFrequency) -> bool:
        """Check if current month is a distribution month"""
        # e.g. a quarterly payer distributes in months 3, 6, 9 and 12
        if frequency == DistributionFrequency.MONTHLY:
            return True
        elif frequency == DistributionFrequency.QUARTERLY:
            return month % 3 == 0
        elif frequency == DistributionFrequency.SEMI_ANNUALLY:
            return month % 6 == 0
        elif frequency == DistributionFrequency.ANNUALLY:
            return month % 12 == 0
        else:
            return True  # Default to monthly for unknown frequencies
    def _create_drip_result(self, monthly_data: List[MonthlyData], state: Dict[str, Any]) -> DripResult:
        """Create final DRIP result object"""
        if not monthly_data:
            raise ValueError("No monthly data generated")
        final_data = monthly_data[-1]
        return DripResult(
            monthly_data=monthly_data,
            final_portfolio_value=final_data.total_value,
            total_income=final_data.cumulative_income,
            total_shares=state['current_shares'].copy()
        )
    # Utility methods for analysis and comparison
    def calculate_drip_vs_no_drip_comparison(
        self,
        portfolio_df: pd.DataFrame,
        config: DripConfig
    ) -> Dict[str, Any]:
        """Calculate comparison between DRIP and no-DRIP scenarios"""
        # Calculate DRIP scenario
        drip_result = self.calculate_drip_growth(portfolio_df, config)
        # Calculate no-DRIP scenario (dividends not reinvested)
        no_drip_result = self._calculate_no_drip_scenario(portfolio_df, config)
        # Calculate comparison metrics (guard against a zero no-DRIP value)
        drip_advantage = drip_result.final_portfolio_value - no_drip_result['final_value']
        if no_drip_result['final_value'] > 0:
            percentage_advantage = (drip_advantage / no_drip_result['final_value']) * 100
        else:
            percentage_advantage = 0.0
        return {
            'drip_final_value': drip_result.final_portfolio_value,
            'no_drip_final_value': no_drip_result['final_value'],
            'drip_advantage': drip_advantage,
            'percentage_advantage': percentage_advantage,
            'total_dividends_reinvested': drip_result.total_income,
            'cash_dividends_no_drip': no_drip_result['total_dividends']
        }
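
    # Worked example (hypothetical numbers): if the DRIP path ends at $118,000
    # and the no-DRIP path at $100,000, then
    #   drip_advantage       = 118_000 - 100_000 = 18_000
    #   percentage_advantage = 18_000 / 100_000 * 100 = 18.0 (% over no-DRIP)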
    def _calculate_no_drip_scenario(self, portfolio_df: pd.DataFrame, config: DripConfig) -> Dict[str, float]:
        """Calculate scenario where dividends are not reinvested"""
        ticker_data = self._initialize_ticker_data(portfolio_df)
        erosion_data = self.nav_erosion_service.analyze_etf_erosion_risk(portfolio_df["Ticker"].tolist())
        erosion_rates = {
            result.ticker: {
                "nav": result.monthly_nav_erosion_rate,
                "yield": result.monthly_yield_erosion_rate
            }
            for result in erosion_data.results
        }
        state = self._initialize_simulation_state(ticker_data)
        # Build the schedule once instead of rebuilding it every month
        distribution_schedule = self._create_distribution_schedule(ticker_data, config.months)
        total_dividends = 0.0
        for month in range(1, config.months + 1):
            # Calculate dividends but don't reinvest
            monthly_dividends = self._calculate_monthly_distributions(
                month, state, ticker_data, distribution_schedule
            )
            total_dividends += monthly_dividends
            # Apply erosion
            for ticker, rates in erosion_rates.items():
                if ticker in state['current_prices']:
                    # Get monthly erosion rates (already in decimal form)
                    monthly_nav_erosion = rates['nav']
                    monthly_yield_erosion = rates['yield']
                    # Apply NAV erosion (decrease price)
                    old_price = state['current_prices'][ticker]
                    new_price = old_price * (1 - monthly_nav_erosion)
                    state['current_prices'][ticker] = max(0.01, new_price)  # Prevent zero/negative prices
                    # Apply yield erosion (decrease yield)
                    old_yield = state['current_yields'][ticker]
                    new_yield = old_yield * (1 - monthly_yield_erosion)
                    state['current_yields'][ticker] = max(0.0, new_yield)  # Prevent negative yields
        final_value = sum(
            state['current_shares'][ticker] * state['current_prices'][ticker]
            for ticker in ticker_data.keys()
        )
        return {
            'final_value': final_value,
            'total_dividends': total_dividends
        }
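
Taken together, one simulated month reduces to three arithmetic steps: erode, distribute, reinvest. A minimal standalone sketch with hypothetical numbers (one monthly-paying ticker; not the service's own code):

# One simulated month, reduced to its arithmetic (hypothetical numbers).
price, yield_rate, shares = 20.00, 0.12, 100.0    # $20/share, 12% annual yield
monthly_nav_erosion = 1 - (1 - 0.05) ** (1 / 12)  # 5%/year NAV erosion ≈ 0.43%/month

price *= 1 - monthly_nav_erosion             # erode NAV: ≈ $19.91
income = shares * price * (yield_rate / 12)  # monthly payout: ≈ $19.91
shares += income / price                     # DRIP buys ≈ 1.0 extra share
print(f"price={price:.2f} income={income:.2f} shares={shares:.3f}")

The no-DRIP scenario above runs the same first two steps but skips the reinvestment, letting the cash accumulate in total_dividends instead.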

nav_erosion_service/__init__.py (new file)

@@ -0,0 +1,8 @@
"""
Nav Erosion Service package
"""
from .service import NavErosionService
from .models import NavErosionResult
__all__ = ['NavErosionService', 'NavErosionResult']
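
Given these re-exports, a consumer can import straight from the package root. A hypothetical usage sketch (assuming the package directory is named nav_erosion_service, and run inside a Streamlit app, since the service reads erosion levels from st.session_state):

from nav_erosion_service import NavErosionService

service = NavErosionService()
analysis = service.analyze_etf_erosion_risk(["JEPI", "MSTY"])
print(f"portfolio risk: {analysis.portfolio_nav_risk}, summary: {analysis.risk_summary}")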

nav_erosion_service/models.py (new file)

@@ -0,0 +1,31 @@
from dataclasses import dataclass
from typing import Dict, List, Optional
from datetime import datetime

@dataclass
class NavErosionConfig:
    max_erosion_level: int = 9
    max_monthly_erosion: float = 1 - (0.1)**(1/12)  # ~17.46% monthly for 90% annual erosion
    use_per_ticker: bool = False
    global_nav_rate: float = 0.0
    per_ticker_rates: Optional[Dict[str, float]] = None

@dataclass
class NavErosionResult:
    ticker: str
    nav_erosion_rate: float
    monthly_erosion_rate: float
    annual_erosion_rate: float
    risk_level: int  # 0-9 scale
    risk_explanation: str
    max_drawdown: float
    volatility: float
    is_new_etf: bool
    etf_age_years: Optional[float]

@dataclass
class NavErosionAnalysis:
    results: List[NavErosionResult]
    portfolio_nav_risk: float  # Average risk level
    portfolio_erosion_rate: float  # Weighted average erosion rate
    risk_summary: str
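
The max_monthly_erosion default comes from compounding: a monthly rate m that produces 90% annual erosion must satisfy (1 - m)**12 = 0.10, so m = 1 - 0.1**(1/12) ≈ 17.46%. A quick standalone check:

# Sanity check for the max_monthly_erosion default (standalone snippet).
annual = 0.90
monthly = 1 - (1 - annual) ** (1 / 12)  # ≈ 0.1746, i.e. ~17.46% per month
assert abs((1 - monthly) ** 12 - (1 - annual)) < 1e-12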

nav_erosion_service/service.py (new file)

@@ -0,0 +1,209 @@
from typing import Dict, List, Tuple
from enum import Enum
from dataclasses import dataclass
import streamlit as st

# Only the config is reused from models; the result/analysis classes are
# redefined below with additional yield-erosion fields, so importing them
# here would just be shadowed.
from .models import NavErosionConfig

class ETFType(Enum):
    INCOME = "Income"
    GROWTH = "Growth"
    BALANCED = "Balanced"

@dataclass
class NavErosionResult:
    """Result of NAV erosion analysis for a single ETF"""
    ticker: str
    nav_erosion_rate: float  # Annual NAV erosion rate
    yield_erosion_rate: float  # Annual yield erosion rate
    monthly_nav_erosion_rate: float  # Monthly NAV erosion rate
    monthly_yield_erosion_rate: float  # Monthly yield erosion rate
    risk_level: int
    risk_explanation: str
    max_drawdown: float
    volatility: float
    is_new_etf: bool
    etf_age_years: float

@dataclass
class NavErosionAnalysis:
    """Complete NAV erosion analysis results"""
    results: List[NavErosionResult]
    portfolio_nav_risk: float = 0.0
    portfolio_erosion_rate: float = 0.0
    risk_summary: str = ""

class NavErosionService:
    def __init__(self):
        self.NAV_COMPONENT_WEIGHTS = {
            'drawdown': 0.4,
            'volatility': 0.3,
            'sharpe': 0.15,
            'sortino': 0.15
        }
        # Default erosion rates based on risk level (0-9)
        self.RISK_TO_EROSION = {
            0: 0.01,  # 1% annual
            1: 0.02,  # 2% annual
            2: 0.03,  # 3% annual
            3: 0.04,  # 4% annual
            4: 0.05,  # 5% annual
            5: 0.06,  # 6% annual
            6: 0.07,  # 7% annual
            7: 0.08,  # 8% annual
            8: 0.09,  # 9% annual
            9: 0.10   # 10% annual
        }

    def analyze_etf_erosion_risk(self, tickers: List[str]) -> NavErosionAnalysis:
        """Analyze NAV erosion risk for a list of ETFs"""
        results = []
        print("\n=== NAV EROSION SERVICE DEBUG ===")
        print(f"Session state keys: {st.session_state.keys()}")
        print(f"Erosion level from session state: {st.session_state.get('erosion_level')}")
        for ticker in tickers:
            # Get erosion rates from session state
            erosion_level = st.session_state.get('erosion_level', {'nav': 5.0, 'yield': 5.0})
            annual_nav_erosion = erosion_level['nav'] / 100  # Convert from percentage to decimal
            annual_yield_erosion = erosion_level['yield'] / 100  # Convert from percentage to decimal
            # Convert annual rates to monthly (compounding, not simple division by 12)
            monthly_nav_erosion = 1 - (1 - annual_nav_erosion) ** (1/12)
            monthly_yield_erosion = 1 - (1 - annual_yield_erosion) ** (1/12)
            print(f"\n=== NAV EROSION SERVICE DEBUG ===")
            print(f"Ticker: {ticker}")
            print(f"Session State Values:")
            print(f"  Annual NAV Erosion: {annual_nav_erosion:.4%}")
            print(f"  Annual Yield Erosion: {annual_yield_erosion:.4%}")
            print(f"  Monthly NAV Erosion: {monthly_nav_erosion:.4%}")
            print(f"  Monthly Yield Erosion: {monthly_yield_erosion:.4%}")
            print(f"=== END NAV EROSION SERVICE DEBUG ===\n")
            result = NavErosionResult(
                ticker=ticker,
                nav_erosion_rate=annual_nav_erosion,
                yield_erosion_rate=annual_yield_erosion,
                monthly_nav_erosion_rate=monthly_nav_erosion,
                monthly_yield_erosion_rate=monthly_yield_erosion,
                risk_level=5,  # Arbitrary risk level
                risk_explanation="Using erosion rates from session state",
                max_drawdown=0.2,
                volatility=0.25,
                is_new_etf=False,
                etf_age_years=1.0
            )
            results.append(result)
            print(f"Created NavErosionResult for {ticker}:")
            print(f"  monthly_nav_erosion_rate: {result.monthly_nav_erosion_rate:.4%}")
            print(f"  monthly_yield_erosion_rate: {result.monthly_yield_erosion_rate:.4%}")
        # Calculate portfolio-level metrics (guard against an empty ticker list)
        portfolio_nav_risk = sum(r.risk_level for r in results) / len(results) if results else 0.0
        portfolio_erosion_rate = sum(r.nav_erosion_rate for r in results) / len(results) if results else 0.0
        analysis = NavErosionAnalysis(
            results=results,
            portfolio_nav_risk=portfolio_nav_risk,
            portfolio_erosion_rate=portfolio_erosion_rate,
            risk_summary="Portfolio has moderate NAV erosion risk"
        )
        print("\nFinal NavErosionAnalysis:")
        for r in analysis.results:
            print(f"  {r.ticker}:")
            print(f"    monthly_nav_erosion_rate: {r.monthly_nav_erosion_rate:.4%}")
            print(f"    monthly_yield_erosion_rate: {r.monthly_yield_erosion_rate:.4%}")
        print("=== END NAV EROSION SERVICE DEBUG ===\n")
        return analysis
    def _calculate_nav_risk(self, etf_data: Dict, etf_type: ETFType) -> Tuple[float, Dict]:
        """Calculate NAV risk components with ETF-type specific adjustments"""
        components = {}
        # Base risk calculation with ETF-type specific thresholds
        if etf_data.get('max_drawdown') is not None:
            if etf_type == ETFType.INCOME:
                # Income ETFs typically have lower drawdowns
                if etf_data['max_drawdown'] > 0.25:
                    components['drawdown'] = 7
                elif etf_data['max_drawdown'] > 0.15:
                    components['drawdown'] = 5
                elif etf_data['max_drawdown'] > 0.10:
                    components['drawdown'] = 3
                else:
                    components['drawdown'] = 2
            elif etf_type == ETFType.GROWTH:
                # Growth ETFs typically have higher drawdowns
                if etf_data['max_drawdown'] > 0.35:
                    components['drawdown'] = 7
                elif etf_data['max_drawdown'] > 0.25:
                    components['drawdown'] = 5
                elif etf_data['max_drawdown'] > 0.15:
                    components['drawdown'] = 3
                else:
                    components['drawdown'] = 2
            else:  # BALANCED
                # Balanced ETFs have moderate drawdowns
                if etf_data['max_drawdown'] > 0.30:
                    components['drawdown'] = 7
                elif etf_data['max_drawdown'] > 0.20:
                    components['drawdown'] = 5
                elif etf_data['max_drawdown'] > 0.12:
                    components['drawdown'] = 3
                else:
                    components['drawdown'] = 2
        else:
            components['drawdown'] = 4  # Default medium risk if no data
        # Rest of the method remains unchanged
        if etf_data.get('volatility') is not None:
            if etf_data['volatility'] > 0.40:
                components['volatility'] = 7
            elif etf_data['volatility'] > 0.25:
                components['volatility'] = 5
            elif etf_data['volatility'] > 0.15:
                components['volatility'] = 3
            else:
                components['volatility'] = 2
        else:
            components['volatility'] = 4
        if etf_data.get('sharpe_ratio') is not None:
            if etf_data['sharpe_ratio'] >= 2.0:
                components['sharpe'] = 1
            elif etf_data['sharpe_ratio'] >= 1.5:
                components['sharpe'] = 2
            elif etf_data['sharpe_ratio'] >= 1.0:
                components['sharpe'] = 3
            elif etf_data['sharpe_ratio'] >= 0.5:
                components['sharpe'] = 4
            else:
                components['sharpe'] = 5
        else:
            components['sharpe'] = 4
        if etf_data.get('sortino_ratio') is not None:
            if etf_data['sortino_ratio'] >= 2.0:
                components['sortino'] = 1
            elif etf_data['sortino_ratio'] >= 1.5:
                components['sortino'] = 2
            elif etf_data['sortino_ratio'] >= 1.0:
                components['sortino'] = 3
            elif etf_data['sortino_ratio'] >= 0.5:
                components['sortino'] = 4
            else:
                components['sortino'] = 5
        else:
            components['sortino'] = 4
        # Calculate weighted NAV risk
        nav_risk = sum(
            components[component] * weight
            for component, weight in self.NAV_COMPONENT_WEIGHTS.items()
        )
        return nav_risk, components
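
The final score is just a weighted average of the four component scores. A quick worked example with assumed component values (not real ETF data):

weights = {'drawdown': 0.4, 'volatility': 0.3, 'sharpe': 0.15, 'sortino': 0.15}
components = {'drawdown': 5, 'volatility': 5, 'sharpe': 4, 'sortino': 4}
nav_risk = sum(components[c] * w for c, w in weights.items())
print(nav_risk)  # 5*0.4 + 5*0.3 + 4*0.15 + 4*0.15 = 4.7

Since the weights sum to 1.0, the result stays on the same 0-9 scale as the inputs.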

setup.py

@@ -2,17 +2,27 @@
 from setuptools import setup, find_packages
 setup(
-    name="etf-suite-cli",
+    name="ETF_Portal",
     version="0.1.0",
-    description="Command-line interface for ETF Suite",
-    author="Pascal",
-    py_modules=["etf_suite_cli"],
+    packages=find_packages(),
+    include_package_data=True,
     install_requires=[
         "Click",
         "psutil",
+        "streamlit",
+        "pandas",
+        "numpy",
+        "plotly",
+        "yfinance",
+        "requests",
+        "python-dotenv"
     ],
-    entry_points="""
-        [console_scripts]
-        etf-suite=etf_suite_cli:cli
-    """,
+    entry_points={
+        "console_scripts": [
+            "etf-portal=ETF_Portal.cli:main",
+        ],
+    },
     python_requires=">=3.8",
+    author="Pascal",
+    description="ETF Portal CLI tool",
+    long_description=open("README.md").read() if open("README.md").read() else "",
+    long_description_content_type="text/markdown",
 )
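
With the dict-style entry_points, an editable install (pip install -e .) exposes an etf-portal console command that resolves to main() in ETF_Portal/cli.py; that module is referenced here but not shown in this diff.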

test_data_service.py (new file, 68 lines)

@@ -0,0 +1,68 @@
from ETF_Portal.services.data_service import DataService
import logging
import json

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

def test_data_fetching():
    # Initialize service
    service = DataService()
    # Test portfolio
    portfolio = ['MSTY', 'FEPI', 'JEPI', 'VTI', 'VOO']
    for ticker in portfolio:
        print(f"\nTesting {ticker}:")
        print("=" * 50)
        try:
            # Get ETF data
            data = service.get_etf_data(ticker)
            # Print data source
            if data.get('is_estimated'):
                print("\nData Source: High Yield Estimates")
            elif 'info' in data and data['info']:
                print("\nData Source: FMP API")
            else:
                print("\nData Source: yfinance")
            # Print data structure
            print("\nData Structure:")
            for key, value in data.items():
                if isinstance(value, dict):
                    print(f"{key}: {len(value)} items")
                    if key == 'info' and value:
                        print("  Sample info fields:")
                        for k, v in list(value.items())[:5]:
                            print(f"    {k}: {v}")
                else:
                    print(f"{key}: {value}")
            # Print key metrics
            print("\nKey Metrics:")
            print(f"Volatility: {data.get('volatility', 'N/A')}")
            print(f"Max Drawdown: {data.get('max_drawdown', 'N/A')}")
            print(f"Sharpe Ratio: {data.get('sharpe_ratio', 'N/A')}")
            print(f"Sortino Ratio: {data.get('sortino_ratio', 'N/A')}")
            print(f"Dividend Trend: {data.get('dividend_trend', 'N/A')}")
            print(f"ETF Age: {data.get('age_years', 'N/A')} years")
            print(f"Is New ETF: {data.get('is_new', 'N/A')}")
            print(f"Is Estimated: {data.get('is_estimated', 'N/A')}")
            # Save raw data for inspection
            with open(f"{ticker}_data.json", 'w') as f:
                json.dump(data, f, indent=2)
            print(f"\nRaw data saved to {ticker}_data.json")
        except Exception as e:
            logger.error(f"Error testing {ticker}: {str(e)}")
            raise

if __name__ == "__main__":
    test_data_fetching()

tests/__init__.py (new file, 1 line)

@@ -0,0 +1 @@
"""Test package for ETF Portal."""

tests/test_api_config.py (new file, 74 lines)

@@ -0,0 +1,74 @@
import streamlit as st
import logging
from api import APIFactory
import pandas as pd

def test_api_configuration():
    """Test the API configuration and secrets."""
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)
    try:
        # Initialize API factory
        api_factory = APIFactory()
        # Test FMP client
        logger.info("Testing FMP client...")
        fmp_client = api_factory.get_client('fmp')
        # Test with a known ETF
        test_symbol = "SPY"
        # Test profile data
        logger.info(f"Getting profile data for {test_symbol}...")
        profile = fmp_client.get_etf_profile(test_symbol)
        # A response is good if it is an error-free dict or a non-empty list
        profile_ok = (
            (isinstance(profile, dict) and not profile.get('error'))
            or (isinstance(profile, list) and len(profile) > 0)
        )
        if profile_ok:
            logger.info("✅ Profile data retrieved successfully")
            if isinstance(profile, list):
                logger.info(f"Retrieved {len(profile)} profile entries")
        else:
            logger.error("❌ Failed to get profile data")
            if isinstance(profile, dict):
                logger.error(f"Error: {profile.get('message', 'Unknown error')}")
            else:
                logger.error(f"Error: Unexpected response type: {type(profile)}")
        # Test historical data
        logger.info(f"Getting historical data for {test_symbol}...")
        historical = fmp_client.get_historical_data(test_symbol, period='1mo')
        if isinstance(historical, pd.DataFrame) and not historical.empty:
            logger.info("✅ Historical data retrieved successfully")
            logger.info(f"Data points: {len(historical)}")
        else:
            logger.error("❌ Failed to get historical data")
            if isinstance(historical, dict):
                logger.error(f"Error: {historical.get('message', 'Unknown error')}")
            else:
                logger.error(f"Error: Unexpected response type: {type(historical)}")
        # Test cache
        logger.info("Testing cache...")
        cache_stats = api_factory.get_cache_stats()
        logger.info(f"Cache stats: {cache_stats}")
        # Test fallback to yfinance
        logger.info("Testing fallback to yfinance...")
        yfinance_data = api_factory.get_data(test_symbol, 'etf_profile', provider='yfinance')
        yfinance_ok = (
            (isinstance(yfinance_data, dict) and not yfinance_data.get('error'))
            or (isinstance(yfinance_data, list) and len(yfinance_data) > 0)
        )
        if yfinance_ok:
            logger.info("✅ YFinance fallback working")
        else:
            logger.error("❌ YFinance fallback failed")
        logger.info("\n✅ All tests passed!")
        return True
    except Exception as e:
        logger.error(f"❌ Test failed: {str(e)}")
        return False

if __name__ == "__main__":
    success = test_api_configuration()
    if success:
        print("\n✅ All tests passed!")
    else:
        print("\n❌ Some tests failed. Check the logs for details.")