feat: update API and cache management with improved logging

This commit is contained in:
Pascal BIBEHE 2025-05-27 16:27:13 +02:00
parent 027febf7da
commit e0dc6e57eb
8 changed files with 250 additions and 171 deletions

37
=0.2.36 Normal file
View File

@ -0,0 +1,37 @@
Collecting yfinance
Using cached yfinance-0.2.61-py2.py3-none-any.whl (117 kB)
Requirement already satisfied: pandas>=1.3.0 in ./venv/lib/python3.11/site-packages (from yfinance) (2.2.3)
Requirement already satisfied: numpy>=1.16.5 in ./venv/lib/python3.11/site-packages (from yfinance) (2.2.6)
Requirement already satisfied: requests>=2.31 in ./venv/lib/python3.11/site-packages (from yfinance) (2.32.3)
Collecting multitasking>=0.0.7
Using cached multitasking-0.0.11-py3-none-any.whl (8.5 kB)
Collecting platformdirs>=2.0.0
Using cached platformdirs-4.3.8-py3-none-any.whl (18 kB)
Requirement already satisfied: pytz>=2022.5 in ./venv/lib/python3.11/site-packages (from yfinance) (2025.2)
Collecting frozendict>=2.3.4
Using cached frozendict-2.4.6-py311-none-any.whl (16 kB)
Collecting peewee>=3.16.2
Using cached peewee-3.18.1-py3-none-any.whl
Collecting beautifulsoup4>=4.11.1
Using cached beautifulsoup4-4.13.4-py3-none-any.whl (187 kB)
Collecting curl_cffi>=0.7
Using cached curl_cffi-0.11.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.5 MB)
Requirement already satisfied: protobuf>=3.19.0 in ./venv/lib/python3.11/site-packages (from yfinance) (6.31.0)
Collecting websockets>=13.0
Using cached websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (182 kB)
Collecting soupsieve>1.2
Using cached soupsieve-2.7-py3-none-any.whl (36 kB)
Requirement already satisfied: typing-extensions>=4.0.0 in ./venv/lib/python3.11/site-packages (from beautifulsoup4>=4.11.1->yfinance) (4.13.2)
Collecting cffi>=1.12.0
Using cached cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (467 kB)
Requirement already satisfied: certifi>=2024.2.2 in ./venv/lib/python3.11/site-packages (from curl_cffi>=0.7->yfinance) (2025.4.26)
Requirement already satisfied: python-dateutil>=2.8.2 in ./venv/lib/python3.11/site-packages (from pandas>=1.3.0->yfinance) (2.9.0.post0)
Requirement already satisfied: tzdata>=2022.7 in ./venv/lib/python3.11/site-packages (from pandas>=1.3.0->yfinance) (2025.2)
Requirement already satisfied: charset-normalizer<4,>=2 in ./venv/lib/python3.11/site-packages (from requests>=2.31->yfinance) (3.4.2)
Requirement already satisfied: idna<4,>=2.5 in ./venv/lib/python3.11/site-packages (from requests>=2.31->yfinance) (3.10)
Requirement already satisfied: urllib3<3,>=1.21.1 in ./venv/lib/python3.11/site-packages (from requests>=2.31->yfinance) (2.4.0)
Collecting pycparser
Using cached pycparser-2.22-py3-none-any.whl (117 kB)
Requirement already satisfied: six>=1.5 in ./venv/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas>=1.3.0->yfinance) (1.17.0)
Installing collected packages: peewee, multitasking, websockets, soupsieve, pycparser, platformdirs, frozendict, cffi, beautifulsoup4, curl_cffi, yfinance
Successfully installed beautifulsoup4-4.13.4 cffi-1.17.1 curl_cffi-0.11.1 frozendict-2.4.6 multitasking-0.0.11 peewee-3.18.1 platformdirs-4.3.8 pycparser-2.22 soupsieve-2.7 websockets-15.0.1 yfinance-0.2.61

View File

@ -1,4 +1,6 @@
from .factory import APIFactory
from .base import BaseAPIClient
from .fmp.client import FMPClient
from .yfinance.client import YFinanceClient
__all__ = ['APIFactory', 'BaseAPIClient']
__all__ = ['APIFactory', 'BaseAPIClient', 'FMPClient', 'YFinanceClient']

View File

@ -1,16 +1,23 @@
from abc import ABC, abstractmethod
from typing import Dict, List, Optional, Union
from typing import Dict, List, Optional, Union, Any
import pandas as pd
from datetime import datetime
import time
import logging
class BaseAPIClient(ABC):
"""Base class for all API clients."""
def __init__(self, api_key: Optional[str] = None, *, rate_limit_delay: float = 1.0):
    """Initialize base API client.

    Args:
        api_key: Optional API key. Clients for providers that need no key
            can omit it.
        rate_limit_delay: Minimum number of seconds to leave between two
            requests. Keyword-only; defaults to 1.0 so existing callers
            keep the previous behavior.
    """
    self.api_key = api_key
    # Stays None until a request timestamp is recorded.
    self.last_request_time = None
    self.rate_limit_delay = rate_limit_delay
    # Logger is named after the concrete class so log lines identify the client.
    self.logger = logging.getLogger(self.__class__.__name__)
@abstractmethod
def get_etf_profile(self, symbol: str) -> Dict:
@ -81,28 +88,26 @@ class BaseAPIClient(ABC):
time.sleep(self.rate_limit_delay - time_since_last)
self.last_request_time = datetime.now()
@abstractmethod
def _validate_symbol(self, symbol: str) -> bool:
"""Validate ETF symbol format.
"""Validate a symbol.
Args:
symbol: ETF ticker symbol
symbol: Symbol to validate
Returns:
True if valid, False otherwise
"""
return bool(symbol and isinstance(symbol, str) and symbol.isupper())
pass
def _handle_error(self, error: Exception) -> Dict:
"""Handle API errors consistently.
"""Handle API errors.
Args:
error: Exception that occurred
Returns:
Dictionary with error information
Error response dictionary
"""
return {
'error': True,
'message': str(error),
'timestamp': datetime.now().isoformat()
}
self.logger.error(f"API error: {str(error)}")
return {"error": str(error)}

View File

@ -22,6 +22,17 @@ class FMPClient(BaseAPIClient):
self.cache_manager = cache_manager or FMPCacheManager()
self.logger = logging.getLogger(self.__class__.__name__)
def _validate_symbol(self, symbol: str) -> bool:
    """Check that a value is usable as an ETF ticker symbol.

    Args:
        symbol: Candidate ticker symbol.

    Returns:
        True when ``symbol`` is a string containing at least one cased
        character and no lowercase ones; False for anything else,
        including ``None``, non-strings and the empty string.
    """
    # Non-strings (None, numbers, ...) are never valid symbols.
    if not isinstance(symbol, str):
        return False
    # str.isupper() is already False for "" and for strings without letters.
    return symbol.isupper()
def _make_request(self, endpoint: str, params: Dict = None) -> Dict:
"""Make API request to FMP.
@ -32,30 +43,15 @@ class FMPClient(BaseAPIClient):
Returns:
API response data
"""
# Check cache first
if self.cache_manager:
cached_data, is_valid = self.cache_manager.get(endpoint, params)
if is_valid:
return cached_data
# Prepare request
url = f"{self.BASE_URL}/{endpoint}"
params = params or {}
params['apikey'] = self.api_key
# Check rate limit
self._check_rate_limit()
try:
response = requests.get(url, params=params)
response.raise_for_status()
data = response.json()
# Cache the response
if self.cache_manager:
self.cache_manager.set(endpoint, data, params)
return data
return response.json()
except requests.exceptions.RequestException as e:
self.logger.error(f"FMP API request failed: {str(e)}")
@ -68,44 +64,78 @@ class FMPClient(BaseAPIClient):
symbol: ETF ticker symbol
Returns:
Dictionary containing ETF profile information
Dictionary with ETF profile data
"""
if not self._validate_symbol(symbol):
return self._handle_error(ValueError(f"Invalid symbol: {symbol}"))
return self._make_request(f"etf/profile/{symbol}")
# Try cache first
if self.cache_manager:
is_valid, cached_data = self.cache_manager.load('fmp', symbol, 'profile')
if is_valid:
return cached_data
# Fetch from API
data = self._make_request(f"etf/profile/{symbol}")
# Cache the response
if self.cache_manager and data:
self.cache_manager.save('fmp', symbol, 'profile', data)
return data
def get_etf_holdings(self, symbol: str) -> List[Dict]:
"""Get ETF holdings data.
"""Get ETF holdings.
Args:
symbol: ETF ticker symbol
Returns:
List of dictionaries containing holding information
List of holdings
"""
if not self._validate_symbol(symbol):
return [self._handle_error(ValueError(f"Invalid symbol: {symbol}"))]
return []
return self._make_request(f"etf/holdings/{symbol}")
# Try cache first
if self.cache_manager:
is_valid, cached_data = self.cache_manager.load('fmp', symbol, 'holdings')
if is_valid:
return cached_data
# Fetch from API
data = self._make_request(f"etf/holdings/{symbol}")
# Cache the response
if self.cache_manager and data:
self.cache_manager.save('fmp', symbol, 'holdings', data)
return data
def get_historical_data(self, symbol: str, period: str = '1y') -> pd.DataFrame:
"""Get historical price data.
def get_etf_historical_data(self, symbol: str, timeframe: str = '1d') -> pd.DataFrame:
"""Get ETF historical data.
Args:
symbol: ETF ticker symbol
period: Time period (e.g., '1d', '1w', '1m', '1y')
timeframe: Timeframe for historical data
Returns:
DataFrame with historical price data
DataFrame with historical data
"""
if not self._validate_symbol(symbol):
return pd.DataFrame()
data = self._make_request(f"etf/historical-price/{symbol}", {'period': period})
# Try cache first
if self.cache_manager:
is_valid, cached_data = self.cache_manager.load('fmp', symbol, f'historical_{timeframe}')
if is_valid:
return pd.DataFrame(cached_data)
if isinstance(data, dict) and data.get('error'):
return pd.DataFrame()
# Fetch from API
data = self._make_request(f"etf/historical-price/{symbol}", {'timeframe': timeframe})
# Cache the response
if self.cache_manager and data:
self.cache_manager.save('fmp', symbol, f'historical_{timeframe}', data)
return pd.DataFrame(data)
@ -121,10 +151,18 @@ class FMPClient(BaseAPIClient):
if not self._validate_symbol(symbol):
return pd.DataFrame()
# Try cache first
if self.cache_manager:
is_valid, cached_data = self.cache_manager.load('fmp', symbol, 'dividend_history')
if is_valid:
return pd.DataFrame(cached_data)
# Fetch from API
data = self._make_request(f"etf/dividend/{symbol}")
if isinstance(data, dict) and data.get('error'):
return pd.DataFrame()
# Cache the response
if self.cache_manager and data:
self.cache_manager.save('fmp', symbol, 'dividend_history', data)
return pd.DataFrame(data)
@ -140,7 +178,20 @@ class FMPClient(BaseAPIClient):
if not self._validate_symbol(symbol):
return self._handle_error(ValueError(f"Invalid symbol: {symbol}"))
return self._make_request(f"etf/sector-weightings/{symbol}")
# Try cache first
if self.cache_manager:
is_valid, cached_data = self.cache_manager.load('fmp', symbol, 'sector_weightings')
if is_valid:
return cached_data
# Fetch from API
data = self._make_request(f"etf/sector-weightings/{symbol}")
# Cache the response
if self.cache_manager and data:
self.cache_manager.save('fmp', symbol, 'sector_weightings', data)
return data
def clear_cache(self) -> int:
"""Clear expired cache entries.

View File

@ -1,7 +1,7 @@
import yfinance as yf
import pandas as pd
from typing import Dict, List, Optional
from datetime import datetime
from datetime import datetime, timedelta
import logging
from ..base import BaseAPIClient
from ...cache.yfinance_cache import YFinanceCacheManager
@ -10,109 +10,65 @@ class YFinanceClient(BaseAPIClient):
"""Yahoo Finance API client."""
def __init__(self, cache_manager: Optional[YFinanceCacheManager] = None):
"""Initialize yfinance client.
"""Initialize YFinance client.
Args:
cache_manager: Optional cache manager instance
"""
super().__init__(None) # yfinance doesn't need API key
super().__init__()
self.cache_manager = cache_manager or YFinanceCacheManager()
self.logger = logging.getLogger(self.__class__.__name__)
def _get_ticker(self, symbol: str) -> Optional[yf.Ticker]:
"""Get yfinance Ticker object.
def _validate_symbol(self, symbol: str) -> bool:
"""Validate ETF symbol format.
Args:
symbol: ETF ticker symbol
Returns:
yfinance Ticker object or None if invalid
True if valid, False otherwise
"""
if not self._validate_symbol(symbol):
return None
return yf.Ticker(symbol)
return bool(symbol and isinstance(symbol, str) and symbol.isupper())
def _make_request(self, endpoint: str, params: Dict = None) -> Dict:
    """Fetch data for one yfinance endpoint, going through the cache.

    Args:
        endpoint: One of "info", "holdings" or "history".
        params: Query parameters. Must contain 'symbol'; 'period' is read
            for the "history" endpoint and defaults to '1y'.

    Returns:
        The cached value when the cache manager reports it as valid,
        otherwise the freshly fetched data. On any failure the exception
        is logged and the result of ``self._handle_error`` is returned.
    """
    # Serve from cache when an entry exists and is still flagged valid.
    if self.cache_manager:
        stored, still_valid = self.cache_manager.get(endpoint, params)
        if still_valid:
            return stored

    try:
        symbol = (params or {}).get('symbol')
        if not symbol:
            raise ValueError("Symbol is required")

        ticker = self._get_ticker(symbol)
        if not ticker:
            raise ValueError(f"Invalid symbol: {symbol}")

        # Pick the ticker accessor that matches the requested endpoint.
        if endpoint == "history":
            span = params.get('period', '1y')
            frame = ticker.history(period=span)
            # presumably a DataFrame; converted to a list of row dicts
            data = frame.to_dict('records')
        elif endpoint == "holdings":
            data = ticker.get_holdings()
        elif endpoint == "info":
            data = ticker.info
        else:
            raise ValueError(f"Unknown endpoint: {endpoint}")

        # Remember the fresh result for later calls.
        if self.cache_manager:
            self.cache_manager.set(endpoint, data, params)
        return data
    except Exception as exc:
        self.logger.error(f"yfinance API request failed: {str(exc)}")
        return self._handle_error(exc)
def get_etf_profile(self, symbol: str) -> Dict:
"""Get ETF profile data.
def get_etf_info(self, symbol: str) -> Dict:
"""Get ETF information.
Args:
symbol: ETF ticker symbol
Returns:
Dictionary containing ETF profile information
Dictionary with ETF information
"""
if not self._validate_symbol(symbol):
return self._handle_error(ValueError(f"Invalid symbol: {symbol}"))
return self._make_request("info", {'symbol': symbol})
def get_etf_holdings(self, symbol: str) -> List[Dict]:
"""Get ETF holdings data.
# Try cache first
if self.cache_manager:
is_valid, cached_data = self.cache_manager.load('yfinance', symbol, 'info')
if is_valid:
return cached_data
Args:
symbol: ETF ticker symbol
try:
etf = yf.Ticker(symbol)
info = etf.info
Returns:
List of dictionaries containing holding information
"""
if not self._validate_symbol(symbol):
return [self._handle_error(ValueError(f"Invalid symbol: {symbol}"))]
data = self._make_request("holdings", {'symbol': symbol})
return data if isinstance(data, list) else [data]
# Cache the response
if self.cache_manager and info:
self.cache_manager.save('yfinance', symbol, 'info', info)
return info
except Exception as e:
self.logger.error(f"Error fetching ETF info: {str(e)}")
return self._handle_error(e)
def get_historical_data(self, symbol: str, period: str = '1y') -> pd.DataFrame:
def get_historical_data(self, symbol: str, period: str = '1y', interval: str = '1d') -> pd.DataFrame:
"""Get historical price data.
Args:
symbol: ETF ticker symbol
period: Time period (e.g., '1d', '1w', '1m', '1y')
interval: Data interval (e.g., '1d', '1wk', '1mo')
Returns:
DataFrame with historical price data
@ -120,12 +76,25 @@ class YFinanceClient(BaseAPIClient):
if not self._validate_symbol(symbol):
return pd.DataFrame()
data = self._make_request("history", {'symbol': symbol, 'period': period})
# Try cache first
if self.cache_manager:
is_valid, cached_data = self.cache_manager.load('yfinance', symbol, f'historical_{period}_{interval}')
if is_valid:
return pd.DataFrame(cached_data)
if isinstance(data, dict) and data.get('error'):
try:
etf = yf.Ticker(symbol)
data = etf.history(period=period, interval=interval)
# Cache the response
if self.cache_manager and not data.empty:
self.cache_manager.save('yfinance', symbol, f'historical_{period}_{interval}', data.to_dict('records'))
return data
except Exception as e:
self.logger.error(f"Error fetching historical data: {str(e)}")
return pd.DataFrame()
return pd.DataFrame(data)
def get_dividend_history(self, symbol: str) -> pd.DataFrame:
"""Get dividend history.
@ -139,45 +108,26 @@ class YFinanceClient(BaseAPIClient):
if not self._validate_symbol(symbol):
return pd.DataFrame()
# Try cache first
if self.cache_manager:
is_valid, cached_data = self.cache_manager.load('yfinance', symbol, 'dividend_history')
if is_valid:
return pd.DataFrame(cached_data)
try:
ticker = self._get_ticker(symbol)
if not ticker:
return pd.DataFrame()
etf = yf.Ticker(symbol)
data = etf.dividends.to_frame()
dividends = ticker.dividends
return pd.DataFrame(dividends).reset_index()
# Cache the response
if self.cache_manager and not data.empty:
self.cache_manager.save('yfinance', symbol, 'dividend_history', data.to_dict('records'))
return data
except Exception as e:
self.logger.error(f"Error getting dividend history: {str(e)}")
self.logger.error(f"Error fetching dividend history: {str(e)}")
return pd.DataFrame()
def get_sector_weightings(self, symbol: str) -> Dict:
    """Return sector weightings and asset allocation for an ETF.

    Args:
        symbol: ETF ticker symbol.

    Returns:
        A dictionary with keys 'sector_weightings' and 'asset_allocation',
        each taken from the ticker's info mapping and defaulting to an
        empty dict when absent. For an invalid symbol or any failure, the
        result of ``self._handle_error`` is returned instead.
    """
    if not self._validate_symbol(symbol):
        return self._handle_error(ValueError(f"Invalid symbol: {symbol}"))

    try:
        ticker = self._get_ticker(symbol)
        if ticker:
            details = ticker.info
            sectors = details.get('sectorWeights', {})
            allocation = details.get('assetAllocation', {})
            return {
                'sector_weightings': sectors,
                'asset_allocation': allocation,
            }
        # No ticker object could be obtained for this symbol.
        return self._handle_error(ValueError(f"Invalid symbol: {symbol}"))
    except Exception as exc:
        self.logger.error(f"Error getting sector weightings: {str(exc)}")
        return self._handle_error(exc)
def clear_cache(self) -> int:
"""Clear expired cache entries.

View File

@ -77,9 +77,19 @@ class CacheManager:
Returns:
Path object for the cache file
"""
# Create filename in format: {source}_{ticker}_{data_type}.json
filename = f"{source}_{ticker}_{data_type}.json"
cache_path = self.cache_dir / filename
# Create subdirectory based on source and data type
if source == 'fmp':
subdir = f"FMP_cache/{data_type}"
else:
subdir = f"{source}_cache"
# Create the subdirectory
subdir_path = self.cache_dir / subdir
subdir_path.mkdir(parents=True, exist_ok=True)
# Create filename in format: {ticker}.json
filename = f"{ticker}.json"
cache_path = subdir_path / filename
cache_logger.debug(f"Cache path: {cache_path}")
return cache_path
@ -131,6 +141,12 @@ class CacheManager:
start_time = time.time()
cache_path = self._get_cache_path(source, ticker, data_type)
# Log the data being cached
cache_logger.debug(f"Caching data for {source}/{ticker}/{data_type}")
cache_logger.debug(f"Data type: {type(data)}")
if isinstance(data, (list, dict)):
cache_logger.debug(f"Data length: {len(data)}")
# Prepare cache data with timestamp
cache_data = {
'timestamp': datetime.now().isoformat(),
@ -144,8 +160,17 @@ class CacheManager:
with open(cache_path, 'w') as f:
json.dump(cache_data, f, indent=2)
# Update stats
# Verify the file was written correctly
if not cache_path.exists():
cache_logger.error(f"Cache file was not created: {cache_path}")
return False
file_size = os.path.getsize(cache_path)
if file_size == 0:
cache_logger.error(f"Cache file is empty: {cache_path}")
return False
# Update stats
self.stats.total_size += file_size
self._save_stats()
@ -184,7 +209,11 @@ class CacheManager:
start_time = time.time()
cache_path = self._get_cache_path(source, ticker, data_type)
cache_logger.debug(f"Attempting to load cache for {source}/{ticker}/{data_type}")
cache_logger.debug(f"Cache path: {cache_path}")
if not cache_path.exists():
cache_logger.debug(f"Cache file does not exist: {cache_path}")
cache_logger.log_cache_operation(
"load",
f"{source}/{ticker}/{data_type}",
@ -202,7 +231,11 @@ class CacheManager:
timestamp = datetime.fromisoformat(cache_data['timestamp'])
age = datetime.now() - timestamp
cache_logger.debug(f"Cache age: {age.total_seconds()} seconds")
cache_logger.debug(f"Cache duration: {self.cache_duration} seconds")
if age.total_seconds() > self.cache_duration:
cache_logger.debug(f"Cache expired for {source}/{ticker}/{data_type}")
cache_logger.log_cache_operation(
"load",
f"{source}/{ticker}/{data_type}",
@ -213,6 +246,7 @@ class CacheManager:
return False, None
duration = time.time() - start_time
cache_logger.debug(f"Cache hit for {source}/{ticker}/{data_type}")
cache_logger.log_cache_operation(
"load",
f"{source}/{ticker}/{data_type}",

View File

@ -116,15 +116,6 @@ st.markdown("""
# Create sidebar for API keys
with st.sidebar:
st.markdown("### API Configuration")
fmp_key = st.text_input(
"FMP API Key",
value=st.session_state.fmp_api_key,
type="password",
key="fmp_api_key_field"
)
if fmp_key != st.session_state.fmp_api_key:
st.session_state.fmp_api_key = fmp_key
openai_key = st.text_input(
"OpenAI API Key",
value=st.session_state.openai_api_key,

View File

@ -22,11 +22,16 @@ def test_api_configuration():
# Test profile data
logger.info(f"Getting profile data for {test_symbol}...")
profile = fmp_client.get_etf_profile(test_symbol)
if isinstance(profile, dict) and not profile.get('error'):
if isinstance(profile, (dict, list)) and (isinstance(profile, dict) and not profile.get('error') or isinstance(profile, list) and len(profile) > 0):
logger.info("✅ Profile data retrieved successfully")
if isinstance(profile, list):
logger.info(f"Retrieved {len(profile)} profile entries")
else:
logger.error("❌ Failed to get profile data")
logger.error(f"Error: {profile.get('message', 'Unknown error')}")
if isinstance(profile, dict):
logger.error(f"Error: {profile.get('message', 'Unknown error')}")
else:
logger.error(f"Error: Unexpected response type: {type(profile)}")
# Test historical data
logger.info(f"Getting historical data for {test_symbol}...")
@ -36,6 +41,10 @@ def test_api_configuration():
logger.info(f"Data points: {len(historical)}")
else:
logger.error("❌ Failed to get historical data")
if isinstance(historical, dict):
logger.error(f"Error: {historical.get('message', 'Unknown error')}")
else:
logger.error(f"Error: Unexpected response type: {type(historical)}")
# Test cache
logger.info("Testing cache...")
@ -45,12 +54,12 @@ def test_api_configuration():
# Test fallback to yfinance
logger.info("Testing fallback to yfinance...")
yfinance_data = api_factory.get_data(test_symbol, 'etf_profile', provider='yfinance')
if isinstance(yfinance_data, dict) and not yfinance_data.get('error'):
if isinstance(yfinance_data, (dict, list)) and (isinstance(yfinance_data, dict) and not yfinance_data.get('error') or isinstance(yfinance_data, list) and len(yfinance_data) > 0):
logger.info("✅ YFinance fallback working")
else:
logger.error("❌ YFinance fallback failed")
logger.error(f"Error: {yfinance_data.get('message', 'Unknown error')}")
logger.info("\n✅ All tests passed!")
return True
except Exception as e: