Machine Learning Aplicado al Trading Cuantitativo

Introducción

El Machine Learning ofrece herramientas poderosas para detectar patrones no lineales en mercados financieros, identificar regímenes de mercado y crear modelos predictivos. Esta documentación cubre implementaciones prácticas específicamente validadas para trading cuantitativo.

Modelos No Supervisados

Hidden Markov Models (HMM) para Detección de Regímenes

Los HMM son especialmente útiles para identificar estados ocultos del mercado (alcista/bajista, alta/baja volatilidad) que no son directamente observables pero influyen en el comportamiento de los precios.

Conceptos Fundamentales

¿Qué son los Estados de Markov?

  • Representan condiciones discretas que un sistema puede ocupar
  • Solo importa el estado actual para predecir el siguiente paso
  • Los estados ocultos influyen en las observaciones (precios) que vemos

Aplicaciones en Trading:

  • Detección de regímenes de mercado (alcista/bajista)
  • Identificación de períodos de alta/baja volatilidad
  • Cambios en la estructura del mercado
  • Señales de entrada/salida basadas en transiciones de estado

Implementación Básica con HMM

import numpy as np
import pandas as pd
import yfinance as yf
from hmmlearn import hmm
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

class MarketRegimeDetector:
    """
    Market regime detector built on a Gaussian Hidden Markov Model.

    Derives a small feature set (log returns, daily range, realised
    volatility, relative volume and RSI) from OHLCV data, fits a
    GaussianHMM on the standardised features and exposes the decoded
    hidden states as market "regimes".
    """

    def __init__(self, n_components=2, covariance_type="full", random_state=42):
        """
        Parameters
        ----------
        n_components : int
            Number of hidden states (typically 2-4 for markets).
        covariance_type : str
            Covariance matrix type ('full', 'diag', 'tied', 'spherical').
        random_state : int
            Seed for reproducibility.
        """
        self.n_components = n_components
        self.model = hmm.GaussianHMM(
            n_components=n_components,
            covariance_type=covariance_type,
            random_state=random_state
        )
        self.scaler = StandardScaler()
        self.is_fitted = False

    def prepare_features(self, df, fit_scaler=None):
        """
        Build the standardised feature matrix for the HMM.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame with OHLCV columns.
        fit_scaler : bool or None
            Whether to (re)fit the internal StandardScaler on these
            data.  With the default (None) the scaler is fitted only
            while the model itself is untrained.  BUG FIX: the original
            always called fit_transform, so prediction-time features
            were silently re-standardised with different statistics
            than the training data.

        Returns
        -------
        tuple(np.ndarray, pd.Index)
            Scaled features and the index of the rows that survived
            the NaN warm-up period.
        """
        features = pd.DataFrame(index=df.index)

        # Log returns.
        features['log_returns'] = np.log(df['Close'] / df['Close'].shift(1))

        # Normalised daily range.
        features['daily_range'] = (df['High'] / df['Low']) - 1

        # Realised volatility (5-day window).
        features['realized_vol'] = features['log_returns'].rolling(5).std()

        # Relative volume vs. its 20-day mean.
        features['volume_ratio'] = df['Volume'] / df['Volume'].rolling(20).mean()

        # RSI as a momentum proxy.
        features['rsi'] = self.calculate_rsi(df['Close'], period=14)

        # Drop warm-up NaNs, then scale consistently.
        features_clean = features.dropna()
        if fit_scaler is None:
            fit_scaler = not self.is_fitted
        if fit_scaler:
            features_scaled = self.scaler.fit_transform(features_clean)
        else:
            features_scaled = self.scaler.transform(features_clean)

        return features_scaled, features_clean.index

    def calculate_rsi(self, series, period=14):
        """Relative Strength Index over `period` bars (simple means)."""
        delta = series.diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
        rs = gain / loss
        return 100 - (100 / (1 + rs))

    def fit(self, df):
        """
        Train the HMM on historical OHLCV data.

        Parameters
        ----------
        df : pd.DataFrame
            Historical data with OHLCV columns.

        Returns
        -------
        MarketRegimeDetector
            self, to allow chaining.
        """
        # Always (re)fit the scaler together with the model.
        features, self.feature_index = self.prepare_features(df, fit_scaler=True)

        self.model.fit(features)
        self.is_fitted = True

        # Decode the most likely state for every training observation.
        self.hidden_states = self.model.predict(features)

        # Keep data around for later analysis/plotting.
        self.features = features
        self.original_data = df.loc[self.feature_index]

        return self

    def predict_current_regime(self, df):
        """
        Predict the market's current regime.

        Returns
        -------
        dict
            current_state, per-state posterior probabilities,
            confidence (max probability) and the Viterbi state
            sequence over the last 10 observations.
        """
        if not self.is_fitted:
            raise ValueError("Modelo no entrenado. Ejecuta fit() primero.")

        # Reuses the training-time scaler (fit_scaler resolves to False).
        features, _ = self.prepare_features(df)

        # Most likely state for the latest observation.
        current_state = self.model.predict(features[-1:])[0]

        # Viterbi decoding over the last 10 observations for context.
        log_prob, state_sequence = self.model.decode(features[-10:], algorithm="viterbi")
        # BUG FIX: predict_proba already returns posterior
        # probabilities; the original wrapped them in np.exp, producing
        # values outside [0, 1] and a meaningless confidence.
        state_probs = self.model.predict_proba(features[-1:])[0]

        return {
            'current_state': current_state,
            'state_probabilities': state_probs,
            'confidence': np.max(state_probs),
            'recent_sequence': state_sequence
        }

    def analyze_regimes(self):
        """
        Summarise each regime: mean return, volatility, volume, share
        of time.

        NOTE: pct_change is computed on the concatenated rows belonging
        to a regime, so returns across regime gaps are approximate.
        """
        if not self.is_fitted:
            raise ValueError("Modelo no entrenado.")

        regime_analysis = {}

        for state in range(self.n_components):
            mask = self.hidden_states == state
            state_data = self.original_data[mask]

            if len(state_data) > 0:
                avg_return = state_data['Close'].pct_change().mean()
                volatility = state_data['Close'].pct_change().std()
                avg_volume = state_data['Volume'].mean()
                duration = len(state_data)

                regime_analysis[f'State_{state}'] = {
                    'average_return': avg_return,
                    'volatility': volatility,
                    'average_volume': avg_volume,
                    'duration_days': duration,
                    'percentage_time': duration / len(self.hidden_states),
                    'regime_type': 'Bullish' if avg_return > 0 else 'Bearish'
                }

        return regime_analysis

    def plot_regimes(self, title="Market Regimes Detection"):
        """
        Plot price coloured by regime plus the hidden-state sequence.
        """
        if not self.is_fitted:
            raise ValueError("Modelo no entrenado.")

        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10))

        # Plot 1: price scatter coloured per regime.
        colors = ['green', 'red', 'blue', 'orange'][:self.n_components]

        for state in range(self.n_components):
            mask = self.hidden_states == state
            state_data = self.original_data[mask]

            ax1.scatter(state_data.index, state_data['Close'],
                       c=colors[state], label=f'Regime {state}', alpha=0.6, s=10)

        ax1.plot(self.original_data.index, self.original_data['Close'],
                'k-', alpha=0.3, linewidth=0.5)
        ax1.set_title(f'{title} - Price Action')
        ax1.set_ylabel('Price')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Plot 2: hidden-state sequence over time.
        ax2.plot(self.feature_index, self.hidden_states, 'k-', linewidth=2)
        ax2.fill_between(self.feature_index, 0, self.hidden_states, alpha=0.3)
        ax2.set_title('Hidden States Sequence')
        ax2.set_ylabel('State')
        ax2.set_xlabel('Date')
        ax2.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        return fig

def hmm_trading_strategy(df, detector, confidence_threshold=0.7):
    """
    HMM-based walk-forward trading strategy.

    For every day after the first trading year, refits a fresh 2-state
    regime detector on the trailing 252 sessions, classifies the
    current regime and emits a long/short signal when the regime is
    confidently bullish/bearish.

    Parameters
    ----------
    df : pd.DataFrame
        Historical OHLCV data.
    detector : MarketRegimeDetector
        Accepted for interface compatibility; a temporary detector is
        retrained inside the loop instead.
    confidence_threshold : float
        Minimum state-probability confidence required to emit a signal.

    Returns
    -------
    pd.DataFrame
        Columns: price, signal (-1/0/1), regime, confidence.
    """
    out = pd.DataFrame(index=df.index)
    out['price'] = df['Close']
    out['signal'] = 0
    out['regime'] = np.nan
    out['confidence'] = np.nan

    # Rolling training window: one trading year.
    train_window = 252

    for pos in range(train_window, len(df)):
        history = df.iloc[pos - train_window:pos]

        # Refit a fresh detector on the trailing window only.
        rolling_detector = MarketRegimeDetector(n_components=2)
        rolling_detector.fit(history)

        # Classify the regime using the last 50 sessions for context.
        context = df.iloc[pos - 50:pos + 1]
        info = rolling_detector.predict_current_regime(context)

        ts = df.index[pos]
        out.loc[ts, 'regime'] = info['current_state']
        out.loc[ts, 'confidence'] = info['confidence']

        # Only act on high-confidence classifications.
        if info['confidence'] <= confidence_threshold:
            continue

        stats = rolling_detector.analyze_regimes()
        key = f"State_{info['current_state']}"
        if key not in stats:
            continue

        mean_ret = stats[key]['average_return']
        if mean_ret > 0.001:        # bullish regime
            out.loc[ts, 'signal'] = 1
        elif mean_ret < -0.001:     # bearish regime
            out.loc[ts, 'signal'] = -1

    return out

# Ejemplo de uso completo
def hmm_example_analysis():
    """
    End-to-end example: fit an HMM regime detector on SPY, report the
    detected regimes and the current regime, then run the walk-forward
    strategy.

    Requires network access (data is downloaded through yfinance).
    Returns (detector, strategy_signals).
    """
    # Fetch daily OHLCV data.
    ticker = "SPY"
    df = yf.download(ticker, start="2020-01-01", end="2024-01-01", interval="1d")
    
    print(f"=== ANÁLISIS HMM: {ticker} ===\n")
    
    # Create and train a 2-state detector.
    detector = MarketRegimeDetector(n_components=2, random_state=42)
    detector.fit(df)
    
    # Per-regime descriptive statistics.
    regime_analysis = detector.analyze_regimes()
    
    print("📊 ANÁLISIS DE REGÍMENES:")
    for regime, stats in regime_analysis.items():
        print(f"\n{regime} ({stats['regime_type']}):")
        print(f"   Retorno Promedio: {stats['average_return']:.4f}")
        print(f"   Volatilidad: {stats['volatility']:.4f}")
        print(f"   Duración: {stats['duration_days']} días")
        print(f"   % del Tiempo: {stats['percentage_time']:.1%}")
    
    # Classify the market's current regime.
    current_regime = detector.predict_current_regime(df)
    print(f"\n🎯 RÉGIMEN ACTUAL:")
    print(f"   Estado: {current_regime['current_state']}")
    print(f"   Confianza: {current_regime['confidence']:.1%}")
    print(f"   Probabilidades: {current_regime['state_probabilities']}")
    
    # Walk-forward strategy (slow: refits an HMM per trading day).
    strategy_signals = hmm_trading_strategy(df, detector)
    
    # Aggregate signal statistics.
    total_signals = strategy_signals['signal'].abs().sum()
    long_signals = (strategy_signals['signal'] == 1).sum()
    short_signals = (strategy_signals['signal'] == -1).sum()
    
    print(f"\n📈 ESTADÍSTICAS DE ESTRATEGIA:")
    print(f"   Total Señales: {total_signals}")
    print(f"   Señales Long: {long_signals}")
    print(f"   Señales Short: {short_signals}")
    
    # Plot price coloured by regime.
    detector.plot_regimes(f"HMM Regime Detection - {ticker}")
    
    return detector, strategy_signals

# Script entry point: run the full HMM example when executed directly.
if __name__ == "__main__":
    hmm_example_analysis()

Estrategia Avanzada: Multi-Estado HMM

class AdvancedMarketRegimeDetector:
    """
    Multi-state regime detector for more complex market structure.

    Typical interpretation with 4 states:
    0: bull market, 1: bear market, 2: high volatility (crisis),
    3: low volatility (consolidation).  The actual mapping is inferred
    after fitting by interpret_regimes().
    """

    def __init__(self, n_components=4):
        """
        Parameters
        ----------
        n_components : int
            Number of hidden states to fit.
        """
        self.n_components = n_components
        self.model = hmm.GaussianHMM(
            n_components=n_components,
            covariance_type="full",
            random_state=42
        )

    def prepare_advanced_features(self, df):
        """
        Build an extended feature set for multi-state detection.

        Returns
        -------
        pd.DataFrame
            Features with warm-up NaN rows dropped.
        """
        features = pd.DataFrame(index=df.index)

        # Returns over several horizons.
        features['returns_1d'] = df['Close'].pct_change()
        features['returns_5d'] = df['Close'].pct_change(5)
        features['returns_20d'] = df['Close'].pct_change(20)

        # Realised volatilities.
        features['vol_5d'] = features['returns_1d'].rolling(5).std()
        features['vol_20d'] = features['returns_1d'].rolling(20).std()
        features['vol_60d'] = features['returns_1d'].rolling(60).std()

        # Momentum indicators.
        features['rsi'] = self.calculate_rsi(df['Close'])
        features['macd'] = self.calculate_macd(df['Close'])

        # Volume patterns.
        features['volume_trend'] = df['Volume'].rolling(20).mean() / df['Volume'].rolling(60).mean()
        features['volume_spike'] = df['Volume'] / df['Volume'].rolling(20).mean()

        # Volatility-of-volatility as a VIX proxy.
        features['vol_of_vol'] = features['vol_20d'].rolling(10).std()

        return features.dropna()

    def calculate_rsi(self, series, period=14):
        """
        Relative Strength Index over `period` bars (simple means).

        BUG FIX: prepare_advanced_features called self.calculate_rsi,
        but the class never defined it, so feature preparation raised
        AttributeError at runtime.
        """
        delta = series.diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
        rs = gain / loss
        return 100 - (100 / (1 + rs))

    def calculate_macd(self, series, fast=12, slow=26, signal=9):
        """MACD line (fast EMA minus slow EMA; `signal` is unused here)."""
        ema_fast = series.ewm(span=fast).mean()
        ema_slow = series.ewm(span=slow).mean()
        macd_line = ema_fast - ema_slow
        return macd_line

    def fit_advanced(self, df):
        """
        Fit the HMM on the advanced feature set and decode states.

        Returns
        -------
        AdvancedMarketRegimeDetector
            self, to allow chaining.
        """
        features = self.prepare_advanced_features(df)

        # Standardise features before fitting.
        scaler = StandardScaler()
        features_scaled = scaler.fit_transform(features)

        self.model.fit(features_scaled)
        self.hidden_states = self.model.predict(features_scaled)

        # Attach a human-readable label to each state.
        self.regime_interpretation = self.interpret_regimes(df, features)

        return self

    def interpret_regimes(self, df, features):
        """
        Heuristically label each hidden state from its feature averages.

        Returns
        -------
        dict
            Per-state type, mean return, mean volatility, mean RSI and
            frequency.
        """
        interpretation = {}

        for state in range(self.n_components):
            mask = self.hidden_states == state
            state_features = features[mask]

            if len(state_features) > 0:
                avg_return = state_features['returns_1d'].mean()
                avg_vol = state_features['vol_20d'].mean()
                avg_rsi = state_features['rsi'].mean()

                # Classify by return sign and volatility quantiles
                # (quantiles are taken within the state's own rows).
                if avg_return > 0.001 and avg_vol < state_features['vol_20d'].quantile(0.5):
                    regime_type = "Bull Market"
                elif avg_return < -0.001 and avg_vol < state_features['vol_20d'].quantile(0.5):
                    regime_type = "Bear Market"
                elif avg_vol > state_features['vol_20d'].quantile(0.75):
                    regime_type = "High Volatility/Crisis"
                else:
                    regime_type = "Consolidation/Low Volatility"

                interpretation[state] = {
                    'type': regime_type,
                    'avg_return': avg_return,
                    'avg_volatility': avg_vol,
                    'avg_rsi': avg_rsi,
                    'frequency': np.mean(mask)
                }

        return interpretation

def small_cap_hmm_strategy(df, lookback_days=252):
    """
    HMM regime detection tuned to small-cap behaviour.

    Builds gap, intraday-range, volume-spike, momentum and
    relative-strength features, fits a 3-state GaussianHMM on them and
    returns the decoded states.

    NOTE(review): `lookback_days` is accepted for interface
    compatibility but is not used by the current implementation.
    Requires network access (SPY is downloaded via yfinance).

    Returns
    -------
    dict
        'states', 'features', 'index' and the 'detector' whose inner
        model was fitted on the custom features.
    """
    regime_model = MarketRegimeDetector(n_components=3)  # 3 estados: alcista, bajista, volátil

    # Overnight gap vs. previous close.
    gap_pct = (df['Open'] / df['Close'].shift(1)) - 1

    # Intraday range relative to the open.
    intraday_range = (df['High'] - df['Low']) / df['Open']

    # Volume spikes (crucial for small caps).
    volume_spike = df['Volume'] / df['Volume'].rolling(20).mean()

    # 5-day price momentum.
    momentum_5d = df['Close'] / df['Close'].shift(5) - 1

    # Relative strength against the broad market (SPY).
    spy_data = yf.download("SPY", start=df.index[0], end=df.index[-1])
    relative_strength = (df['Close'].pct_change() -
                         spy_data['Close'].pct_change().reindex(df.index))

    feature_frame = pd.DataFrame(
        {
            'gap_pct': gap_pct,
            'intraday_range': intraday_range,
            'volume_spike': volume_spike,
            'momentum_5d': momentum_5d,
            'relative_strength': relative_strength,
        },
        index=df.index,
    )

    # Standardise and fit the HMM directly on the custom features.
    usable = feature_frame.dropna()
    scaled = StandardScaler().fit_transform(usable)

    regime_model.model.fit(scaled)
    decoded_states = regime_model.model.predict(scaled)

    return {
        'states': decoded_states,
        'features': usable,
        'index': usable.index,
        'detector': regime_model
    }

Modelos Supervisados para Trading

Predicción de Precios con XGBoost

Los modelos supervisados pueden predecir movimientos futuros de precios basándose en características históricas.

import pandas as pd
import numpy as np
from xgboost import XGBRegressor, XGBClassifier
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, classification_report
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

class TradingPredictor:
    """
    Price / direction predictor built on XGBoost.

    'price' mode trains an XGBRegressor on the close `target_days`
    ahead; 'direction' mode trains an XGBClassifier on whether the
    forward return is positive.
    """

    def __init__(self, prediction_type='price', target_days=1):
        """
        Parameters
        ----------
        prediction_type : str
            'price' for regression, 'direction' for classification.
        target_days : int
            Days ahead to predict.
        """
        self.prediction_type = prediction_type
        self.target_days = target_days

        if prediction_type == 'price':
            self.model = XGBRegressor(
                max_depth=6,
                n_estimators=100,
                learning_rate=0.1,
                random_state=42
            )
        else:
            self.model = XGBClassifier(
                max_depth=6,
                n_estimators=100,
                learning_rate=0.1,
                random_state=42
            )

    def create_features(self, df):
        """
        Build the ML feature matrix from OHLCV data.

        Returns
        -------
        pd.DataFrame
            Features with warm-up NaN rows dropped.
        """
        features = pd.DataFrame(index=df.index)

        # Price relative to moving averages.
        features['sma_5'] = df['Close'].rolling(5).mean() / df['Close']
        features['sma_10'] = df['Close'].rolling(10).mean() / df['Close']
        features['sma_20'] = df['Close'].rolling(20).mean() / df['Close']

        # Volatility.
        features['volatility_5'] = df['Close'].pct_change().rolling(5).std()
        features['volatility_20'] = df['Close'].pct_change().rolling(20).std()

        # Momentum (rate of change).
        features['roc_5'] = df['Close'].pct_change(5)
        features['roc_10'] = df['Close'].pct_change(10)
        features['roc_20'] = df['Close'].pct_change(20)

        # Technical indicators.
        features['rsi'] = self.calculate_rsi(df['Close'])
        features['bb_position'] = self.calculate_bb_position(df['Close'])

        # Volume.
        features['volume_sma'] = df['Volume'] / df['Volume'].rolling(20).mean()
        # NOTE(review): raw price*volume is non-stationary; kept for
        # backward compatibility of the feature set.
        features['price_volume'] = df['Close'] * df['Volume']

        # Intraday OHLC shape.
        features['high_low_pct'] = (df['High'] - df['Low']) / df['Close']
        features['open_close_pct'] = (df['Close'] - df['Open']) / df['Open']

        # Lagged returns / volume changes.
        for lag in [1, 2, 3, 5]:
            features[f'return_lag_{lag}'] = df['Close'].pct_change().shift(lag)
            features[f'volume_lag_{lag}'] = df['Volume'].pct_change().shift(lag)

        return features.dropna()

    def calculate_rsi(self, series, period=14):
        """Relative Strength Index over `period` bars (simple means)."""
        delta = series.diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
        rs = gain / loss
        return 100 - (100 / (1 + rs))

    def calculate_bb_position(self, series, period=20, std_mult=2):
        """Position inside the Bollinger Bands: 0 = lower, 1 = upper."""
        sma = series.rolling(period).mean()
        std = series.rolling(period).std()
        upper = sma + (std * std_mult)
        lower = sma - (std * std_mult)
        return (series - lower) / (upper - lower)

    def create_targets(self, df):
        """
        Build the target variable.

        Returns
        -------
        pd.Series
            Future close price ('price' mode), or 1.0/0.0 up/down
            labels with NaN where the future return is unknown
            ('direction' mode).
        """
        if self.prediction_type == 'price':
            # Future close price.
            target = df['Close'].shift(-self.target_days)
        else:
            future_return = df['Close'].pct_change(self.target_days).shift(-self.target_days)
            # BUG FIX: (NaN > 0) evaluates to False, so the original
            # labelled the final `target_days` rows as 0 ("down")
            # instead of leaving them unknown.  Keep them as NaN so
            # that fit() drops them instead of training on bogus labels.
            target = (future_return > 0).astype(float).where(future_return.notna())

        return target

    def fit(self, df, test_size=0.2):
        """
        Train the model on a chronological train/test split.

        Parameters
        ----------
        df : pd.DataFrame
            OHLCV history.
        test_size : float
            Fraction of the most recent data held out for testing.

        Returns
        -------
        TradingPredictor
            self, to allow chaining.
        """
        features = self.create_features(df)
        targets = self.create_targets(df)

        # Align features and targets, dropping rows with unknown labels.
        aligned_data = pd.concat([features, targets], axis=1).dropna()
        X = aligned_data.iloc[:, :-1]  # all columns except the target
        y = aligned_data.iloc[:, -1]   # last column (target)
        if self.prediction_type != 'price':
            # Restore integer class labels after the NaN-safe float cast.
            y = y.astype(int)

        # Chronological split (crucial for time series: no shuffling).
        split_point = int(len(X) * (1 - test_size))
        X_train, X_test = X.iloc[:split_point], X.iloc[split_point:]
        y_train, y_test = y.iloc[:split_point], y.iloc[split_point:]

        self.model.fit(X_train, y_train)

        train_pred = self.model.predict(X_train)
        test_pred = self.model.predict(X_test)

        # Metrics per task type.
        if self.prediction_type == 'price':
            train_mse = mean_squared_error(y_train, train_pred)
            test_mse = mean_squared_error(y_test, test_pred)

            self.metrics = {
                'train_mse': train_mse,
                'test_mse': test_mse,
                'train_rmse': np.sqrt(train_mse),
                'test_rmse': np.sqrt(test_mse)
            }
        else:
            train_accuracy = (train_pred.round() == y_train).mean()
            test_accuracy = (test_pred.round() == y_test).mean()

            self.metrics = {
                'train_accuracy': train_accuracy,
                'test_accuracy': test_accuracy
            }

        # Keep splits and predictions for later inspection/plots.
        self.X_train, self.X_test = X_train, X_test
        self.y_train, self.y_test = y_train, y_test
        self.train_pred, self.test_pred = train_pred, test_pred
        self.feature_names = X.columns.tolist()

        return self

    def predict_next(self, df, periods=1):
        """
        Predict using the latest `periods` feature rows.

        Raises
        ------
        ValueError
            If no feature rows survive the warm-up period.
        """
        features = self.create_features(df)
        latest_features = features.iloc[-periods:].values

        if latest_features.shape[0] == 0:
            raise ValueError("No hay suficientes datos para generar características")

        predictions = self.model.predict(latest_features)

        return predictions

    def get_feature_importance(self, top_n=10):
        """
        Return the `top_n` most important features as a DataFrame.

        Raises
        ------
        ValueError
            If the model exposes no feature_importances_ yet.
        """
        if not hasattr(self.model, 'feature_importances_'):
            raise ValueError("Modelo no entrenado")

        importance_df = pd.DataFrame({
            'feature': self.feature_names,
            'importance': self.model.feature_importances_
        }).sort_values('importance', ascending=False)

        return importance_df.head(top_n)

    def plot_predictions(self, title="Predictions vs Reality"):
        """
        Scatter predicted vs. actual values for train and test sets.
        """
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Training set.
        ax1.scatter(self.y_train, self.train_pred, alpha=0.6)
        ax1.plot([self.y_train.min(), self.y_train.max()],
                [self.y_train.min(), self.y_train.max()], 'r--', lw=2)
        ax1.set_xlabel('Actual')
        ax1.set_ylabel('Predicted')
        ax1.set_title('Training Set')
        ax1.grid(True, alpha=0.3)

        # Test set.
        ax2.scatter(self.y_test, self.test_pred, alpha=0.6, color='orange')
        ax2.plot([self.y_test.min(), self.y_test.max()],
                [self.y_test.min(), self.y_test.max()], 'r--', lw=2)
        ax2.set_xlabel('Actual')
        ax2.set_ylabel('Predicted')
        ax2.set_title('Test Set')
        ax2.grid(True, alpha=0.3)

        plt.suptitle(title)
        plt.tight_layout()
        plt.show()

        return fig

# Estrategia de trading basada en ML
def ml_trading_strategy(df, prediction_threshold=0.6):
    """
    Estrategia de trading usando predicciones de ML
    """
    # Entrenar predictor de dirección
    direction_predictor = TradingPredictor(prediction_type='direction', target_days=1)
    direction_predictor.fit(df)
    
    # Entrenar predictor de precio
    price_predictor = TradingPredictor(prediction_type='price', target_days=1)
    price_predictor.fit(df)
    
    # Generar señales
    signals = pd.DataFrame(index=df.index)
    signals['price'] = df['Close']
    signals['signal'] = 0
    signals['confidence'] = 0
    
    # Ventana móvil para predicciones
    window_size = 252
    
    for i in range(window_size, len(df) - 1):
        train_data = df.iloc[i-window_size:i]
        
        try:
            # Entrenar modelos con datos hasta el momento
            temp_direction = TradingPredictor(prediction_type='direction')
            temp_direction.fit(train_data, test_size=0.3)
            
            # Predecir dirección
            direction_pred = temp_direction.predict_next(train_data)[0]
            
            # Solo generar señal si hay confianza alta
            if temp_direction.metrics['test_accuracy'] > prediction_threshold:
                current_idx = df.index[i]
                
                if direction_pred > 0.5:  # Predicción alcista
                    signals.loc[current_idx, 'signal'] = 1
                else:  # Predicción bajista
                    signals.loc[current_idx, 'signal'] = -1
                
                signals.loc[current_idx, 'confidence'] = temp_direction.metrics['test_accuracy']
        
        except Exception as e:
            continue
    
    return signals

# Ejemplo de uso completo
def ml_example_analysis():
    """
    End-to-end ML example: direction and price predictors on AAPL,
    feature importances, next-day forecasts and a walk-forward
    strategy.

    Requires network access (data is downloaded through yfinance).
    Returns (direction_model, price_model, strategy_signals).
    """
    # Fetch daily OHLCV data.
    ticker = "AAPL"
    df = yf.download(ticker, start="2020-01-01", end="2024-01-01", interval="1d")
    
    print(f"=== ANÁLISIS ML: {ticker} ===\n")
    
    # Direction (up/down) classifier.
    print("🎯 PREDICTOR DE DIRECCIÓN:")
    direction_model = TradingPredictor(prediction_type='direction', target_days=1)
    direction_model.fit(df)
    
    print(f"   Precisión Entrenamiento: {direction_model.metrics['train_accuracy']:.1%}")
    print(f"   Precisión Prueba: {direction_model.metrics['test_accuracy']:.1%}")
    
    # Price regressor.
    print(f"\n📈 PREDICTOR DE PRECIO:")
    price_model = TradingPredictor(prediction_type='price', target_days=1)
    price_model.fit(df)
    
    print(f"   RMSE Entrenamiento: ${price_model.metrics['train_rmse']:.2f}")
    print(f"   RMSE Prueba: ${price_model.metrics['test_rmse']:.2f}")
    
    # Most informative features of the direction model.
    print(f"\n🔍 TOP CARACTERÍSTICAS:")
    importance = direction_model.get_feature_importance(5)
    for _, row in importance.iterrows():
        print(f"   {row['feature']}: {row['importance']:.3f}")
    
    # Next-day forecasts from both models.
    latest_direction = direction_model.predict_next(df, 1)[0]
    latest_price = price_model.predict_next(df, 1)[0]
    current_price = df['Close'].iloc[-1]
    
    print(f"\n🔮 PREDICCIONES:")
    print(f"   Dirección Próximo Día: {'⬆️ Alcista' if latest_direction > 0.5 else '⬇️ Bajista'}")
    print(f"   Precio Actual: ${current_price:.2f}")
    print(f"   Precio Predicho: ${latest_price:.2f}")
    print(f"   Cambio Esperado: {(latest_price/current_price - 1):.1%}")
    
    # Walk-forward strategy (slow: retrains a model per trading day).
    strategy_signals = ml_trading_strategy(df)
    
    # Aggregate strategy statistics.
    total_signals = strategy_signals['signal'].abs().sum()
    avg_confidence = strategy_signals[strategy_signals['confidence'] > 0]['confidence'].mean()
    
    print(f"\n📊 ESTRATEGIA ML:")
    print(f"   Total Señales: {total_signals}")
    print(f"   Confianza Promedio: {avg_confidence:.1%}")
    
    # Diagnostic plots.
    direction_model.plot_predictions(f"Direction Prediction - {ticker}")
    price_model.plot_predictions(f"Price Prediction - {ticker}")
    
    return direction_model, price_model, strategy_signals

# Script entry point: run the full ML example when executed directly.
if __name__ == "__main__":
    ml_example_analysis()

Mejores Prácticas para ML en Trading

1. Validación Temporal

def time_series_cross_validation(df, model_class, n_splits=5):
    """
    Walk-forward cross-validation for time-series data.

    Parameters
    ----------
    df : pd.DataFrame
        Historical data.
    model_class : type
        No-argument model factory exposing fit(df) and evaluate(df).
    n_splits : int
        Number of expanding-window folds.

    Returns
    -------
    np.ndarray
        One evaluation score per fold.
    """
    splitter = TimeSeriesSplit(n_splits=n_splits)
    fold_scores = []

    for train_idx, test_idx in splitter.split(df):
        # Expanding window: train strictly precedes test in time.
        candidate = model_class()
        candidate.fit(df.iloc[train_idx])
        fold_scores.append(candidate.evaluate(df.iloc[test_idx]))

    return np.array(fold_scores)

2. Feature Engineering Avanzado

def create_advanced_features(df, market_data=None):
    """
    Augment an OHLCV frame with advanced ML features.

    NOTE(review): relies on external helpers (calculate_rolling_beta,
    detect_doji_patterns, detect_hammer_patterns,
    detect_engulfing_patterns, detect_volatility_regime) that must be
    provided elsewhere — they are not defined in this module.

    Parameters
    ----------
    df : pd.DataFrame
        OHLCV data with a DatetimeIndex.
    market_data : pd.DataFrame or None
        Benchmark OHLCV used for beta / relative-strength features.
    """
    out = df.copy()

    # Market-relative features (only when a benchmark is supplied).
    if market_data is not None:
        out['beta'] = calculate_rolling_beta(df['Close'], market_data['Close'])
        out['relative_strength'] = df['Close'].pct_change() - market_data['Close'].pct_change()

    # Candlestick pattern flags.
    out['doji'] = detect_doji_patterns(df)
    out['hammer'] = detect_hammer_patterns(df)
    out['engulfing'] = detect_engulfing_patterns(df)

    # Volatility clustering regime.
    out['vol_regime'] = detect_volatility_regime(df['Close'])

    # Calendar/seasonal features from the DatetimeIndex.
    out['day_of_week'] = df.index.dayofweek
    out['month'] = df.index.month
    out['quarter'] = df.index.quarter

    return out

3. Gestión de Overfitting

class OverfittingDetector:
    """
    Simple heuristic overfitting detector for trading models.

    Each call to check_overfitting() evaluates one train/test score
    pair; warning messages from all checks accumulate in
    `self.warnings` for later reporting.
    """

    def __init__(self):
        # Accumulated warning messages across all checks performed.
        self.warnings = []

    def check_overfitting(self, train_score, test_score, threshold=0.1):
        """
        Check a single train/test score pair for overfitting symptoms.

        Parameters
        ----------
        train_score, test_score : float
            Scores on the training and held-out sets.
        threshold : float
            Maximum tolerated |train - test| gap.

        Returns
        -------
        bool
            True when THIS pair looks healthy.  BUG FIX: the original
            returned `len(self.warnings) == 0` over the cumulative
            history, so once any check had failed, every subsequent
            check reported overfitting regardless of its own scores.
        """
        new_warnings = []

        if abs(train_score - test_score) > threshold:
            new_warnings.append("High difference between train/test scores")

        if train_score > 0.95:  # suspiciously close to perfect
            new_warnings.append("Training score suspiciously high")

        # Keep the full history available for reporting.
        self.warnings.extend(new_warnings)

        return len(new_warnings) == 0

    def suggest_fixes(self):
        """
        Return generic remedies for an overfitted trading model.
        """
        suggestions = [
            "Reduce model complexity (max_depth, n_estimators)",
            "Add regularization (L1/L2)",
            "Increase training data",
            "Use feature selection",
            "Implement early stopping"
        ]
        return suggestions

Aplicaciones Específicas para Small Caps

1. Predicción de Gaps

def gap_prediction_model(df, gap_threshold=0.02):
    """
    Build features and labels for predicting overnight gaps in small
    caps.

    Parameters
    ----------
    df : pd.DataFrame
        OHLCV data.
    gap_threshold : float
        Absolute overnight move above which a day is labelled a gap.

    Returns
    -------
    tuple(pd.DataFrame, pd.Series)
        Feature matrix (NaN rows not dropped) and 0/1 gap labels.
    """
    feats = pd.DataFrame(index=df.index)

    # Previous-session features.
    feats['prev_close_vol'] = df['Volume'].shift(1)
    feats['prev_range'] = ((df['High'] - df['Low']) / df['Close']).shift(1)
    feats['prev_return'] = df['Close'].pct_change().shift(1)

    # After-hours proxies.
    feats['ah_volume'] = df['Volume'].rolling(5).mean()  # proxy only
    feats['news_sentiment'] = 0  # placeholder for a real sentiment feed

    # Label: overnight open vs. previous close beyond the threshold.
    overnight_gap = df['Open'].div(df['Close'].shift(1)).sub(1)
    label = overnight_gap.abs().gt(gap_threshold).astype(int)

    return feats, label

2. Volatility Prediction

def volatility_prediction_model(df, horizon=5):
    """
    Build features and target for predicting future realised
    volatility of small caps.

    Parameters
    ----------
    df : pd.DataFrame
        OHLCV data (only 'Close' is used).
    horizon : int
        Number of future days over which the target volatility is
        measured.

    Returns
    -------
    tuple(pd.DataFrame, pd.Series)
        GARCH-style lag features and the forward realised volatility,
        aligned on a common index.
    """
    returns = df['Close'].pct_change()

    # GARCH-like lagged return / volatility features.
    features = pd.DataFrame(index=df.index)
    features['returns_lag1'] = returns.shift(1)
    features['returns_lag2'] = returns.shift(2)
    features['vol_lag1'] = returns.rolling(5).std().shift(1)
    features['vol_lag2'] = returns.rolling(10).std().shift(1)

    # Target: realised volatility over the next `horizon` days.
    target = returns.rolling(horizon).std().shift(-horizon)

    # BUG FIX: the original dropped NaNs on features and target
    # independently, leaving them with different indices (features
    # lose leading warm-up rows, the target loses trailing rows) and
    # silently misaligning X and y for any caller that zipped them.
    valid = features.dropna().index.intersection(target.dropna().index)
    return features.loc[valid], target.loc[valid]

Métricas de Evaluación para Trading

def evaluate_trading_model(predictions, actual_returns, transaction_cost=0.001):
    """
    Evaluate model output from a trading point of view.

    Parameters
    ----------
    predictions : array-like
        Model outputs in [0, 1]; > 0.5 is read as a long signal,
        otherwise short.
    actual_returns : array-like
        Realised per-period returns aligned with `predictions`.
    transaction_cost : float
        Proportional cost charged on every unit of position change.

    Returns
    -------
    dict
        sharpe_ratio, max_drawdown, hit_rate, total_return, volatility.
    """
    # Convert probabilities to long/short signals.
    signals = np.where(np.asarray(predictions) > 0.5, 1, -1)

    # Per-period P&L net of turnover costs.  BUG FIX: wrapped in a
    # pandas Series so the downstream .cumsum()/.cummax() calls work
    # even when the caller passes plain numpy arrays (the original
    # crashed in calculate_max_drawdown because ndarrays have no
    # .cummax()).
    gross = signals * np.asarray(actual_returns)
    turnover = np.abs(np.diff(signals, prepend=signals[0]))
    strategy_returns = pd.Series(gross - turnover * transaction_cost)

    # Trading-specific metrics (annualised assuming daily bars).
    sharpe_ratio = np.sqrt(252) * strategy_returns.mean() / strategy_returns.std()
    max_drawdown = calculate_max_drawdown(strategy_returns.cumsum())
    hit_rate = (strategy_returns > 0).mean()

    return {
        'sharpe_ratio': sharpe_ratio,
        'max_drawdown': max_drawdown,
        'hit_rate': hit_rate,
        'total_return': strategy_returns.sum(),
        'volatility': strategy_returns.std() * np.sqrt(252)
    }

def calculate_max_drawdown(equity_curve):
    """Maximum drawdown of an equity curve (most negative peak-to-trough,
    relative to the running peak)."""
    peak = equity_curve.cummax()
    drawdown = (equity_curve - peak) / peak
    return drawdown.min()

Siguiente Paso

Con Machine Learning dominado, continuemos con Análisis de Sentimiento para incorporar datos alternativos en nuestras estrategias.