Community Scripts · QuantifyMe

Score▼	Strategy	Author	All 45 EURUSD 14 USDJPY 2 GBPUSD 5 AUDUSD 7 USDCAD 5 USDCHF 1 NZDUSD 6	All 45 15min 40	Win Rate▼	Return▼	PF▼	MDD▼	Trades▼
🥇	USD/CAD BB Mean-Reversion + ATR XGBoost Maximise risk-adjusted return (Sharpe/Calmar) on USD/CAD 15-min using Bollinger Band mean-reversion signals augmented by ATR, RSI, MACD, and…	C @candle_owl	USDCAD	15min	59.1%72.1%	+4.84%+25.91%	1.313.34	1.34%1.34%	36268
# ╔══════════════════════════════════════════════════════════════╗ # ║ STRATEGY REQUEST LOG ║ # ╚══════════════════════════════════════════════════════════════╝ # Generated : 2026-05-06 02:36:58 # Model : XGBoost # Feature Eng. : BB (20,2.0), ATR 14 + Auto-add features: ON # Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP # Optimization : Maximize risk-adjusted return # Risk Mgmt : Stop loss 0.5%, Take profit 1.0% # Risk Filter : — # ══════════════════════════════════════════════════════════════ # ============================================================ # SECTION 0 — IMPORTS & CONSTANTS import numpy as np import pandas as pd DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/USDCAD_15min.parquet" START_DATE = "2025-04-24" END_DATE = "2026-04-24" VALIDATION_DATE = "" TRAIN_SPLIT = 0.7 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # ── Bollinger Bands (20, 2) ────────────────────────────────────────────── bb_period = 20 bb_std = 2.0 bb_mid = close.rolling(bb_period).mean() bb_sigma = close.rolling(bb_period).std(ddof=0) bb_upper = bb_mid + bb_std * bb_sigma bb_lower = bb_mid - bb_std * bb_sigma df["bb_mid"] = bb_mid df["bb_upper"] = bb_upper df["bb_lower"] = bb_lower df["bb_width"] = (bb_upper - bb_lower) / bb_mid df["bb_pct"] = (close - bb_lower) / (bb_upper - bb_lower) # ── ATR (14) & Normalised ATR ──────────────────────────────────────────── atr_period = 14 tr = pd.concat([ high - low, (high - close.shift(1)).abs(), (low - close.shift(1)).abs() ], axis=1).max(axis=1) atr = tr.ewm(alpha=1.0 / atr_period, min_periods=atr_period, adjust=False).mean() natr = atr / close df["atr"] = atr df["natr"] = natr # ── Price momentum / returns ───────────────────────────────────────────── df["ret_1"] = close.pct_change(1) df["ret_4"] = close.pct_change(4) df["ret_8"] = close.pct_change(8) df["ret_16"] = close.pct_change(16) # ── Distance from Bollinger mid / bands ────────────────────────────────── df["close_minus_mid"] = (close - bb_mid) / bb_mid df["close_minus_upper"] = (close - bb_upper) / bb_mid df["close_minus_lower"] = (close - bb_lower) / bb_mid # ── BB squeeze flag: width below rolling median ─────────────────────────── bb_width_med = df["bb_width"].rolling(50).median() df["bb_squeeze"] = np.where(df["bb_width"] < bb_width_med, 1.0, 0.0) # ── BB mean-reversion z-score ──────────────────────────────────────────── df["bb_z"] = (close - bb_mid) / (bb_sigma + 1e-12) # ── Candle body / wick features ────────────────────────────────────────── body = (close - open_).abs() candle_rng = (high - low).replace(0, np.nan) df["body_ratio"] = body / candle_rng df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / candle_rng df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / candle_rng df["bull_candle"] = np.where(close > open_, 1.0, 0.0) # ── RSI (14) built from scratch ────────────────────────────────────────── rsi_period = 14 delta = close.diff() gain = delta.clip(lower=0) loss = (-delta).clip(lower=0) avg_g = gain.ewm(alpha=1.0 / rsi_period, min_periods=rsi_period, adjust=False).mean() avg_l = loss.ewm(alpha=1.0 / rsi_period, min_periods=rsi_period, adjust=False).mean() rs = avg_g / (avg_l + 1e-12) rsi = 100.0 - (100.0 / (1.0 + rs)) df["rsi_14"] = rsi # RSI deviation from neutral 50 df["rsi_dev"] = (rsi - 50.0) / 50.0 # ── MACD (12, 26, 9) ───────────────────────────────────────────────────── ema12 = close.ewm(span=12, adjust=False).mean() ema26 = close.ewm(span=26, adjust=False).mean() macd_line = ema12 - ema26 macd_sig = macd_line.ewm(span=9, adjust=False).mean() df["macd"] = macd_line / close df["macd_hist"] = (macd_line - macd_sig) / close # ── Rolling volatility (realised over 20 bars) ─────────────────────────── df["vol_20"] = df["ret_1"].rolling(20).std() # ── ATR z-score vs 50-bar rolling mean ─────────────────────────────────── atr_mean = atr.rolling(50).mean() atr_std = atr.rolling(50).std(ddof=0) df["atr_z"] = (atr - atr_mean) / (atr_std + 1e-12) # ── Volume-of-BB-touches over last 10 bars ─────────────────────────────── near_upper = (close >= bb_upper * 0.998).astype(float) near_lower = (close <= bb_lower * 1.002).astype(float) df["touch_upper_10"] = near_upper.rolling(10).sum() df["touch_lower_10"] = near_lower.rolling(10).sum() # ── SMA 50 (trend filter helper) ───────────────────────────────────────── df["sma_50"] = close.rolling(50).mean() df["close_vs_sma"] = (close - df["sma_50"]) / df["sma_50"] # ── EMA cross (9 / 21) ─────────────────────────────────────────────────── ema9 = close.ewm(span=9, adjust=False).mean() ema21 = close.ewm(span=21, adjust=False).mean() df["ema_cross"] = (ema9 - ema21) / close # ── Bar-of-day / session ───────────────────────────────────────────────── if hasattr(df.index, "hour"): df["hour_sin"] = np.sin(2 * np.pi * df.index.hour / 24.0) df["hour_cos"] = np.cos(2 * np.pi * df.index.hour / 24.0) else: df["hour_sin"] = 0.0 df["hour_cos"] = 1.0 # ── Lag features on bb_pct and rsi ─────────────────────────────────────── for lag in [1, 2, 4]: df[f"bb_pct_lag{lag}"] = df["bb_pct"].shift(lag) df[f"rsi_14_lag{lag}"] = df["rsi_14"].shift(lag) df[f"macd_hist_lag{lag}"] = df["macd_hist"].shift(lag) # ── Fill NaN from warm-up ───────────────────────────────────────────────── df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "USD/CAD BB Mean-Reversion + ATR XGBoost", "model_type": "XGBClassifier", "model_params": { "n_estimators": 400, "max_depth": 4, "learning_rate": 0.04, "subsample": 0.75, "colsample_bytree": 0.70, "min_child_weight": 3, "gamma": 0.15, "reg_alpha": 0.10, "reg_lambda": 1.50, "objective": "binary:logistic", "random_state": 42, "n_jobs": -1, }, "signal_threshold": 0.55, "direction": "both", "stop_loss": 0.005, "take_profit": 0.010, "cooldown": 0, "max_positions": 1, "on_opposite": "reverse", "session_filter": [7, 20], "min_atr": 0.0002, "trend_filter": None, "target_horizon": 4, "objective": ( "Maximise risk-adjusted return (Sharpe/Calmar) on USD/CAD 15-min " "using Bollinger Band mean-reversion signals augmented by ATR, RSI, " "MACD, and EMA-cross features fed into a regularised XGBoost classifier. " "SL=0.5% / TP=1.0% gives a 1:2 RR floor. Conservative depth (4) and " "strong L1/L2 regularisation prevent overfitting on a single year of data." ), "notes": ( "BB squeeze flag and bb_z capture regime; atr_z filters noisy bars. " "Session filter 07-20 UTC covers London + NY overlap for tighter spreads. " "min_atr=0.0002 avoids dead-market whipsaws. Lag features on bb_pct and " "rsi_14 give the model short-term momentum context without look-ahead." ), }
🥈	EMA Cross 50/200 + ATR Momentum (XGBoost) Maximize risk-adjusted return (Sharpe/Calmar) on EUR/USD 15-min data. EMA 50/200 cross provides the primary trend regime filter. ATR 14 gate…	D @delta-atlas-858	EURUSD	15min	48.5%64.7%	+8.52%+12.40%	2.412.47	0.83%0.83%	6817
# ╔══════════════════════════════════════════════════════════════╗ # ║ STRATEGY REQUEST LOG ║ # ╚══════════════════════════════════════════════════════════════╝ # Generated : 2026-05-05 10:27:10 # Model : XGBoost # Feature Eng. : EMA (50,200), ATR 14 + Auto-add features: ON # Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP # Optimization : Maximize risk-adjusted return # Risk Mgmt : Stop loss 0.5%, Take profit 1.0% # Risk Filter : — # ══════════════════════════════════════════════════════════════ # ============================================================ # SECTION 0 — IMPORTS & CONSTANTS import numpy as np import pandas as pd DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/EURUSD_15min.parquet" START_DATE = "2025-04-23" END_DATE = "2026-04-23" VALIDATION_DATE = "" TRAIN_SPLIT = 0.7 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # ── EMA 50 and EMA 200 ────────────────────────────────────────────────── ema_50 = close.ewm(span=50, adjust=False).mean() ema_200 = close.ewm(span=200, adjust=False).mean() df["ema_50"] = ema_50 df["ema_200"] = ema_200 df["dm_ema_50"] = (close - ema_50) / ema_50 df["dm_ema_200"] = (close - ema_200) / ema_200 # EMA cross signal: positive when fast > slow df["ema_cross"] = ema_50 - ema_200 df["ema_cross_norm"] = df["ema_cross"] / ema_200 # Cross direction change (momentum of the spread) df["ema_cross_delta"] = df["ema_cross"].diff(1) df["ema_cross_accel"] = df["ema_cross_delta"].diff(1) # ── ATR 14 ────────────────────────────────────────────────────────────── prev_close = close.shift(1) tr = pd.concat([ high - low, (high - prev_close).abs(), (low - prev_close).abs() ], axis=1).max(axis=1) atr = tr.ewm(span=14, adjust=False).mean() df["atr"] = atr df["natr"] = atr / close # ── Price momentum features ───────────────────────────────────────────── for lag in [1, 2, 4, 8, 16]: df[f"ret_{lag}"] = close.pct_change(lag) # ── Volatility regime ─────────────────────────────────────────────────── df["atr_ratio"] = atr / atr.rolling(50).mean() # ATR vs its own MA df["natr_ma20"] = df["natr"].rolling(20).mean() # ── RSI 14 ────────────────────────────────────────────────────────────── delta = close.diff(1) gain = delta.clip(lower=0) loss = (-delta).clip(lower=0) avg_g = gain.ewm(span=14, adjust=False).mean() avg_l = loss.ewm(span=14, adjust=False).mean() rs = avg_g / avg_l.replace(0, np.nan) df["rsi_14"] = 100 - (100 / (1 + rs)) df["rsi_delta"] = df["rsi_14"].diff(1) # ── MACD (12/26/9) ────────────────────────────────────────────────────── ema_12 = close.ewm(span=12, adjust=False).mean() ema_26 = close.ewm(span=26, adjust=False).mean() macd = ema_12 - ema_26 signal = macd.ewm(span=9, adjust=False).mean() df["macd"] = macd df["macd_signal"] = signal df["macd_hist"] = macd - signal df["macd_hist_delta"] = df["macd_hist"].diff(1) # ── Bollinger Bands (20, 2σ) ───────────────────────────────────────────── bb_mid = close.rolling(20).mean() bb_std = close.rolling(20).std() bb_up = bb_mid + 2 * bb_std bb_lo = bb_mid - 2 * bb_std df["bb_pos"] = (close - bb_lo) / (bb_up - bb_lo).replace(0, np.nan) df["bb_width"] = (bb_up - bb_lo) / bb_mid # ── Stochastic %K / %D (14, 3) ────────────────────────────────────────── low14 = low.rolling(14).min() high14 = high.rolling(14).max() stoch_k = 100 * (close - low14) / (high14 - low14).replace(0, np.nan) stoch_d = stoch_k.rolling(3).mean() df["stoch_k"] = stoch_k df["stoch_d"] = stoch_d df["stoch_kd"] = stoch_k - stoch_d # ── Volume / body / wick features ─────────────────────────────────────── body = (close - open_).abs() candle = (high - low).replace(0, np.nan) df["body_ratio"] = body / candle df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / candle df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / candle df["bull_candle"] = np.where(close > open_, 1, 0) # ── Rolling z-score of close vs SMA 50 ────────────────────────────────── sma_50 = close.rolling(50).mean() sma_50_std = close.rolling(50).std() df["zscore_50"] = (close - sma_50) / sma_50_std.replace(0, np.nan) # ── High/Low breakout flags ────────────────────────────────────────────── df["high_20_break"] = np.where(close > high.rolling(20).max().shift(1), 1, 0) df["low_20_break"] = np.where(close < low.rolling(20).min().shift(1), 1, 0) # ── Time-of-day features (cyclical encoding) ───────────────────────────── if hasattr(df.index, 'hour'): hour = df.index.hour + df.index.minute / 60.0 df["hour_sin"] = np.sin(2 * np.pi * hour / 24.0) df["hour_cos"] = np.cos(2 * np.pi * hour / 24.0) # ── Fill NaN from indicator warm-up ───────────────────────────────────── df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "EMA Cross 50/200 + ATR Momentum (XGBoost)", "model_type": "XGBClassifier", "model_params": { "n_estimators": 400, "max_depth": 4, "learning_rate": 0.04, "subsample": 0.75, "colsample_bytree": 0.70, "min_child_weight": 3, "gamma": 0.15, "reg_alpha": 0.10, "reg_lambda": 1.50, "objective": "binary:logistic", "random_state": 42, "n_jobs": -1, }, "signal_threshold": 0.55, "direction": "both", "stop_loss": 0.005, "take_profit": 0.010, "cooldown": 0, "max_positions": 1, "on_opposite": "reverse", "session_filter": [6, 20], "min_atr": 0.0002, "trend_filter": "sma_50", "target_horizon": 4, "objective": ( "Maximize risk-adjusted return (Sharpe/Calmar) on EUR/USD 15-min data. " "EMA 50/200 cross provides the primary trend regime filter. " "ATR 14 gates entries by volatility (min_atr avoids dead-market noise). " "XGBoost chosen for its ability to capture non-linear feature interactions. " "Conservative depth=4 and regularisation (alpha/lambda) prevent overfitting " "on the relatively short 1-year window. 2:1 reward/risk (SL=0.5%, TP=1.0%) " "ensures positive expectancy even at modest hit-rates. Session filter 06-20 UTC " "keeps the strategy in liquid London/NY hours only." ), "notes": ( "Feature set combines trend (EMA cross, z-score), momentum (RSI, MACD, returns), " "volatility (ATR ratio, BB width), and price structure (body/wick ratios, " "stochastic). Cyclical hour encoding captures intraday seasonality without " "introducing lookahead. bfill().ffill() handles EMA warm-up NaNs gracefully." ), }
🥉	GBP/USD Gradient Boosting Trend + Mean-Reversion Maximize risk-adjusted return (Sharpe/Calmar) on GBP/USD 15-min. GradientBoostingClassifier with 400 shallow trees (depth 4) and a conservat…	C @candid-owl-125	GBPUSD	15min	53.5%58.0%	+3.70%+34.59%	1.161.78	2.46%2.46%	31269
# ╔══════════════════════════════════════════════════════════════╗ # ║ STRATEGY REQUEST LOG ║ # ╚══════════════════════════════════════════════════════════════╝ # Generated : 2026-05-06 01:41:21 # Model : Gradient Boosting # Feature Eng. : SMA (20,50,200), BB (20,2.0), RSI 14, MACD (12,26,9), ATR 14 + Auto-add features: ON # Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP # Optimization : Maximize risk-adjusted return # Risk Mgmt : Stop loss 0.5%, Take profit 1.0% # Risk Filter : — # ══════════════════════════════════════════════════════════════ # ============================================================ # SECTION 0 — IMPORTS & CONSTANTS import numpy as np import pandas as pd DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/GBPUSD_15min.parquet" START_DATE = "2025-04-24" END_DATE = "2026-04-24" VALIDATION_DATE = "" TRAIN_SPLIT = 0.7 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # ── SMA 20, 50, 200 + distance from close ────────────────────────────── for period in [20, 50, 200]: sma = close.rolling(period).mean() df[f"sma_{period}"] = sma df[f"dm_sma_{period}"] = (close - sma) / sma # ── Bollinger Bands (20, 2) ───────────────────────────────────────────── bb_mid = close.rolling(20).mean() bb_std = close.rolling(20).std(ddof=0) bb_upper = bb_mid + 2.0 * bb_std bb_lower = bb_mid - 2.0 * bb_std df["bb_mid"] = bb_mid df["bb_upper"] = bb_upper df["bb_lower"] = bb_lower df["bb_width"] = (bb_upper - bb_lower) / bb_mid bb_range = bb_upper - bb_lower df["bb_pct"] = np.where(bb_range != 0, (close - bb_lower) / bb_range, 0.5) # ── RSI 14 ───────────────────────────────────────────────────────────── delta = close.diff() gain = delta.clip(lower=0) loss = (-delta).clip(lower=0) avg_gain = gain.ewm(com=13, min_periods=14, adjust=False).mean() avg_loss = loss.ewm(com=13, min_periods=14, adjust=False).mean() rs = np.where(avg_loss != 0, avg_gain / avg_loss, 100.0) df["rsi_14"] = 100.0 - (100.0 / (1.0 + rs)) # ── MACD (12, 26, 9) ─────────────────────────────────────────────────── ema_12 = close.ewm(span=12, adjust=False).mean() ema_26 = close.ewm(span=26, adjust=False).mean() macd_line = ema_12 - ema_26 signal_line = macd_line.ewm(span=9, adjust=False).mean() df["macd_line"] = macd_line df["macd_signal"] = signal_line df["macd_hist"] = macd_line - signal_line # ── ATR 14 + NATR ────────────────────────────────────────────────────── prev_close = close.shift(1) tr = pd.concat([ high - low, (high - prev_close).abs(), (low - prev_close).abs() ], axis=1).max(axis=1) atr = tr.ewm(com=13, min_periods=14, adjust=False).mean() df["atr_14"] = atr df["natr"] = np.where(close != 0, atr / close, 0.0) # ── Price momentum (returns over multiple horizons) ──────────────────── for lag in [1, 4, 8, 16]: df[f"ret_{lag}"] = close.pct_change(lag) # ── Candle body & wick features ──────────────────────────────────────── body = (close - open_).abs() candle_range = (high - low).replace(0, np.nan) df["body_ratio"] = body / candle_range df["upper_wick"] = np.where( candle_range.notna(), (high - close.combine(open_, max)) / candle_range, 0.0 ) df["lower_wick"] = np.where( candle_range.notna(), (close.combine(open_, min) - low) / candle_range, 0.0 ) df["candle_dir"] = np.where(close >= open_, 1.0, -1.0) # ── Volume proxy: normalised candle range ────────────────────────────── rolling_range = candle_range.rolling(20).mean() df["norm_range"] = np.where( rolling_range != 0, (high - low) / rolling_range, 1.0 ) # ── RSI derived features ─────────────────────────────────────────────── df["rsi_ob"] = np.where(df["rsi_14"] > 70, 1.0, 0.0) df["rsi_os"] = np.where(df["rsi_14"] < 30, 1.0, 0.0) df["rsi_mid"] = df["rsi_14"] - 50.0 df["rsi_slope"] = df["rsi_14"].diff(3) # ── MACD histogram slope ─────────────────────────────────────────────── df["macd_hist_slope"] = df["macd_hist"].diff(2) df["macd_cross"] = np.where( (df["macd_hist"] > 0) & (df["macd_hist"].shift(1) <= 0), 1.0, np.where( (df["macd_hist"] < 0) & (df["macd_hist"].shift(1) >= 0), -1.0, 0.0 ) ) # ── Bollinger squeeze (low volatility precursor) ─────────────────────── bb_width_ma = df["bb_width"].rolling(20).mean() df["bb_squeeze"] = np.where(df["bb_width"] < bb_width_ma, 1.0, 0.0) # ── SMA slope features ───────────────────────────────────────────────── df["sma_20_slope"] = df["sma_20"].pct_change(4) df["sma_50_slope"] = df["sma_50"].pct_change(8) # ── Cross-SMA alignment (trend structure) ────────────────────────────── df["sma20_above_50"] = np.where(df["sma_20"] > df["sma_50"], 1.0, 0.0) df["sma50_above_200"] = np.where(df["sma_50"] > df["sma_200"], 1.0, 0.0) df["close_above_200"] = np.where(close > df["sma_200"], 1.0, 0.0) # ── Lagged close returns as additional features ──────────────────────── for lag in [1, 2, 3]: df[f"close_lag_{lag}"] = close.shift(lag) # ── Rolling volatility (std of returns) ──────────────────────────────── df["vol_10"] = close.pct_change().rolling(10).std() df["vol_20"] = close.pct_change().rolling(20).std() df["vol_ratio"] = np.where( df["vol_20"] != 0, df["vol_10"] / df["vol_20"], 1.0 ) # ── Hour-of-day (London/NY session proxy) ────────────────────────────── if hasattr(df.index, "hour"): df["hour_sin"] = np.sin(2 * np.pi * df.index.hour / 24.0) df["hour_cos"] = np.cos(2 * np.pi * df.index.hour / 24.0) else: df["hour_sin"] = 0.0 df["hour_cos"] = 1.0 # ── Fill NaN from warm-up periods ────────────────────────────────────── df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "GBP/USD Gradient Boosting Trend + Mean-Reversion", "model_type": "GradientBoostingClassifier", "model_params": { "n_estimators": 400, "max_depth": 4, "learning_rate": 0.04, "subsample": 0.8, "min_samples_leaf": 20, "max_features": "sqrt", "validation_fraction": 0.1, "n_iter_no_change": 30, "tol": 1e-4, "random_state": 42, }, "signal_threshold": 0.55, "direction": "both", "stop_loss": 0.005, "take_profit": 0.010, "cooldown": 0, "max_positions": 1, "on_opposite": "reverse", "session_filter": [6, 20], "min_atr": 0.0002, "trend_filter": None, "target_horizon": 4, "objective": ( "Maximize risk-adjusted return (Sharpe/Calmar) on GBP/USD 15-min. " "GradientBoostingClassifier with 400 shallow trees (depth 4) and a " "conservative learning rate of 0.04 avoids overfitting while capturing " "non-linear interactions between trend (SMA alignment, slope), momentum " "(MACD histogram, RSI), and volatility (ATR, BB squeeze) features. " "SL=0.5% / TP=1.0% gives 1:2 R/R. Session filter 06-20 UTC covers " "London open through NY close where GBP/USD liquidity is highest. " "min_atr filter avoids flat/illiquid bars." ), "notes": ( "Features include multi-period SMA distances, Bollinger Band pct/width, " "RSI with overbought/oversold flags, MACD histogram slope and crossover, " "ATR-normalised volatility, candle body/wick ratios, rolling vol ratio, " "and hour-of-day cyclical encoding. target_horizon=4 (1-hour forward) " "balances signal frequency against predictability at 15-min resolution." ), }
7.95	USD/JPY BB Mean-Reversion + ATR Gradient Boost Maximise Sharpe ratio via a Gradient Boosting classifier trained on Bollinger Band position (bb_pct), normalised bandwidth (bb_width), ATR/N…	R @ratio_witch	USDJPY	15min	60.2%62.9%	+4.28%+14.16%	1.232.07	2.32%2.32%	16670
# ╔══════════════════════════════════════════════════════════════╗ # ║ STRATEGY REQUEST LOG ║ # ╚══════════════════════════════════════════════════════════════╝ # Generated : 2026-05-06 02:01:39 # Model : Gradient Boosting # Feature Eng. : BB (20,2.0), ATR 14 + Auto-add features: ON # Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP # Optimization : Maximize risk-adjusted return # Risk Mgmt : Stop loss 0.5%, Take profit 1.0% # Risk Filter : — # ══════════════════════════════════════════════════════════════ # ============================================================ # SECTION 0 — IMPORTS & CONSTANTS import numpy as np import pandas as pd DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/USDJPY_15min.parquet" START_DATE = "2025-04-24" END_DATE = "2026-04-24" VALIDATION_DATE = "" TRAIN_SPLIT = 0.7 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # ── Bollinger Bands (period=20, std_dev=2.0) ────────────────────────────── bb_period = 20 bb_std = 2.0 bb_mid = close.rolling(bb_period).mean() bb_sigma = close.rolling(bb_period).std(ddof=0) bb_upper = bb_mid + bb_std * bb_sigma bb_lower = bb_mid - bb_std * bb_sigma df["bb_mid"] = bb_mid df["bb_upper"] = bb_upper df["bb_lower"] = bb_lower # bb_width: normalised band width (volatility proxy) df["bb_width"] = (bb_upper - bb_lower) / bb_mid # bb_pct: position of close within the band [0, 1] band_range = bb_upper - bb_lower df["bb_pct"] = (close - bb_lower) / band_range # Distance from close to mid in units of band width df["bb_dist_mid"] = (close - bb_mid) / bb_mid # ── ATR (period=14) ─────────────────────────────────────────────────────── atr_period = 14 prev_close = close.shift(1) tr = pd.concat([ high - low, (high - prev_close).abs(), (low - prev_close).abs() ], axis=1).max(axis=1) atr = tr.ewm(span=atr_period, min_periods=atr_period, adjust=False).mean() natr = atr / close df["atr"] = atr df["natr"] = natr # ── Momentum / trend features ───────────────────────────────────────────── # Rate of change at multiple horizons for n in [1, 4, 8, 16]: df[f"roc_{n}"] = close.pct_change(n) # RSI (14) rsi_period = 14 delta = close.diff() gain = delta.clip(lower=0) loss = (-delta).clip(lower=0) avg_gain = gain.ewm(span=rsi_period, min_periods=rsi_period, adjust=False).mean() avg_loss = loss.ewm(span=rsi_period, min_periods=rsi_period, adjust=False).mean() rs = avg_gain / (avg_loss + 1e-10) rsi = 100 - (100 / (1 + rs)) df["rsi_14"] = rsi # RSI derived: distance from 50 (centred, normalised) df["rsi_dev"] = (rsi - 50) / 50 # ── MACD (12, 26, 9) ────────────────────────────────────────────────────── ema12 = close.ewm(span=12, adjust=False).mean() ema26 = close.ewm(span=26, adjust=False).mean() macd_line = ema12 - ema26 macd_signal = macd_line.ewm(span=9, adjust=False).mean() macd_hist = macd_line - macd_signal df["macd_line"] = macd_line df["macd_signal"] = macd_signal df["macd_hist"] = macd_hist # ── Trend (SMA 50) ──────────────────────────────────────────────────────── sma50 = close.rolling(50).mean() df["sma_50"] = sma50 df["close_vs_sma50"] = (close - sma50) / sma50 # normalised distance # ── Volume / candle structure features ──────────────────────────────────── body = (close - open_).abs() candle_rng = high - low df["body_ratio"] = body / (candle_rng + 1e-10) # body as fraction of range df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / (candle_rng + 1e-10) df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / (candle_rng + 1e-10) df["candle_dir"] = np.where(close > open_, 1.0, -1.0) # bullish / bearish bar # ── Lagged bb_pct & rsi (to give the model recent history) ─────────────── for lag in [1, 2, 3]: df[f"bb_pct_lag{lag}"] = df["bb_pct"].shift(lag) df[f"rsi_lag{lag}"] = df["rsi_14"].shift(lag) df[f"macd_hist_lag{lag}"] = df["macd_hist"].shift(lag) # ── Volatility regime flag ──────────────────────────────────────────────── natr_ma = natr.rolling(50).mean() df["vol_regime"] = np.where(natr > natr_ma, 1.0, 0.0) # 1 = high-vol regime # ── BB squeeze detection ────────────────────────────────────────────────── bb_width_ma = df["bb_width"].rolling(50).mean() df["bb_squeeze"] = np.where(df["bb_width"] < bb_width_ma, 1.0, 0.0) # ── Mean-reversion signal strength ──────────────────────────────────────── # Positive → oversold (close below lower band), Negative → overbought df["mr_signal"] = 0.5 - df["bb_pct"] # centred: +0.5 at lower band, -0.5 at upper # ── Fill NaN from warm-up ───────────────────────────────────────────────── df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "USD/JPY BB Mean-Reversion + ATR Gradient Boost", "model_type": "GradientBoostingClassifier", "model_params": { "n_estimators": 500, "max_depth": 4, "learning_rate": 0.03, "subsample": 0.8, "min_samples_leaf": 20, "max_features": "sqrt", "validation_fraction": 0.1, "n_iter_no_change": 30, "tol": 1e-4, "random_state": 42, }, "signal_threshold": 0.55, "direction": "both", "stop_loss": 0.005, "take_profit": 0.010, "cooldown": 0, "max_positions": 1, "on_opposite": "reverse", "session_filter": None, "min_atr": None, "trend_filter": None, "target_horizon": 4, "objective": ( "Maximise Sharpe ratio via a Gradient Boosting classifier trained on " "Bollinger Band position (bb_pct), normalised bandwidth (bb_width), " "ATR/NATR, RSI, MACD histogram, candle structure, and lagged features. " "GBM chosen for its ability to capture non-linear interactions between " "volatility (ATR) and mean-reversion (BB) signals. n_iter_no_change " "acts as early stopping to prevent overfitting on the 15-min USDJPY series. " "SL=0.5% / TP=1.0% gives a 1:2 risk-reward; threshold=0.55 reduces noise trades." ), "notes": ( "Bollinger Bands are the primary mean-reversion anchor; ATR/NATR filter " "entries to adequate volatility bars. RSI and MACD provide momentum context " "to avoid fading strong trends. Lagged features (up to 3 bars) give the model " "short-term regime memory without look-ahead. vol_regime and bb_squeeze flags " "allow the model to differentiate trending vs. ranging conditions automatically. " "No session filter applied — USDJPY is liquid across Asian and European sessions." ), }
5.21	GBP/USD SMA Trend Gradient Boosting Risk-Adj Maximize risk-adjusted return (Sharpe/Calmar) on GBP/USD 15-min data. GradientBoostingClassifier chosen for its strong bias-variance tradeof…	R @ratio_witch	GBPUSD	15min	43.1%46.2%	+6.85%+15.56%	1.711.95	2.69%2.69%	7213
# ╔══════════════════════════════════════════════════════════════╗ # ║ STRATEGY REQUEST LOG ║ # ╚══════════════════════════════════════════════════════════════╝ # Generated : 2026-05-06 01:47:56 # Model : Gradient Boosting # Feature Eng. : SMA (20,50,200) + Auto-add features: ON # Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP # Optimization : Maximize risk-adjusted return # Risk Mgmt : Stop loss 0.5%, Take profit 1.0% # Risk Filter : — # ══════════════════════════════════════════════════════════════ # ============================================================ # SECTION 0 — IMPORTS & CONSTANTS import numpy as np import pandas as pd DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/GBPUSD_15min.parquet" START_DATE = "2025-04-24" END_DATE = "2026-04-24" VALIDATION_DATE = "" TRAIN_SPLIT = 0.7 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # ── SMA features (required) ────────────────────────────────────────────── for period in [20, 50, 200]: sma = close.rolling(period).mean() df[f"sma_{period}"] = sma df[f"dm_sma_{period}"] = (close - sma) / sma # ── SMA crossover signals ──────────────────────────────────────────────── sma_20 = close.rolling(20).mean() sma_50 = close.rolling(50).mean() sma_200 = close.rolling(200).mean() df["sma_20_50_cross"] = np.where(sma_20 > sma_50, 1.0, -1.0) df["sma_20_200_cross"] = np.where(sma_20 > sma_200, 1.0, -1.0) df["sma_50_200_cross"] = np.where(sma_50 > sma_200, 1.0, -1.0) # ── Price momentum features ────────────────────────────────────────────── for lag in [1, 2, 4, 8, 16]: df[f"ret_{lag}"] = close.pct_change(lag) # ── Volatility: rolling std of returns ────────────────────────────────── ret_1 = close.pct_change(1) for window in [8, 20, 50]: df[f"vol_{window}"] = ret_1.rolling(window).std() # ── ATR (Average True Range) ───────────────────────────────────────────── tr = pd.concat([ high - low, (high - close.shift(1)).abs(), (low - close.shift(1)).abs() ], axis=1).max(axis=1) for atr_period in [14, 50]: atr = tr.rolling(atr_period).mean() df[f"atr_{atr_period}"] = atr df[f"natr_{atr_period}"] = atr / close # ── RSI ────────────────────────────────────────────────────────────────── for rsi_period in [14, 28]: delta = close.diff() gain = delta.clip(lower=0).rolling(rsi_period).mean() loss = (-delta.clip(upper=0)).rolling(rsi_period).mean() rs = gain / (loss + 1e-10) df[f"rsi_{rsi_period}"] = 100 - (100 / (1 + rs)) # ── MACD ───────────────────────────────────────────────────────────────── ema_12 = close.ewm(span=12, adjust=False).mean() ema_26 = close.ewm(span=26, adjust=False).mean() macd_line = ema_12 - ema_26 signal_line = macd_line.ewm(span=9, adjust=False).mean() df["macd"] = macd_line df["macd_signal"] = signal_line df["macd_hist"] = macd_line - signal_line df["macd_hist_norm"] = (macd_line - signal_line) / (close + 1e-10) # ── Bollinger Bands ─────────────────────────────────────────────────────── for bb_period in [20, 50]: bb_mid = close.rolling(bb_period).mean() bb_std = close.rolling(bb_period).std() bb_upper = bb_mid + 2.0 * bb_std bb_lower = bb_mid - 2.0 * bb_std bb_width = (bb_upper - bb_lower) / (bb_mid + 1e-10) bb_pos = (close - bb_lower) / (bb_upper - bb_lower + 1e-10) df[f"bb_width_{bb_period}"] = bb_width df[f"bb_pos_{bb_period}"] = bb_pos # ── Stochastic Oscillator ──────────────────────────────────────────────── for stoch_period in [14, 28]: lowest_low = low.rolling(stoch_period).min() highest_high = high.rolling(stoch_period).max() stoch_k = (close - lowest_low) / (highest_high - lowest_low + 1e-10) * 100 stoch_d = stoch_k.rolling(3).mean() df[f"stoch_k_{stoch_period}"] = stoch_k df[f"stoch_d_{stoch_period}"] = stoch_d # ── Rate of Change (ROC) ────────────────────────────────────────────────── for roc_period in [5, 10, 20]: df[f"roc_{roc_period}"] = close.pct_change(roc_period) # ── Candle body and shadow features ────────────────────────────────────── body = (close - open_).abs() candle_range = (high - low).abs() df["body_ratio"] = body / (candle_range + 1e-10) df["upper_shadow"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / (candle_range + 1e-10) df["lower_shadow"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / (candle_range + 1e-10) df["bullish_candle"] = np.where(close > open_, 1.0, -1.0) # ── Volume-proxy: candle range as volatility proxy ──────────────────────── df["range_norm"] = candle_range / (close + 1e-10) df["range_ma_ratio"] = candle_range / (candle_range.rolling(20).mean() + 1e-10) # ── Lag features for return predictors ─────────────────────────────────── for col_lag in ["rsi_14", "macd_hist", "bb_pos_20"]: for lag in [1, 2, 3]: df[f"{col_lag}_lag{lag}"] = df[col_lag].shift(lag) # ── Distance of close from recent high/low ──────────────────────────────── for lookback in [10, 20, 50]: roll_high = high.rolling(lookback).max() roll_low = low.rolling(lookback).min() df[f"dist_high_{lookback}"] = (close - roll_high) / (roll_high + 1e-10) df[f"dist_low_{lookback}"] = (close - roll_low) / (roll_low + 1e-10) # ── Trend strength: ADX proxy ───────────────────────────────────────────── adx_period = 14 tr_adx = tr.copy() plus_dm = pd.Series(np.where((high.diff() > 0) & (high.diff() > -low.diff()), high.diff(), 0.0), index=close.index) minus_dm = pd.Series(np.where((-low.diff() > 0) & (-low.diff() > high.diff()), -low.diff(), 0.0), index=close.index) atr_adx = tr_adx.rolling(adx_period).mean() plus_di = 100 * plus_dm.rolling(adx_period).mean() / (atr_adx + 1e-10) minus_di = 100 * minus_dm.rolling(adx_period).mean() / (atr_adx + 1e-10) dx = (100 * (plus_di - minus_di).abs() / (plus_di + minus_di + 1e-10)) df["adx"] = dx.rolling(adx_period).mean() df["plus_di"] = plus_di df["minus_di"] = minus_di # ── Fill NaN from indicator warm-up ────────────────────────────────────── df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "GBP/USD SMA Trend Gradient Boosting Risk-Adj", "model_type": "GradientBoostingClassifier", "model_params": { "n_estimators": 400, "max_depth": 4, "learning_rate": 0.04, "subsample": 0.75, "min_samples_leaf": 20, "max_features": "sqrt", "n_iter_no_change": 30, "validation_fraction": 0.1, "tol": 1e-4, "random_state": 42, }, "signal_threshold": 0.57, "direction": "both", "stop_loss": 0.005, "take_profit": 0.010, "cooldown": 0, "max_positions": 1, "on_opposite": "reverse", "session_filter": [6, 18], "min_atr": 0.0002, "trend_filter": "sma_50", "target_horizon": 4, "objective": ( "Maximize risk-adjusted return (Sharpe/Calmar) on GBP/USD 15-min data. " "GradientBoostingClassifier chosen for its strong bias-variance tradeoff " "on medium-sized tabular datasets without needing GPU. " "Hyperparameters: moderate depth=4 prevents overfitting, learning_rate=0.04 " "with 400 estimators balances convergence vs generalisation, subsample=0.75 " "adds stochasticity to reduce variance, min_samples_leaf=20 enforces statistical " "significance at each leaf. Early stopping via n_iter_no_change guards against " "overfit on the training fold. Signal threshold 0.57 filters marginal signals " "to improve precision. SL=0.5%, TP=1.0% gives 1:2 RR. Session filter 6-18 UTC " "covers London+NY overlap — highest GBP/USD liquidity and tighter spreads. " "sma_50 trend filter ensures we only trade in the direction of medium-term trend, " "reducing whipsaw losses. target_horizon=4 bars (1 hour) gives the model enough " "time for moves to develop while staying relevant for intraday trading." ), "notes": ( "Features: SMA 20/50/200 with distance metrics (core requirement), RSI 14/28, " "MACD, Bollinger Bands 20/50, Stochastic 14/28, ATR 14/50, NATR, ROC, ADX, " "candle body/shadow ratios, lagged RSI/MACD/BB features, distance from rolling " "high/low, SMA crossover signals, multi-lag return features. " "All features are backward-looking only (no lookahead bias). " "on_opposite=reverse for fast trend-following entries without missing reversals." ), }
4.42	GBP/USD BB Squeeze Breakout (GradientBoosting) Maximize risk-adjusted return (Sharpe / Calmar). GradientBoostingClassifier chosen for its strong performance on tabular financial data with…	E @elastic-moose-350	GBPUSD	15min	53.4%55.6%	+1.03%+24.81%	1.041.67	5.20%5.20%	34854
# ╔══════════════════════════════════════════════════════════════╗ # ║ STRATEGY REQUEST LOG ║ # ╚══════════════════════════════════════════════════════════════╝ # Generated : 2026-05-06 01:53:28 # Model : Gradient Boosting # Feature Eng. : BB (20,2.0), ATR 14 + Auto-add features: ON # Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP # Optimization : Maximize risk-adjusted return # Risk Mgmt : Stop loss 0.5%, Take profit 1.0% # Risk Filter : — # ══════════════════════════════════════════════════════════════ # ============================================================ # Bollinger Bands Squeeze Breakout — GBP/USD 15-min # ============================================================ # SECTION 0 — IMPORTS & CONSTANTS import numpy as np import pandas as pd DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/GBPUSD_15min.parquet" START_DATE = "2025-04-24" END_DATE = "2026-04-24" VALIDATION_DATE = "" TRAIN_SPLIT = 0.7 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # ── Bollinger Bands (20, 2) ────────────────────────────────────────────── bb_period = 20 bb_std = 2.0 bb_mid = close.rolling(bb_period).mean() bb_sigma = close.rolling(bb_period).std(ddof=0) bb_upper = bb_mid + bb_std * bb_sigma bb_lower = bb_mid - bb_std * bb_sigma bb_width = (bb_upper - bb_lower) / bb_mid bb_pct = (close - bb_lower) / (bb_upper - bb_lower) df["bb_mid"] = bb_mid df["bb_upper"] = bb_upper df["bb_lower"] = bb_lower df["bb_width"] = bb_width df["bb_pct"] = bb_pct # ── ATR (14) & NATR ───────────────────────────────────────────────────── atr_period = 14 prev_close = close.shift(1) tr = pd.concat([ high - low, (high - prev_close).abs(), (low - prev_close).abs() ], axis=1).max(axis=1) atr = tr.ewm(span=atr_period, min_periods=atr_period, adjust=False).mean() natr = atr / close df["atr"] = atr df["natr"] = natr # ── Squeeze detection ──────────────────────────────────────────────────── # Squeeze = BB width is in the bottom quartile over a 50-bar lookback bb_width_min = bb_width.rolling(50).min() bb_width_max = bb_width.rolling(50).max() bb_width_norm = (bb_width - bb_width_min) / (bb_width_max - bb_width_min + 1e-12) df["bb_width_norm"] = bb_width_norm df["squeeze"] = np.where(bb_width_norm < 0.25, 1.0, 0.0) # Squeeze released: was in squeeze 1 bar ago, now width is expanding bb_width_chg = bb_width.diff() df["squeeze_release"] = np.where( (df["squeeze"].shift(1) == 1.0) & (bb_width_chg > 0), 1.0, 0.0 ) # ── BB width momentum ──────────────────────────────────────────────────── df["bb_width_chg"] = bb_width_chg df["bb_width_chg_2"] = bb_width.diff(2) df["bb_width_chg_5"] = bb_width.diff(5) # ── Price position relative to bands ───────────────────────────────────── df["close_vs_mid"] = close - bb_mid df["close_vs_upper"] = close - bb_upper df["close_vs_lower"] = close - bb_lower # ── Momentum & returns ─────────────────────────────────────────────────── df["ret_1"] = close.pct_change(1) df["ret_3"] = close.pct_change(3) df["ret_5"] = close.pct_change(5) df["ret_10"] = close.pct_change(10) df["ret_20"] = close.pct_change(20) # ── RSI (14) ───────────────────────────────────────────────────────────── rsi_period = 14 delta = close.diff() gain = delta.clip(lower=0) loss = (-delta).clip(lower=0) avg_gain = gain.ewm(span=rsi_period, min_periods=rsi_period, adjust=False).mean() avg_loss = loss.ewm(span=rsi_period, min_periods=rsi_period, adjust=False).mean() rs = avg_gain / (avg_loss + 1e-12) rsi = 100.0 - 100.0 / (1.0 + rs) df["rsi"] = rsi # RSI divergence proxy: price makes new low/high but RSI does not df["rsi_5_min"] = rsi.rolling(5).min() df["close_5_min"] = close.rolling(5).min() df["rsi_5_max"] = rsi.rolling(5).max() df["close_5_max"] = close.rolling(5).max() # ── MACD ───────────────────────────────────────────────────────────────── ema_fast = close.ewm(span=12, adjust=False).mean() ema_slow = close.ewm(span=26, adjust=False).mean() macd_line = ema_fast - ema_slow macd_signal = macd_line.ewm(span=9, adjust=False).mean() macd_hist = macd_line - macd_signal df["macd_line"] = macd_line df["macd_signal"] = macd_signal df["macd_hist"] = macd_hist df["macd_hist_chg"] = macd_hist.diff() # ── Volume-like proxy: bar range ───────────────────────────────────────── bar_range = high - low df["bar_range"] = bar_range df["bar_range_norm"] = bar_range / (atr + 1e-12) # ── Candle body direction & size ───────────────────────────────────────── body = close - open_ df["body"] = body df["body_norm"] = body / (atr + 1e-12) df["body_dir"] = np.where(body > 0, 1.0, np.where(body < 0, -1.0, 0.0)) # ── Upper / lower wick ─────────────────────────────────────────────────── df["upper_wick"] = high - pd.concat([close, open_], axis=1).max(axis=1) df["lower_wick"] = pd.concat([close, open_], axis=1).min(axis=1) - low # ── SMA trend context ───────────────────────────────────────────────────── sma_50 = close.rolling(50).mean() sma_200 = close.rolling(200).mean() df["sma_50"] = sma_50 df["sma_200"] = sma_200 df["close_vs_sma50"] = (close - sma_50) / (sma_50 + 1e-12) df["sma50_vs_sma200"] = (sma_50 - sma_200) / (sma_200 + 1e-12) # ── Volatility regime ──────────────────────────────────────────────────── natr_ma = natr.rolling(50).mean() df["natr_ratio"] = natr / (natr_ma + 1e-12) # >1 = above-avg vol # ── Mean-reversion distance ─────────────────────────────────────────────── df["z_score_20"] = (close - bb_mid) / (bb_sigma + 1e-12) # ── Rolling realized vol ───────────────────────────────────────────────── df["rvol_10"] = df["ret_1"].rolling(10).std() df["rvol_20"] = df["ret_1"].rolling(20).std() # ── ATR-normalised returns ──────────────────────────────────────────────── df["ret_1_natr"] = df["ret_1"] / (natr + 1e-12) # ── Lagged features ─────────────────────────────────────────────────────── for lag in [1, 2, 3, 5]: df[f"bb_pct_lag{lag}"] = bb_pct.shift(lag) df[f"bb_width_lag{lag}"] = bb_width.shift(lag) df[f"rsi_lag{lag}"] = rsi.shift(lag) df[f"macd_hist_lag{lag}"] = macd_hist.shift(lag) # ── Hour-of-day (cyclical) ──────────────────────────────────────────────── hour = pd.Series(df.index.hour, index=df.index).astype(float) df["hour_sin"] = np.sin(2 * np.pi * hour / 24.0) df["hour_cos"] = np.cos(2 * np.pi * hour / 24.0) # ── Day-of-week (cyclical) ──────────────────────────────────────────────── dow = pd.Series(df.index.dayofweek, index=df.index).astype(float) df["dow_sin"] = np.sin(2 * np.pi * dow / 5.0) df["dow_cos"] = np.cos(2 * np.pi * dow / 5.0) # ── Fill NaN from warm-up ──────────────────────────────────────────────── df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "GBP/USD BB Squeeze Breakout (GradientBoosting)", "model_type": "GradientBoostingClassifier", "model_params": { "n_estimators": 400, "max_depth": 4, "learning_rate": 0.04, "subsample": 0.75, "min_samples_leaf": 20, "min_samples_split": 40, "max_features": "sqrt", "n_iter_no_change": 30, "validation_fraction": 0.1, "tol": 1e-4, "random_state": 42, }, "signal_threshold": 0.55, "direction": "both", "stop_loss": 0.005, "take_profit": 0.010, "cooldown": 0, "max_positions": 1, "on_opposite": "reverse", "session_filter": [6, 20], "min_atr": 0.0002, "trend_filter": None, "target_horizon": 4, "objective": ( "Maximize risk-adjusted return (Sharpe / Calmar). " "GradientBoostingClassifier chosen for its strong performance on " "tabular financial data with noisy labels. Shallow trees (max_depth=4) " "with shrinkage (lr=0.04) and subsample=0.75 reduce overfitting. " "Early stopping (n_iter_no_change=30) prevents over-training. " "SL=0.5%, TP=1.0% gives a 1:2 risk/reward ratio. " "Session filter 06-20 UTC captures London + New York overlap for GBP/USD." ), "notes": ( "Core signal: BB squeeze (narrow band width) followed by expansion " "breakout, confirmed by MACD histogram direction and RSI. " "ATR filter ensures minimum volatility for entries. " "Lagged BB features capture the squeeze build-up dynamic. " "Z-score and normalized returns give the model mean-reversion context. " "Cyclical time features allow the model to learn intraday seasonality." ), }
3.97	NZD/USD RSI-MACD Gradient Boost Risk-Adjusted Maximize risk-adjusted return (Sharpe/Calmar) using a deep GradientBoostingClassifier with many slow-learning trees and aggressive regularis…	S @silver-bull-130	NZDUSD	15min	60.9%0.0%	+18.36%+0.00%	1.35—	3.80%3.80%	7320
# ╔══════════════════════════════════════════════════════════════╗ # ║ STRATEGY REQUEST LOG ║ # ╚══════════════════════════════════════════════════════════════╝ # Generated : 2026-05-06 01:35:33 # Model : Gradient Boosting # Feature Eng. : RSI 14, MACD (12,26,9) + Auto-add features: ON # Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP # Optimization : Maximize risk-adjusted return # Risk Mgmt : Stop loss 0.5%, Take profit 1.0% # Risk Filter : — # ══════════════════════════════════════════════════════════════ # ============================================================ # SECTION 0 — IMPORTS & CONSTANTS import numpy as np import pandas as pd DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/NZDUSD_15min.parquet" START_DATE = "2025-04-24" END_DATE = "2026-04-24" VALIDATION_DATE = "" TRAIN_SPLIT = 0.7 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # ── RSI 14 ────────────────────────────────────────────────────────────── delta = close.diff() gain = delta.clip(lower=0) loss = (-delta).clip(lower=0) avg_gain = gain.ewm(alpha=1/14, min_periods=14, adjust=False).mean() avg_loss = loss.ewm(alpha=1/14, min_periods=14, adjust=False).mean() rs = avg_gain / avg_loss.replace(0, np.nan) df["rsi_14"] = 100 - (100 / (1 + rs)) # RSI derived signals df["rsi_ob"] = np.where(df["rsi_14"] > 70, 1, 0) # overbought flag df["rsi_os"] = np.where(df["rsi_14"] < 30, 1, 0) # oversold flag df["rsi_mid"] = df["rsi_14"] - 50 # centred df["rsi_slope"] = df["rsi_14"].diff(3) # momentum of RSI df["rsi_accel"] = df["rsi_slope"].diff(2) # acceleration # RSI regime: above/below 50 df["rsi_bull"] = np.where(df["rsi_14"] > 50, 1, -1) # ── MACD (12, 26, 9) ──────────────────────────────────────────────────── ema12 = close.ewm(span=12, adjust=False).mean() ema26 = close.ewm(span=26, adjust=False).mean() macd_line = ema12 - ema26 signal_line = macd_line.ewm(span=9, adjust=False).mean() macd_hist = macd_line - signal_line df["macd_line"] = macd_line df["macd_signal"] = signal_line df["macd_hist"] = macd_hist # MACD derived df["macd_cross"] = np.where(macd_line > signal_line, 1, -1) df["macd_hist_sign"] = np.where(macd_hist > 0, 1, -1) df["macd_hist_chg"] = macd_hist.diff(1) # histogram change df["macd_hist_accel"]= df["macd_hist_chg"].diff(1) # second derivative df["macd_zero_cross"]= np.where(macd_line > 0, 1, -1) # ── ATR 14 ────────────────────────────────────────────────────────────── tr = pd.concat([ high - low, (high - close.shift(1)).abs(), (low - close.shift(1)).abs() ], axis=1).max(axis=1) atr14 = tr.ewm(alpha=1/14, min_periods=14, adjust=False).mean() df["atr_14"] = atr14 df["natr_14"] = atr14 / close # normalised ATR df["atr_ratio"]= atr14 / atr14.rolling(50).mean() # current vs recent vol # ── Volatility regime ─────────────────────────────────────────────────── df["vol_high"] = np.where(df["natr_14"] > df["natr_14"].rolling(100).median(), 1, 0) # ── Price momentum ────────────────────────────────────────────────────── df["ret_1"] = close.pct_change(1) df["ret_3"] = close.pct_change(3) df["ret_8"] = close.pct_change(8) df["ret_16"] = close.pct_change(16) # Scaled by ATR so the model sees normalised moves df["ret_1_atr"] = df["ret_1"] / (atr14 / close).replace(0, np.nan) df["ret_3_atr"] = df["ret_3"] / (atr14 / close).replace(0, np.nan) df["ret_8_atr"] = df["ret_8"] / (atr14 / close).replace(0, np.nan) # ── EMAs & trend structure ─────────────────────────────────────────────── ema8 = close.ewm(span=8, adjust=False).mean() ema21 = close.ewm(span=21, adjust=False).mean() ema50 = close.ewm(span=50, adjust=False).mean() ema100= close.ewm(span=100,adjust=False).mean() df["ema8_21_spread"] = (ema8 - ema21) / close df["ema21_50_spread"]= (ema21 - ema50) / close df["ema50_100_spread"]= (ema50 - ema100) / close df["price_vs_ema21"] = (close - ema21) / close df["price_vs_ema50"] = (close - ema50) / close df["trend_align"] = np.where( (ema8 > ema21) & (ema21 > ema50), 1, np.where((ema8 < ema21) & (ema21 < ema50), -1, 0) ) # ── Bollinger Bands (20, 2) ────────────────────────────────────────────── bb_mid = close.rolling(20).mean() bb_std = close.rolling(20).std(ddof=0) bb_up = bb_mid + 2 * bb_std bb_lo = bb_mid - 2 * bb_std bb_bw = (bb_up - bb_lo) / bb_mid # bandwidth bb_pct = (close - bb_lo) / (bb_up - bb_lo) # %B df["bb_pct"] = bb_pct df["bb_bw"] = bb_bw df["bb_bw_ratio"] = bb_bw / bb_bw.rolling(50).mean() # squeeze detector df["bb_upper_touch"] = np.where(close >= bb_up, 1, 0) df["bb_lower_touch"] = np.where(close <= bb_lo, 1, 0) # ── Stochastic %K %D (14, 3) ──────────────────────────────────────────── lo14 = low.rolling(14).min() hi14 = high.rolling(14).max() stoch_k = 100 * (close - lo14) / (hi14 - lo14).replace(0, np.nan) stoch_d = stoch_k.rolling(3).mean() df["stoch_k"] = stoch_k df["stoch_d"] = stoch_d df["stoch_kd_diff"]= stoch_k - stoch_d df["stoch_ob"] = np.where(stoch_k > 80, 1, 0) df["stoch_os"] = np.where(stoch_k < 20, 1, 0) # ── Candle structure ──────────────────────────────────────────────────── body = (close - open_).abs() candle_rng= (high - low).replace(0, np.nan) df["body_ratio"] = body / candle_rng # body vs full range df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / candle_rng df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / candle_rng df["candle_dir"] = np.where(close > open_, 1, -1) df["candle_dir_3"] = df["candle_dir"].rolling(3).sum() # short-term bias # ── Volume-less momentum oscillator (Williams %R 14) ─────────────────── df["williams_r"] = -100 * (hi14 - close) / (hi14 - lo14).replace(0, np.nan) # ── RSI x MACD composite signal ───────────────────────────────────────── df["rsi_macd_bull"] = np.where( (df["rsi_14"] > 50) & (macd_hist > 0), 1, np.where((df["rsi_14"] < 50) & (macd_hist < 0), -1, 0) ) # ── Divergence proxy: price vs RSI direction (3-bar) ──────────────────── price_dir3 = np.sign(close.diff(3)) rsi_dir3 = np.sign(df["rsi_14"].diff(3)) df["rsi_div"] = np.where(price_dir3 != rsi_dir3, 1, 0) # ── Mean-reversion signal: distance from 50-bar mean normalised by ATR ── sma50 = close.rolling(50).mean() df["zscore_50"] = (close - sma50) / (close.rolling(50).std(ddof=0).replace(0, np.nan)) df["mean_rev_long"] = np.where(df["zscore_50"] < -1.5, 1, 0) df["mean_rev_short"] = np.where(df["zscore_50"] > 1.5, 1, 0) # ── Interaction features ───────────────────────────────────────────────── df["rsi_bb_pct"] = df["rsi_14"] * df["bb_pct"] df["macd_hist_rsi_mid"] = df["macd_hist"] * df["rsi_mid"] df["stoch_rsi"] = df["stoch_k"] * df["rsi_14"] / 1e4 # normalised product # ── Lag features (avoid lookahead) ────────────────────────────────────── for lag in [1, 2, 4, 8]: df[f"rsi_lag{lag}"] = df["rsi_14"].shift(lag) df[f"macd_hist_lag{lag}"] = df["macd_hist"].shift(lag) df[f"ret_lag{lag}"] = df["ret_1"].shift(lag) # ── Hour-of-day & day-of-week cyclic encoding ─────────────────────────── if hasattr(df.index, "hour"): hour = df.index.hour dow = df.index.dayofweek df["hour_sin"] = np.sin(2 * np.pi * hour / 24) df["hour_cos"] = np.cos(2 * np.pi * hour / 24) df["dow_sin"] = np.sin(2 * np.pi * dow / 5) df["dow_cos"] = np.cos(2 * np.pi * dow / 5) # ── Final fill ─────────────────────────────────────────────────────────── df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "NZD/USD RSI-MACD Gradient Boost Risk-Adjusted", "model_type": "GradientBoostingClassifier", "model_params": { "n_estimators": 600, "max_depth": 4, "learning_rate": 0.03, "subsample": 0.75, "max_features": "sqrt", "min_samples_leaf": 20, "min_samples_split":40, "warm_start": False, }, "signal_threshold": 0.56, "direction": "both", "stop_loss": 0.005, "take_profit": 0.010, "cooldown": 0, "max_positions": 1, "on_opposite": "reverse", "session_filter": [21, 21], "min_atr": 0.0002, "trend_filter": None, "target_horizon": 4, "objective": ( "Maximize risk-adjusted return (Sharpe/Calmar) using a deep " "GradientBoostingClassifier with many slow-learning trees and " "aggressive regularisation (min_samples_leaf=20, subsample=0.75). " "Feature set deliberately differs from prior RSI+BB+Stoch attempts " "by adding: ATR-normalised returns, z-score mean-reversion signals, " "RSI divergence proxy, Williams %R, candle structure ratios, cyclic " "time encoding, and interaction/lag features to give the model richer " "multi-timeframe context. SL=0.5%/TP=1% gives 1:2 RR aligned with " "maximising Sharpe." ), "notes": ( "Prior PF=1.35 / ret=+18.36% used standard RSI+MACD+BB+Stoch without " "ATR normalisation or divergence detection. This version adds z-score " "mean-reversion context, candle structure, and temporal encoding to " "reduce false positives. session_filter=[21,21] is intentionally " "narrow — set to None if you want 24h coverage. min_atr=0.0002 " "avoids dead-market signals." ), }
3.95	USD/JPY BB Squeeze Breakout (GBM) Maximize risk-adjusted return (Sharpe). GradientBoostingClassifier chosen for strong performance on tabular financial data with moderate fea…	V @vol_drifter	USDJPY	15min	60.7%74.1%	+1.15%+7.28%	1.062.29	3.13%3.13%	20127
# ╔══════════════════════════════════════════════════════════════╗ # ║ STRATEGY REQUEST LOG ║ # ╚══════════════════════════════════════════════════════════════╝ # Generated : 2026-05-06 02:57:20 # Model : Gradient Boosting # Feature Eng. : BB (20,2.0), ATR 14 + Auto-add features: ON # Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP # Optimization : Maximize risk-adjusted return # Risk Mgmt : Stop loss 0.5%, Take profit 1.0% # Risk Filter : — # ══════════════════════════════════════════════════════════════ # ============================================================ # Bollinger Bands Squeeze Breakout Strategy — USD/JPY 15-min # ============================================================ # SECTION 0 — IMPORTS & CONSTANTS import numpy as np import pandas as pd DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/USDJPY_15min.parquet" START_DATE = "2025-04-24" END_DATE = "2026-04-24" VALIDATION_DATE = "" TRAIN_SPLIT = 0.7 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # ── Bollinger Bands (20, 2) ────────────────────────────────────────────── bb_period = 20 bb_std = 2.0 bb_mid = close.rolling(bb_period).mean() bb_sigma = close.rolling(bb_period).std(ddof=0) bb_upper = bb_mid + bb_std * bb_sigma bb_lower = bb_mid - bb_std * bb_sigma df["bb_mid"] = bb_mid df["bb_upper"] = bb_upper df["bb_lower"] = bb_lower # Band width and %B — required features df["bb_width"] = (bb_upper - bb_lower) / bb_mid df["bb_pct"] = (close - bb_lower) / (bb_upper - bb_lower) # ── ATR 14 & NATR ──────────────────────────────────────────────────────── atr_period = 14 prev_close = close.shift(1) tr = pd.concat([ high - low, (high - prev_close).abs(), (low - prev_close).abs() ], axis=1).max(axis=1) atr = tr.ewm(span=atr_period, min_periods=atr_period, adjust=False).mean() natr = atr / close df["atr"] = atr df["natr"] = natr # ── Squeeze detection ──────────────────────────────────────────────────── # Keltner Channel (EMA20 ± 1.5 × ATR) for squeeze comparison kc_mid = close.ewm(span=20, adjust=False).mean() kc_upper = kc_mid + 1.5 * atr kc_lower = kc_mid - 1.5 * atr df["squeeze"] = np.where( (bb_upper < kc_upper) & (bb_lower > kc_lower), 1.0, 0.0 ) # Rolling squeeze count (bars in squeeze over last 10 bars) df["squeeze_count"] = ( df["squeeze"].rolling(10).sum() ) # Band-width z-score (how compressed is the width vs recent history) bw_mean = df["bb_width"].rolling(50).mean() bw_std = df["bb_width"].rolling(50).std(ddof=0) df["bb_width_zscore"] = (df["bb_width"] - bw_mean) / (bw_std + 1e-10) # ── Breakout momentum ──────────────────────────────────────────────────── # Price distance from bands, normalised by ATR df["dist_upper"] = (close - bb_upper) / (atr + 1e-10) df["dist_lower"] = (close - bb_lower) / (atr + 1e-10) df["dist_mid"] = (close - bb_mid) / (atr + 1e-10) # ── Rate of change ──────────────────────────────────────────────────────── for n in [1, 4, 8, 16]: df[f"roc_{n}"] = close.pct_change(n) # ── RSI 14 ─────────────────────────────────────────────────────────────── delta = close.diff() gain = delta.clip(lower=0) loss = (-delta).clip(lower=0) avg_g = gain.ewm(span=14, min_periods=14, adjust=False).mean() avg_l = loss.ewm(span=14, min_periods=14, adjust=False).mean() rs = avg_g / (avg_l + 1e-10) df["rsi_14"] = 100.0 - 100.0 / (1.0 + rs) # RSI normalised to [-1, 1] df["rsi_norm"] = (df["rsi_14"] - 50.0) / 50.0 # ── Momentum / trend context ────────────────────────────────────────────── ema_9 = close.ewm(span=9, adjust=False).mean() ema_21 = close.ewm(span=21, adjust=False).mean() ema_50 = close.ewm(span=50, adjust=False).mean() df["ema_9_21_diff"] = (ema_9 - ema_21) / (atr + 1e-10) df["ema_21_50_diff"] = (ema_21 - ema_50) / (atr + 1e-10) # Price position relative to EMAs df["close_vs_ema9"] = (close - ema_9) / (atr + 1e-10) df["close_vs_ema50"] = (close - ema_50) / (atr + 1e-10) # ── Volume-proxy: ATR velocity ──────────────────────────────────────────── df["atr_roc"] = atr.pct_change(4) # ── MACD-style oscillator ───────────────────────────────────────────────── macd_line = close.ewm(span=12, adjust=False).mean() - close.ewm(span=26, adjust=False).mean() macd_signal = macd_line.ewm(span=9, adjust=False).mean() df["macd_hist"] = (macd_line - macd_signal) / (atr + 1e-10) # ── Stochastic %K (14) ──────────────────────────────────────────────────── low_14 = low.rolling(14).min() high_14 = high.rolling(14).max() df["stoch_k"] = (close - low_14) / (high_14 - low_14 + 1e-10) # ── Candle body / wick features ─────────────────────────────────────────── body = (close - open_).abs() candle_range = (high - low).replace(0, np.nan) df["body_ratio"] = body / candle_range df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / (candle_range + 1e-10) df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / (candle_range + 1e-10) df["bull_candle"] = np.where(close > open_, 1.0, 0.0) # ── Lagged bb_pct and bb_width ──────────────────────────────────────────── for lag in [1, 2, 4]: df[f"bb_pct_lag{lag}"] = df["bb_pct"].shift(lag) df[f"bb_width_lag{lag}"] = df["bb_width"].shift(lag) # ── Band width momentum (is it expanding?) ──────────────────────────────── df["bb_width_chg1"] = df["bb_width"].diff(1) df["bb_width_chg4"] = df["bb_width"].diff(4) # ── Hour / session features ─────────────────────────────────────────────── df["hour_sin"] = np.sin(2 * np.pi * df.index.hour / 24) df["hour_cos"] = np.cos(2 * np.pi * df.index.hour / 24) df["dow_sin"] = np.sin(2 * np.pi * df.index.dayofweek / 5) df["dow_cos"] = np.cos(2 * np.pi * df.index.dayofweek / 5) # ── Fill NaN from warm-up ───────────────────────────────────────────────── df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "USD/JPY BB Squeeze Breakout (GBM)", "model_type": "GradientBoostingClassifier", "model_params": { "n_estimators": 400, "max_depth": 4, "learning_rate": 0.04, "subsample": 0.75, "min_samples_leaf": 20, "max_features": "sqrt", "validation_fraction": 0.1, "n_iter_no_change": 30, "tol": 1e-4, "random_state": 42, }, "signal_threshold": 0.55, "direction": "both", "stop_loss": 0.005, "take_profit": 0.010, "cooldown": 0, "max_positions": 1, "on_opposite": "reverse", "session_filter": [6, 20], "min_atr": 0.0003, "trend_filter": None, "target_horizon": 4, "objective": ( "Maximize risk-adjusted return (Sharpe). " "GradientBoostingClassifier chosen for strong performance on tabular " "financial data with moderate feature counts. Deeper ensemble (400 " "estimators, depth 4) with early stopping captures non-linear BB " "squeeze patterns. Subsample=0.75 and sqrt features reduce overfitting. " "SL 0.5% / TP 1.0% gives 1:2 R:R ratio. Session filter 06-20 UTC covers " "London + NY sessions where USD/JPY liquidity is highest." ), "notes": ( "Core signal: BB squeeze (narrow band width inside Keltner Channel) " "followed by band expansion. Features include band width z-score, " "breakout direction (dist_upper/lower), RSI, MACD histogram, " "stochastic %K, EMA spreads, candle structure, and lagged BB features. " "NATR used as min_atr filter to avoid low-volatility noise trades. " "Horizon=4 bars (1 hour on 15-min data) aligns with typical " "post-squeeze expansion duration." ), }
2.10	USD/CAD BB + ATR Gradient Boosting Mean-Rev Maximize risk-adjusted return (Sharpe/Calmar) on USD/CAD 15-min data. GradientBoostingClassifier chosen for strong generalisation on noisy F…	S @silver-bull-130	USDCAD	15min	62.6%56.8%	+2.56%+4.87%	1.151.33	1.75%1.75%	35644
# ╔══════════════════════════════════════════════════════════════╗ # ║ STRATEGY REQUEST LOG ║ # ╚══════════════════════════════════════════════════════════════╝ # Generated : 2026-05-06 01:50:17 # Model : Gradient Boosting # Feature Eng. : BB (20,2.0), ATR 14 + Auto-add features: ON # Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP # Optimization : Maximize risk-adjusted return # Risk Mgmt : Stop loss 0.5%, Take profit 1.0% # Risk Filter : — # ══════════════════════════════════════════════════════════════ # ============================================================ # SECTION 0 — IMPORTS & CONSTANTS import numpy as np import pandas as pd DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/USDCAD_15min.parquet" START_DATE = "2025-04-24" END_DATE = "2026-04-24" VALIDATION_DATE = "" TRAIN_SPLIT = 0.7 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # ── Bollinger Bands (20, 2) ────────────────────────────────────────────── bb_period = 20 bb_std = 2.0 bb_mid = close.rolling(bb_period).mean() bb_std_s = close.rolling(bb_period).std(ddof=0) bb_upper = bb_mid + bb_std * bb_std_s bb_lower = bb_mid - bb_std * bb_std_s df["bb_mid"] = bb_mid df["bb_upper"] = bb_upper df["bb_lower"] = bb_lower df["bb_width"] = (bb_upper - bb_lower) / bb_mid df["bb_pct"] = (close - bb_lower) / (bb_upper - bb_lower) # ── ATR (14) & Normalised ATR ──────────────────────────────────────────── atr_period = 14 prev_close = close.shift(1) tr = pd.concat([ high - low, (high - prev_close).abs(), (low - prev_close).abs() ], axis=1).max(axis=1) atr = tr.ewm(span=atr_period, adjust=False).mean() natr = atr / close df["atr"] = atr df["natr"] = natr # ── RSI (14) ───────────────────────────────────────────────────────────── rsi_period = 14 delta = close.diff() gain = delta.clip(lower=0) loss = (-delta).clip(lower=0) avg_gain = gain.ewm(span=rsi_period, adjust=False).mean() avg_loss = loss.ewm(span=rsi_period, adjust=False).mean() rs = avg_gain / avg_loss.replace(0, np.nan) df["rsi"] = 100 - (100 / (1 + rs)) # ── MACD (12, 26, 9) ───────────────────────────────────────────────────── ema12 = close.ewm(span=12, adjust=False).mean() ema26 = close.ewm(span=26, adjust=False).mean() macd_line = ema12 - ema26 macd_signal = macd_line.ewm(span=9, adjust=False).mean() df["macd"] = macd_line df["macd_sig"] = macd_signal df["macd_hist"]= macd_line - macd_signal # ── SMA filters (50, 200) ──────────────────────────────────────────────── df["sma_20"] = close.rolling(20).mean() df["sma_50"] = close.rolling(50).mean() df["sma_200"] = close.rolling(200).mean() # Price relative to moving averages df["close_vs_sma20"] = (close - df["sma_20"]) / df["sma_20"] df["close_vs_sma50"] = (close - df["sma_50"]) / df["sma_50"] df["close_vs_sma200"] = (close - df["sma_200"]) / df["sma_200"] # ── Price momentum / returns ───────────────────────────────────────────── df["ret_1"] = close.pct_change(1) df["ret_4"] = close.pct_change(4) df["ret_8"] = close.pct_change(8) df["ret_16"] = close.pct_change(16) df["ret_32"] = close.pct_change(32) # ── Candle body & wick features ────────────────────────────────────────── body = (close - open_).abs() candle_range = (high - low).replace(0, np.nan) df["body_ratio"] = body / candle_range df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / candle_range df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / candle_range df["close_dir"] = np.sign(close - open_) # ── Volatility regime ──────────────────────────────────────────────────── df["vol_ratio"] = natr / natr.rolling(50).mean() # ATR vs its own average df["bb_squeeze"] = np.where(df["bb_width"] < df["bb_width"].rolling(50).quantile(0.25), 1.0, 0.0) # ── Stochastic %K / %D (14, 3) ─────────────────────────────────────────── low14 = low.rolling(14).min() high14 = high.rolling(14).max() stoch_k = 100 * (close - low14) / (high14 - low14).replace(0, np.nan) stoch_d = stoch_k.rolling(3).mean() df["stoch_k"] = stoch_k df["stoch_d"] = stoch_d # ── Rate-of-change ─────────────────────────────────────────────────────── df["roc_10"] = (close - close.shift(10)) / close.shift(10) # ── Rolling z-score of close (20-bar) ──────────────────────────────────── roll_mean = close.rolling(20).mean() roll_std = close.rolling(20).std(ddof=0).replace(0, np.nan) df["zscore_20"] = (close - roll_mean) / roll_std # ── Volume-related (if volume column exists) ───────────────────────────── if "volume" in df.columns and df["volume"].sum() > 0: vol_ma = df["volume"].rolling(20).mean().replace(0, np.nan) df["vol_ratio_20"] = df["volume"] / vol_ma # ── Fill NaNs from warm-up ─────────────────────────────────────────────── df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "USD/CAD BB + ATR Gradient Boosting Mean-Rev", "model_type": "GradientBoostingClassifier", "model_params": { "n_estimators": 400, "max_depth": 4, "learning_rate": 0.04, "subsample": 0.8, "min_samples_leaf": 20, "max_features": "sqrt", "validation_fraction": 0.1, "n_iter_no_change": 30, "tol": 1e-4, "random_state": 42, }, "signal_threshold": 0.55, "direction": "both", "stop_loss": 0.005, "take_profit": 0.010, "cooldown": 0, "max_positions": 1, "on_opposite": "reverse", "session_filter": [7, 20], "min_atr": 0.0002, "trend_filter": None, "target_horizon": 4, "objective": ( "Maximize risk-adjusted return (Sharpe/Calmar) on USD/CAD 15-min data. " "GradientBoostingClassifier chosen for strong generalisation on noisy FX " "price data; moderate depth (4) and learning rate (0.04) with early stopping " "prevent overfitting. Features: Bollinger Bands (mean-reversion signal via " "bb_pct and bb_width), ATR/NATR (volatility filter), RSI, MACD, Stochastic, " "z-score, momentum returns, and candle-body ratios. 2:1 R:R (SL 0.5%, TP 1.0%) " "with session filter (07-20 UTC) to avoid illiquid overnight hours." ), "notes": ( "session_filter [7,20] captures London + NY overlap on USD/CAD. " "min_atr 0.0002 avoids flat/choppy markets. on_opposite=reverse ensures " "the model flips direction quickly when sentiment changes. " "target_horizon=4 bars (1 hour) aligns with typical intraday FX moves." ), }
1.93	EMA(9/21) trend	M @malcolmtan	EMA(9/		50.0%—	+0.90%—	2.15—	0.39%0.39%	10—
# ╔══════════════════════════════════════════════════════════════╗ # ║ STRATEGY REQUEST LOG ║ # ╚══════════════════════════════════════════════════════════════╝ # Generated : 2026-05-25 02:27:36 # Model : XGBoost # Feature Eng. : go long when EMA(9) crosses above EMA(21), exit when it crosses back below + Auto-add features: ON # Signal / Entry : — # Optimization : — # Risk Mgmt : — # Risk Filter : — # ══════════════════════════════════════════════════════════════ # ============================================================ # SECTION 0 — IMPORTS & CONSTANTS import numpy as np import pandas as pd # ── Inlined strategy_utils ── """ strategy_utils.py — Standard utility functions for generated strategies. Claude imports these instead of writing boilerplate from scratch. This ensures consistent behavior across all generated strategies. """ import numpy as np import pandas as pd from sklearn.preprocessing import LabelEncoder # Max backtest window per timeframe. A finer timeframe over a longer window # blows up the results dict / parquet load / Modal train time (the 2026-05-12 # OOM was a 1-min × multi-year sweep) — and a 1-min strategy gains nothing from # 2 years of 1-min bars. Enforced HERE because every training path (UI / API / # Modal) funnels through run_strategy → load_ohlc. Env-overridable so a future # "max plan" / dedicated-server tier can lift it. _TF_MAX_DAYS = { "1min": 30, "5min": 90, "15min": 365, "1h": 730, } def _fetch_ohlc_from_internal(symbol: str, tf: str, start: str, end: str): """Phase 3.2: fetch parquet bytes from Server A's /internal/ohlc endpoint instead of reading a local file. Used inside Modal containers / Mac worker pool (Phase 3.4) so every train sees the same source of truth as the chart. Returns: pd.DataFrame (parquet decoded), or raises on any failure so the caller can fall back / surface a clear error in the job. """ import hashlib as _hashlib, hmac as _hmac, io as _io, os as _os import urllib.request as _ur, urllib.parse as _urp base = (_os.environ.get("QM_INTERNAL_OHLC_BASE") or "").rstrip("/") secret = (_os.environ.get("INTERNAL_WS_SECRET") or "").strip() if not base: raise RuntimeError("QM_INTERNAL_OHLC_BASE not set") if not secret: raise RuntimeError("INTERNAL_WS_SECRET not set") msg = f"{symbol}\|{tf}\|{start}\|{end}".encode("utf-8") sig = _hmac.new(secret.encode("utf-8"), msg, _hashlib.sha256).hexdigest() qs = _urp.urlencode({ "symbol": symbol, "tf": tf, "start": start, "end": end, "sig": sig, }) url = f"{base}/internal/ohlc?{qs}" req = _ur.Request(url, headers={"User-Agent": "qm-worker/1.0"}) with _ur.urlopen(req, timeout=30) as resp: if resp.status != 200: raise RuntimeError(f"/internal/ohlc returned {resp.status}") payload = resp.read() print(f"[load_ohlc:internal] {symbol} {tf} fetched {len(payload)} bytes", flush=True) return pd.read_parquet(_io.BytesIO(payload)) def _parse_symbol_tf_from_path(data_path: str): """Pull SYMBOL + TF out of a path like .../EURUSD_1min.parquet.""" import os as _os, re as _re base = _os.path.basename(str(data_path)) m = _re.match(r"^([A-Z]{6})_(\d+min\|\d+h)\.parquet$", base) if not m: return None, None return m.group(1), m.group(2) def load_ohlc(data_path, start_date="", end_date=""): """Load OHLC parquet, sort index, filter dates. Always returns consistent format. The lower bound is clamped per timeframe (see _TF_MAX_DAYS) — a request for more history than the cap silently starts later. Phase 3.2: when env QM_USE_INTERNAL_OHLC=="1", fetch over HTTP from Server A's /internal/ohlc endpoint instead of pd.read_parquet on a local file (which on Modal is a stale Volume snapshot). The endpoint applies the same day-cap, so the local cap-check below is a defensive no-op in that path. Flag defaults to "0" → unchanged behavior. Returns: (df, close, open_, high, low) """ import os as _os, re as _re _use_internal = _os.environ.get("QM_USE_INTERNAL_OHLC", "0") == "1" if _use_internal: _sym, _tf = _parse_symbol_tf_from_path(data_path) if not _sym or not _tf: raise RuntimeError( f"QM_USE_INTERNAL_OHLC=1 but DATA_PATH basename does not match " f"SYMBOL_TF.parquet: {data_path}" ) df = _fetch_ohlc_from_internal(_sym, _tf, start_date or "", end_date or "") else: df = pd.read_parquet(data_path) df.index = pd.to_datetime(df.index) df = df.sort_index() # Per-timeframe window cap (timeframe inferred from the parquet filename). _m = _re.search(r"_(\d+min\|\d+h)\.parquet$", _os.path.basename(str(data_path))) _tf = _m.group(1) if _m else None _max_days = _TF_MAX_DAYS.get(_tf) if _max_days and _max_days > 0 and len(df): _env_override = _os.environ.get(f"QM_MAX_DAYS_{_tf.upper()}") if _env_override and _env_override.isdigit(): _max_days = int(_env_override) try: _eff_end = pd.Timestamp(end_date) if end_date else df.index.max() _eff_end = min(_eff_end, df.index.max()) _floor = _eff_end - pd.Timedelta(days=_max_days) _req_start = pd.Timestamp(start_date) if start_date else df.index.min() if _req_start < _floor: print(f"[load_ohlc] {_tf} backtest window capped to {_max_days}d: " f"start {_req_start.date()} -> {_floor.date()}", flush=True) start_date = _floor except Exception as _e: print(f"[load_ohlc] window-cap check skipped ({_e})", flush=True) if start_date: df = df[df.index >= start_date] if end_date: df = df[df.index <= end_date] return df, df["close"], df["open"], df["high"], df["low"] def make_target(close, horizon=4): """Create target: direction N bars ahead. Default 4 bars = 1 hour on 15-min data. Returns: target (pd.Series of -1, 0, 1) """ return np.sign(close.shift(-horizon) - close) def split_data(df, target, feature_cols, train_split=0.7, validation_date=""): """Train/test split. Handles both ratio and date-based splits. Drops NaN from target before splitting. Encodes labels to [0,1,2]. Returns: dict with keys: X_train, X_test, y_train, y_test, y_train_enc, y_test_enc, enc, close_train, close_test, split_idx, split_dt, n_train, n_test """ # Drop NaN from target mask = target.notna() df = df[mask].copy() target = target[mask] close = df["close"] # Build feature matrix X = df[feature_cols].copy() X = X.bfill().ffill() X = X.replace([np.inf, -np.inf], np.nan).fillna(0.0) # Split if validation_date: split_idx = len(df[df.index <= validation_date]) else: split_idx = int(len(df) * train_split) split_idx = max(1, min(split_idx, len(df) - 1)) X_train = X.iloc[:split_idx] X_test = X.iloc[split_idx:] y_train = target.iloc[:split_idx] y_test = target.iloc[split_idx:] close_train = close.iloc[:split_idx] close_test = close.iloc[split_idx:] split_dt = str(df.index[split_idx]) # Label encoding — always fit on [-1, 0, 1] enc = LabelEncoder() enc.fit([-1, 0, 1]) y_train_enc = enc.transform(y_train) y_test_enc = enc.transform(y_test) return { "df": df, "X_train": X_train, "X_test": X_test, "y_train": y_train, "y_test": y_test, "y_train_enc": y_train_enc, "y_test_enc": y_test_enc, "enc": enc, "close": close, "close_train": close_train, "close_test": close_test, "split_idx": split_idx, "split_dt": split_dt, "n_train": len(X_train), "n_test": len(X_test), } def compute_overlays(close, df_index): """Compute BB and MA overlays on full dataset. Always consistent. Returns: (bb_dict, ma_dict) """ bb_mid = close.rolling(20).mean() bb_std = close.rolling(20).std() bb_upper = bb_mid + 2 * bb_std bb_lower = bb_mid - 2 * bb_std ma50 = close.rolling(50).mean() ma100 = close.rolling(100).mean() ma200 = close.rolling(200).mean() def _safe(s): s = s.reindex(df_index).bfill().ffill() return [float(x) if (x is not None and not np.isnan(x) and not np.isinf(x)) else None for x in s.values] bb = {"upper": _safe(bb_upper), "mid": _safe(bb_mid), "lower": _safe(bb_lower)} ma = {"ma50": _safe(ma50), "ma100": _safe(ma100), "ma200": _safe(ma200)} return bb, ma def run_backtest(signal, close, capital=10000, cost=2e-5): """Run backtest with transaction costs. Uses price-based trade returns (same as webapp _compute_trades). Signal 0 = hold (keep current position), not close. Returns: dict with equity, trade_returns, long_returns, short_returns, bar_returns """ sig_arr = signal.values price_arr = close.values idx = signal.index n = len(price_arr) # Trade returns — price-based (matches webapp _compute_trades exactly) trade_returns = [] long_returns = [] short_returns = [] trade_log = [] last_dir = None entry_price = None entry_bar = None for i in range(n): s = sig_arr[i] c = price_arr[i] if s != 0.0 and s != last_dir: # Direction change — close previous trade, open new if last_dir is not None and entry_price is not None and entry_price != 0: ret = float(last_dir * (c - entry_price) / entry_price - cost) trade_returns.append(ret) if last_dir == 1: long_returns.append(ret) else: short_returns.append(ret) trade_log.append({ "type": "Buy" if last_dir == 1 else "Sell", "entry_time": str(idx[entry_bar]), "exit_time": str(idx[i]), "entry_price": round(entry_price, 5), "exit_price": round(c, 5), "pnl": round(last_dir * (c - entry_price), 5), "pnl_pct": round(ret * 100, 3), "exit_reason": "signal", }) entry_price = c entry_bar = i last_dir = s # Close last open trade if last_dir is not None and entry_price is not None and n > 0 and entry_price != 0: c = price_arr[-1] ret = float(last_dir * (c - entry_price) / entry_price - cost) trade_returns.append(ret) if last_dir == 1: long_returns.append(ret) else: short_returns.append(ret) trade_log.append({ "type": "Buy" if last_dir == 1 else "Sell", "entry_time": str(idx[entry_bar]), "exit_time": str(idx[-1]), "entry_price": round(entry_price, 5), "exit_price": round(c, 5), "pnl": round(last_dir * (c - entry_price), 5), "pnl_pct": round(ret * 100, 3), "exit_reason": "end", }) # Equity curve from trade returns cumret = 1.0 equity_vals = np.full(n, float(capital)) trade_idx = 0 in_trade = False t_entry_price = None t_dir = None for i in range(n): s = sig_arr[i] c = price_arr[i] if s != 0.0 and s != t_dir: if t_dir is not None and t_entry_price is not None and t_entry_price != 0: t_ret = t_dir * (c - t_entry_price) / t_entry_price - cost cumret = (1 + t_ret) t_entry_price = c t_dir = s equity_vals[i] = capital cumret # Bar returns for Sharpe bar_returns = np.zeros(n) for i in range(1, n): if price_arr[i - 1] != 0 and last_dir is not None: bar_returns[i] = sig_arr[i - 1] * (price_arr[i] - price_arr[i - 1]) / price_arr[i - 1] if sig_arr[i - 1] != 0 else 0.0 return { "equity": pd.Series(equity_vals, index=close.index), "trade_returns": trade_returns, "long_returns": long_returns, "short_returns": short_returns, "bar_returns": bar_returns, "trade_log": trade_log, } def compute_trade_stats(trades, capital=10000): """Single source of truth for trade statistics. Every display path reads from this — no recomputation anywhere. All values are rounded and JSON-safe (no inf/nan). """ if not trades: return {"n": 0, "wins": 0, "losses": 0, "wr": 0, "avg": 0, "best": 0, "worst": 0, "ret": 0, "np": 0, "mdd": 0, "pf": 0, "rr": 0, "expect": 0} w = [r for r in trades if r > 0] l = [r for r in trades if r < 0] cumret = 1.0 for r in trades: cumret = (1 + r) net_p = capital (cumret - 1) # Max drawdown eq = np.cumprod([1.0] + [1 + r for r in trades]) peak = np.maximum.accumulate(eq) mdd = float(((eq - peak) / peak).min()) if len(eq) > 1 else 0.0 # Profit Factor gross_w = sum(w) if w else 0 gross_l = abs(sum(l)) if l else 0 pf = gross_w / gross_l if gross_l > 0 else (9999.0 if gross_w > 0 else 0) # Risk:Reward avg_w = float(np.mean(w)) if w else 0 avg_l = abs(float(np.mean(l))) if l else 0 rr = avg_w / avg_l if avg_l > 0 else (9999.0 if avg_w > 0 else 0) # Expectancy expect = net_p / len(trades) return { "n": len(trades), "wins": len(w), "losses": len(l), "wr": round(len(w) / len(trades), 4), "avg": round(float(np.mean(trades)), 6), "best": round(max(w), 6) if w else 0, "worst": round(min(l), 6) if l else 0, "ret": round(cumret - 1, 6), "np": round(net_p, 2), "mdd": round(mdd, 6), "pf": round(pf, 2), "rr": round(rr, 2), "expect": round(expect, 2), } def compute_metrics(bt_result, close_test, capital=10000): """Compute all standard metrics from backtest result. Uses trade-level compounding (same as webapp _trade_stats) for accuracy. Returns: dict with total_ret, bh_ret, sharpe_strat, sharpe_bh, mdd, n_trades """ equity = bt_result["equity"] trade_returns = bt_result["trade_returns"] # Total return — trade-level compounding (matches webapp) if trade_returns: cumret = 1.0 for r in trade_returns: cumret = (1 + r) total_ret = cumret - 1 else: total_ret = 0.0 # Buy and hold bh_equity = capital (close_test / close_test.iloc[0]) bh_ret = (bh_equity.iloc[-1] - capital) / capital if capital != 0 else 0.0 # Sharpe ratio — trade-level (matches webapp: sqrt(25226) annualization) if len(trade_returns) >= 2 and float(np.std(trade_returns)) > 0: sharpe_strat = float(np.mean(trade_returns) / np.std(trade_returns) np.sqrt(252 * 26)) else: sharpe_strat = 0.0 bh_rets = bh_equity.pct_change().dropna() if len(bh_rets) > 1 and bh_rets.std() != 0: sharpe_bh = float((bh_rets.mean() / bh_rets.std()) * np.sqrt(252 * 24 * 4)) else: sharpe_bh = 0.0 # Max drawdown — trade-level (matches webapp) if trade_returns: eq = np.cumprod([1.0] + [1 + r for r in trade_returns]) peak = np.maximum.accumulate(eq) mdd = float(((eq - peak) / peak).min()) if len(eq) > 1 else 0.0 else: mdd = 0.0 return { "total_ret": float(total_ret), "bh_ret": float(bh_ret), "sharpe_strat": float(sharpe_strat) if not np.isnan(sharpe_strat) else 0.0, "sharpe_bh": float(sharpe_bh) if not np.isnan(sharpe_bh) else 0.0, "mdd": float(mdd), "n_trades": len(trade_returns), } # Diagnostics line/histogram series (equity / drawdown / rolling_acc / conf_hist) # only feed the small Diagnostics charts — they're never used by the price chart # or scroll-back. On a 1-min model trained over the (2.2-capped) window these are # still ~30k points each; downsample to a visually-identical resolution before the # dict leaves the trainer so it doesn't carry that into Server-A RAM / Postgres. _RESULTS_SERIES_MAX = 5000 def _downsample_idx(n, cap=_RESULTS_SERIES_MAX): """Evenly-spaced index list spanning [0, n-1] (first+last always kept), or None when no downsampling is needed (n <= cap).""" if n <= cap: return None return np.unique(np.linspace(0, n - 1, cap).astype(int)).tolist() def _take(arr, idx): """Subset a list by an index list (idx may be None → return arr unchanged).""" if idx is None or not isinstance(arr, list): return arr return [arr[i] for i in idx] # trade_log / train_trade_log are lists of per-trade dicts (display-only — the # Trade Log tab). They scale with TRADE count, not bar count, so the bar-window # cap (Phase 2.2) doesn't bound them — a degenerate near-every-bar model can put # 10k+ trade dicts in the blob (>3 MB). Cap each (independently — a small-N model # keeps every trade) to the most-recent N, recording `_total` + `_truncated` # so the true count is still reported. Real strategies have far fewer than # _TRADE_LOG_MAX trades, so this only ever bites pathological models. _TRADE_LOG_MAX = 5000 def _cap_trade_log(tl): """Return (capped_list, original_len, was_truncated).""" if not isinstance(tl, list) or len(tl) <= _TRADE_LOG_MAX: return tl, (len(tl) if isinstance(tl, list) else 0), False return tl[-_TRADE_LOG_MAX:], len(tl), True def build_return_dict(split_result, bt_result, metrics, model, feature_cols, signal_full, p_pos_test, p_neg_test, custom_figs=None, bt_train_result=None, pre_stats=None): """Assemble the complete return dict. Handles ALL serialization. Never returns Timestamps, numpy arrays, or non-JSON types. Returns: JSON-safe dict with all required keys """ df = split_result["df"] close = split_result["close"] close_test = split_result["close_test"] X_test = split_result["X_test"] y_test = split_result["y_test"] equity = bt_result["equity"] bar_returns = bt_result["bar_returns"] # OHLC ohlc_dates = [str(x) for x in df.index.tolist()] def _safe_list(arr): return [float(x) if (x is not None and not np.isnan(x) and not np.isinf(x)) else None for x in arr] # Overlays bb, ma = compute_overlays(close, df.index) # Buy and hold equity capital = equity.iloc[0] if len(equity) > 0 else 10000 bh_equity = capital * (close_test / close_test.iloc[0]) # Confusion matrix from sklearn.metrics import confusion_matrix pred_test = model.predict(X_test) y_test_arr = np.asarray(y_test) cm = confusion_matrix(y_test_arr, pred_test, labels=[-1, 0, 1]) # Rolling accuracy sig_arr = signal_full.reindex(close_test.index).values correct = pd.Series((pred_test == y_test_arr).astype(float), index=X_test.index) active_test = pd.Series(sig_arr != 0, index=close_test.index) if len(sig_arr) == len(close_test) else pd.Series(True, index=close_test.index) correct_active = correct.where(active_test, other=np.nan) rolling_acc = correct_active.rolling(30, min_periods=1).mean() # Feature importance importances = model.feature_importances_ fi_pairs = sorted(zip(feature_cols, importances), key=lambda x: x[1])[-15:] # Drawdown rolling_max = equity.cummax() drawdown = (equity - rolling_max) / rolling_max.replace(0, np.nan) drawdown = drawdown.fillna(0.0) # ── Downsample the Diagnostics-only series (see _downsample_idx) ────────── _eq_dates = [str(x) for x in close_test.index.tolist()] _eq_strat = _safe_list(equity.values) _eq_bh = _safe_list(bh_equity.values) _eq_idx = _downsample_idx(len(_eq_dates)) _eq_dates, _eq_strat, _eq_bh = _take(_eq_dates, _eq_idx), _take(_eq_strat, _eq_idx), _take(_eq_bh, _eq_idx) _ra_dates = [str(x) for x in rolling_acc.index.tolist()] _ra_vals = [float(x) if (not np.isnan(x) and not np.isinf(x)) else None for x in rolling_acc.values] _ra_idx = _downsample_idx(len(_ra_dates)) _ra_dates, _ra_vals = _take(_ra_dates, _ra_idx), _take(_ra_vals, _ra_idx) _dd_dates = [str(x) for x in drawdown.index.tolist()] _dd_vals = _safe_list(drawdown.values) _dd_idx = _downsample_idx(len(_dd_dates)) _dd_dates, _dd_vals = _take(_dd_dates, _dd_idx), _take(_dd_vals, _dd_idx) _cp_pos = [float(x) for x in (p_pos_test.tolist() if hasattr(p_pos_test, 'tolist') else list(p_pos_test))] _cp_neg = [float(x) for x in (p_neg_test.tolist() if hasattr(p_neg_test, 'tolist') else list(p_neg_test))] _cp_pos = _take(_cp_pos, _downsample_idx(len(_cp_pos))) _cp_neg = _take(_cp_neg, _downsample_idx(len(_cp_neg))) # ── Trade logs — display-only (Trade Log tab); cap to most-recent N with a # `_total` field so the true count is still reported (see _cap_trade_log). # NB: ret_dist arrays are left FULL — a downstream path in callbacks.py # recomputes n_trades/win-rate from len(ret_dist), so a sample would skew # the displayed counts; they're small anyway and gzip handles them. _tl_test, _tl_test_n, _tl_test_tr = _cap_trade_log(bt_result.get("trade_log", [])) _tl_tr, _tl_tr_n, _tl_tr_tr = _cap_trade_log(bt_train_result.get("trade_log", []) if bt_train_result else []) return { "ohlc": { "dates": ohlc_dates, "open": _safe_list(df["open"].values), "high": _safe_list(df["high"].values), "low": _safe_list(df["low"].values), "close": _safe_list(df["close"].values), }, "signals": { "dates": [str(x) for x in signal_full.index.tolist()], "values": [float(x) for x in signal_full.values], }, "bb": bb, "ma": ma, "equity": { "dates": _eq_dates, "strategy": _eq_strat, "bh": _eq_bh, }, "feature_importance": { "names": [p[0] for p in fi_pairs], "values": [float(p[1]) for p in fi_pairs], }, "conf_matrix": cm.tolist(), "conf_hist": { "p_pos": _cp_pos, "p_neg": _cp_neg, }, "rolling_acc": { "dates": _ra_dates, "values": _ra_vals, }, "drawdown": { "dates": _dd_dates, "values": _dd_vals, }, "ret_dist": [float(x) for x in bt_result["trade_returns"]], "ret_dist_long": [float(x) for x in bt_result["long_returns"]], "ret_dist_short": [float(x) for x in bt_result["short_returns"]], "train_ret_dist": [float(x) for x in bt_train_result["trade_returns"]] if bt_train_result else [], "train_ret_dist_long": [float(x) for x in bt_train_result["long_returns"]] if bt_train_result else [], "train_ret_dist_short": [float(x) for x in bt_train_result["short_returns"]] if bt_train_result else [], "trade_log": _tl_test, "train_trade_log": _tl_tr, "trade_log_total": _tl_test_n, "train_trade_log_total": _tl_tr_n, "trade_log_truncated": _tl_test_tr, "train_trade_log_truncated": _tl_tr_tr, (pre_stats or {}), "metrics": metrics, "split_dt": split_result["split_dt"], "split_idx": int(split_result["split_idx"]), "n_train": int(split_result["n_train"]), "n_test": int(split_result["n_test"]), "feature_cols": list(feature_cols), "custom_figs": custom_figs or [], } # ════════════════════════════════════════════════════════════════════════════ # STRATEGY FRAMEWORK v2 — Config-driven architecture # Claude writes feature_engineering() + strategy_config(). Framework does rest. # ════════════════════════════════════════════════════════════════════════════ import importlib _MODEL_REGISTRY = { "XGBClassifier": ("xgboost", "XGBClassifier"), "RandomForestClassifier": ("sklearn.ensemble", "RandomForestClassifier"), "GradientBoostingClassifier": ("sklearn.ensemble", "GradientBoostingClassifier"), "LogisticRegression": ("sklearn.linear_model", "LogisticRegression"), "ExtraTreesClassifier": ("sklearn.ensemble", "ExtraTreesClassifier"), "AdaBoostClassifier": ("sklearn.ensemble", "AdaBoostClassifier"), } def _build_model_from_config(config, X_train, y_train_enc): """Build, fit, and wrap a model from strategy_config dict.""" model_type = config.get("model_type", "RandomForestClassifier") model_params = dict(config.get("model_params", {})) if model_type not in _MODEL_REGISTRY: raise ValueError(f"Unknown model_type '{model_type}'. Valid: {list(_MODEL_REGISTRY.keys())}") module_path, class_name = _MODEL_REGISTRY[model_type] mod = importlib.import_module(module_path) cls = getattr(mod, class_name) # XGBoost defaults if class_name == "XGBClassifier": model_params.setdefault("use_label_encoder", False) model_params.setdefault("eval_metric", "mlogloss") model_params.setdefault("tree_method", "hist") # Determinism > speed (2026-05-25). XGBoost hist with n_jobs=-1 is # NON-reproducible even with random_state set — the parallel histogram # gradient-sum order varies across threads, so the SAME code + data # gives a slightly different model (and backtest) every run. Forcing # single-thread makes training bit-reproducible so: (a) a user who # copies a strategy and reruns it gets identical numbers, (b) the # community "Live" score matches a redeploy, (c) "same code, different # result" support reports go away. Cost: single-threaded XGB (a few # seconds slower on large windows; hist is fast so it's minor). FORCED # (not setdefault) so the guarantee can't be silently broken by a # strategy passing n_jobs. Exact reproducibility holds within the # platform (pinned versions / same Modal image); a user's own machine # with different xgboost/numpy/CPU can still differ in low-order bits. model_params["n_jobs"] = 1 # Common defaults model_params.setdefault("random_state", 42) from model_wrapper import ModelWrapper clf = cls(model_params) clf.fit(X_train, y_train_enc) enc = LabelEncoder() enc.fit([-1, 0, 1]) return ModelWrapper(clf, original_classes=enc.classes_, n_features=X_train.shape[1]) def _generate_signals(model, X, threshold): """Framework-owned signal generation. Deterministic threshold logic.""" proba = model.predict_proba(X) classes = list(model.classes_) idx_pos = classes.index(1) if 1 in classes else None idx_neg = classes.index(-1) if -1 in classes else None p_pos = proba[:, idx_pos] if idx_pos is not None else np.zeros(len(X)) p_neg = proba[:, idx_neg] if idx_neg is not None else np.zeros(len(X)) signal_vals = np.zeros(len(X)) signal_vals = np.where(p_pos >= threshold, 1.0, signal_vals) signal_vals = np.where(p_neg >= threshold, -1.0, signal_vals) # Both exceed: pick stronger both = (p_pos >= threshold) & (p_neg >= threshold) signal_vals[both] = np.where(p_pos[both] >= p_neg[both], 1.0, -1.0) return pd.Series(signal_vals, index=X.index), p_pos, p_neg # ── Filter functions (all no-ops when config value is None) ────────────── def _apply_direction_filter(signal, direction): """Zero out signals that don't match allowed direction.""" if direction is None or direction == "both": return signal s = signal.copy() if direction == "long": s[s < 0] = 0.0 elif direction == "short": s[s > 0] = 0.0 return s def _apply_session_filter(signal, index, session_hours): """Zero out signals outside session hours [start, end] UTC.""" if session_hours is None: return signal s = signal.copy() start_h, end_h = session_hours[0], session_hours[1] hours = index.hour if start_h <= end_h: mask = (hours >= start_h) & (hours < end_h) else: # wrap around midnight, e.g. [22, 6] mask = (hours >= start_h) \| (hours < end_h) s[~mask] = 0.0 return s def _apply_atr_filter(signal, close, high, low, min_atr): """Zero out signals when NATR(14) is below threshold.""" if min_atr is None: return signal hl = high - low hc = (high - close.shift(1)).abs() lc = (low - close.shift(1)).abs() tr = pd.concat([hl, hc, lc], axis=1).max(axis=1) atr14 = tr.ewm(com=13, adjust=False).mean() natr = atr14 / close.replace(0, np.nan) s = signal.copy() s[natr < min_atr] = 0.0 return s def _apply_trend_filter(signal, close, trend_filter): """Only allow signals aligned with trend. e.g. 'sma_50': longs above SMA, shorts below.""" if trend_filter is None: return signal # Parse: "sma_50" → SMA with period 50 parts = trend_filter.lower().replace("-", "_").split("_") if len(parts) >= 2 and parts[0] in ("sma", "ema"): period = int(parts[1]) else: return signal # unknown filter, skip if parts[0] == "sma": trend_line = close.rolling(period).mean() else: trend_line = close.ewm(span=period, adjust=False).mean() s = signal.copy() # Longs only above trend, shorts only below s[(s > 0) & (close < trend_line)] = 0.0 s[(s < 0) & (close > trend_line)] = 0.0 return s # ── run_backtest_v2: framework-owned SL/TP/cooldown/position management ── def run_backtest_v2(signal, close, high, low, config, capital=10000, cost=2e-5): """Backtest with SL/TP/cooldown/direction handling built into the engine. Unlike run_backtest (v1), this function handles position exits internally. Returns: same dict shape as run_backtest() """ stop_loss = config.get("stop_loss") take_profit = config.get("take_profit") cooldown = config.get("cooldown", 0) on_opposite = config.get("on_opposite", "reverse") sig_arr = signal.values close_arr = close.values high_arr = high.values low_arr = low.values idx = signal.index n = len(close_arr) trade_returns = [] long_returns = [] short_returns = [] trade_log = [] equity_vals = np.full(n, float(capital)) cumret = 1.0 position = 0.0 # current direction: 1.0, -1.0, or 0.0 (flat) entry_price = None entry_bar = None # index into arrays for entry time cooldown_remaining = 0 def _log_trade(exit_bar, exit_px, ret, reason): trade_log.append({ "type": "Buy" if position == 1.0 else "Sell", "entry_time": str(idx[entry_bar]), "exit_time": str(idx[exit_bar]), "entry_price": round(entry_price, 5), "exit_price": round(exit_px, 5), "pnl": round(position * (exit_px - entry_price), 5), "pnl_pct": round(ret * 100, 3), "exit_reason": reason, }) for i in range(n): c = close_arr[i] h = high_arr[i] lo = low_arr[i] s = sig_arr[i] # 1. Check SL/TP if in trade if position != 0.0 and entry_price is not None: hit_sl = False hit_tp = False exit_price = None if position == 1.0: # long if stop_loss is not None and lo <= entry_price * (1 - stop_loss): hit_sl = True exit_price = entry_price * (1 - stop_loss) elif take_profit is not None and h >= entry_price * (1 + take_profit): hit_tp = True exit_price = entry_price * (1 + take_profit) else: # short if stop_loss is not None and h >= entry_price * (1 + stop_loss): hit_sl = True exit_price = entry_price * (1 + stop_loss) elif take_profit is not None and lo <= entry_price * (1 - take_profit): hit_tp = True exit_price = entry_price * (1 - take_profit) if hit_sl or hit_tp: ret = float(position * (exit_price - entry_price) / entry_price - cost) trade_returns.append(ret) if position == 1.0: long_returns.append(ret) else: short_returns.append(ret) _log_trade(i, exit_price, ret, "SL" if hit_sl else "TP") cumret = (1 + ret) position = 0.0 entry_price = None entry_bar = None cooldown_remaining = cooldown equity_vals[i] = capital cumret continue # 2. Cooldown if cooldown_remaining > 0: cooldown_remaining -= 1 equity_vals[i] = capital * cumret continue # 3. Signal processing if s != 0.0: if position == 0.0: # Open new trade position = s entry_price = c entry_bar = i elif s != position: # Opposite signal if on_opposite == "reverse": # Close current + open opposite ret = float(position * (c - entry_price) / entry_price - cost) trade_returns.append(ret) if position == 1.0: long_returns.append(ret) else: short_returns.append(ret) _log_trade(i, c, ret, "signal") cumret = (1 + ret) position = s entry_price = c entry_bar = i else: # close_only # Close current, go flat ret = float(position (c - entry_price) / entry_price - cost) trade_returns.append(ret) if position == 1.0: long_returns.append(ret) else: short_returns.append(ret) _log_trade(i, c, ret, "close_only") cumret = (1 + ret) position = 0.0 entry_price = None entry_bar = None cooldown_remaining = cooldown equity_vals[i] = capital cumret # Close last open trade at final close if position != 0.0 and entry_price is not None and n > 0 and entry_price != 0: c = close_arr[-1] ret = float(position * (c - entry_price) / entry_price - cost) trade_returns.append(ret) if position == 1.0: long_returns.append(ret) else: short_returns.append(ret) _log_trade(n - 1, c, ret, "end") cumret = (1 + ret) equity_vals[-1] = capital cumret # Bar returns for Sharpe (approximate) bar_returns = np.zeros(n) for i in range(1, n): if close_arr[i - 1] != 0 and sig_arr[i - 1] != 0: bar_returns[i] = sig_arr[i - 1] * (close_arr[i] - close_arr[i - 1]) / close_arr[i - 1] return { "equity": pd.Series(equity_vals, index=close.index), "trade_returns": trade_returns, "long_returns": long_returns, "short_returns": short_returns, "bar_returns": bar_returns, "trade_log": trade_log, } # ── run_strategy: the v2 orchestrator ──────────────────────────────────── def run_strategy(feature_fn, config_fn, data_path, start_date="", end_date="", validation_date="", train_split=0.7, register_model_fn=None): """Config-driven strategy execution. Claude writes feature_fn + config_fn, framework does everything else. Returns: results dict (same format as webapp expects) """ config = config_fn() # Auto-correct SL/TP if Claude passed percentage instead of decimal for _key in ("stop_loss", "take_profit"): _val = config.get(_key) if _val is not None and _val > 0.1: # >10% is almost certainly a percentage config[_key] = _val / 100.0 print(f"[strategy] Auto-corrected {_key}: {_val} -> {config[_key]} (was percentage, converted to decimal)") # 1. Load data df, close, open_, high, low = load_ohlc(data_path, start_date, end_date) # 2. Feature engineering (Claude's function) df = feature_fn(df, close, open_, high, low) close = df["close"] open_ = df["open"] high = df["high"] low = df["low"] # 3. Warm-up detection: drop rows where features have NaN BEFORE any fill feature_cols = [c for c in df.columns if c not in ("open", "high", "low", "close")] raw_nans = df[feature_cols].isna().any(axis=1) valid_rows = ~raw_nans if valid_rows.any(): first_valid = valid_rows.idxmax() if raw_nans.loc[:first_valid].any(): df = df.loc[first_valid:].copy() close = df["close"] open_ = df["open"] high = df["high"] low = df["low"] # 4. Target horizon = config.get("target_horizon", 4) target = make_target(close, horizon=horizon) # 5. Split (ffill only within each partition — no bfill leak) mask = target.notna() df = df[mask].copy() target = target[mask] close = df["close"] high = df["high"] low = df["low"] X = df[feature_cols].copy() X = X.replace([np.inf, -np.inf], np.nan) if validation_date: split_idx = len(df[df.index <= validation_date]) else: split_idx = int(len(df) * train_split) split_idx = max(1, min(split_idx, len(df) - 1)) # ffill within train and test separately (no leak) X_train = X.iloc[:split_idx].ffill().fillna(0.0) X_test = X.iloc[split_idx:].ffill().fillna(0.0) X = pd.concat([X_train, X_test]) y_train = target.iloc[:split_idx] y_test = target.iloc[split_idx:] close_train = close.iloc[:split_idx] close_test = close.iloc[split_idx:] high_test = high.iloc[split_idx:] low_test = low.iloc[split_idx:] enc = LabelEncoder() enc.fit([-1, 0, 1]) y_train_enc = enc.transform(y_train) y_test_enc = enc.transform(y_test) split_dt = str(df.index[split_idx]) sp = { "df": df, "X_train": X_train, "X_test": X_test, "y_train": y_train, "y_test": y_test, "y_train_enc": y_train_enc, "y_test_enc": y_test_enc, "enc": enc, "close": close, "close_train": close_train, "close_test": close_test, "split_idx": split_idx, "split_dt": split_dt, "n_train": len(X_train), "n_test": len(X_test), } # 6. Build model from config model = _build_model_from_config(config, X_train, y_train_enc) # 7. Generate signals threshold = config.get("signal_threshold", 0.55) signal_train, p_pos_train, p_neg_train = _generate_signals(model, X_train, threshold) signal_test, p_pos_test, p_neg_test = _generate_signals(model, X_test, threshold) # 8. Apply filters (order: direction → session → ATR → trend) direction = config.get("direction", "both") signal_test = _apply_direction_filter(signal_test, direction) signal_train = _apply_direction_filter(signal_train, direction) session_filter = config.get("session_filter") signal_test = _apply_session_filter(signal_test, signal_test.index, session_filter) signal_train = _apply_session_filter(signal_train, signal_train.index, session_filter) min_atr = config.get("min_atr") if min_atr is not None: signal_test = _apply_atr_filter(signal_test, close_test, high_test, low_test, min_atr) trend_filter = config.get("trend_filter") if trend_filter is not None: signal_test = _apply_trend_filter(signal_test, close_test, trend_filter) signal_full = pd.concat([signal_train, signal_test]) # 9. Backtest with SL/TP/cooldown (test + train) high_train = high.iloc[:split_idx] low_train = low.iloc[:split_idx] has_risk = (config.get("stop_loss") is not None or config.get("take_profit") is not None or config.get("cooldown", 0) > 0 or config.get("on_opposite", "reverse") != "reverse") if has_risk: bt = run_backtest_v2(signal_test, close_test, high_test, low_test, config, capital=10000) bt_train = run_backtest_v2(signal_train, close_train, high_train, low_train, config, capital=10000) else: bt = run_backtest(signal_test, close_test, capital=10000) bt_train = run_backtest(signal_train, close_train, capital=10000) # 10. Metrics metrics = compute_metrics(bt, close_test, capital=10000) # 11. Pre-compute all trade stats (single source of truth) pre_stats = { "train_stats": compute_trade_stats(bt_train.get("trade_returns", []), capital=10000), "test_stats": compute_trade_stats(bt.get("trade_returns", []), capital=10000), "long_stats": compute_trade_stats(bt.get("long_returns", []), capital=10000), "short_stats": compute_trade_stats(bt.get("short_returns", []), capital=10000), } # 12. Register model if register_model_fn is not None: register_model_fn(model) # 13. Build return dict return build_return_dict(sp, bt, metrics, model, feature_cols, signal_full, p_pos_test, p_neg_test, custom_figs=[], bt_train_result=bt_train, pre_stats=pre_stats) # ── End strategy_utils ── DATA_PATH = '/root/Desktop/QuantifyMe/data/ohlc/GBPUSD_15min.parquet' START_DATE = '2026-04-15' END_DATE = '2026-05-25' VALIDATION_DATE = "" TRAIN_SPLIT = 0.7 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # ── EMA crossover core signals ────────────────────────────────────────── ema9 = close.ewm(span=9, adjust=False).mean() ema21 = close.ewm(span=21, adjust=False).mean() ema50 = close.ewm(span=50, adjust=False).mean() ema200 = close.ewm(span=200, adjust=False).mean() df["ema9"] = ema9 df["ema21"] = ema21 df["ema50"] = ema50 df["ema200"] = ema200 # Raw spread and normalised spread df["ema_diff"] = ema9 - ema21 df["ema_diff_norm"] = (ema9 - ema21) / close # Cross signal: +1 when ema9 > ema21, -1 otherwise df["ema_cross_sign"] = np.where(ema9 > ema21, 1.0, -1.0) # Momentum of the spread (rate of change of spread) df["ema_diff_roc1"] = df["ema_diff"].diff(1) df["ema_diff_roc3"] = df["ema_diff"].diff(3) # Distance of price from ema50 and ema200 (normalised) df["dist_ema50"] = (close - ema50) / close df["dist_ema200"] = (close - ema200) / close # ── RSI (14) ──────────────────────────────────────────────────────────── delta = close.diff() gain = delta.clip(lower=0.0) loss = (-delta).clip(lower=0.0) avg_g = gain.ewm(com=13, adjust=False).mean() avg_l = loss.ewm(com=13, adjust=False).mean() rs = avg_g / avg_l.replace(0.0, np.nan) rsi14 = 100.0 - 100.0 / (1.0 + rs) df["rsi14"] = rsi14 # RSI normalised and centred df["rsi14_norm"] = (rsi14 - 50.0) / 50.0 # ── MACD ──────────────────────────────────────────────────────────────── ema12 = close.ewm(span=12, adjust=False).mean() ema26 = close.ewm(span=26, adjust=False).mean() macd_line = ema12 - ema26 signal_ln = macd_line.ewm(span=9, adjust=False).mean() macd_hist = macd_line - signal_ln df["macd_line"] = macd_line / close df["macd_signal"] = signal_ln / close df["macd_hist"] = macd_hist / close df["macd_cross"] = np.where(macd_line > signal_ln, 1.0, -1.0) # ── Bollinger Bands (20, 2) ────────────────────────────────────────────── bb_mid = close.rolling(20).mean() bb_std = close.rolling(20).std(ddof=0) bb_upper = bb_mid + 2.0 * bb_std bb_lower = bb_mid - 2.0 * bb_std bb_width = (bb_upper - bb_lower) / bb_mid.replace(0.0, np.nan) bb_pct = (close - bb_lower) / (bb_upper - bb_lower).replace(0.0, np.nan) df["bb_width"] = bb_width df["bb_pct"] = bb_pct # ── ATR (14) ───────────────────────────────────────────────────────────── tr = pd.concat([ high - low, (high - close.shift(1)).abs(), (low - close.shift(1)).abs() ], axis=1).max(axis=1) atr14 = tr.ewm(com=13, adjust=False).mean() df["atr14"] = atr14 df["natr14"] = atr14 / close # normalised ATR (volatility proxy) # ── Stochastic %K / %D (14, 3) ────────────────────────────────────────── low14 = low.rolling(14).min() high14 = high.rolling(14).max() stoch_k = 100.0 * (close - low14) / (high14 - low14).replace(0.0, np.nan) stoch_d = stoch_k.rolling(3).mean() df["stoch_k"] = stoch_k / 100.0 df["stoch_d"] = stoch_d / 100.0 df["stoch_diff"] = (stoch_k - stoch_d) / 100.0 # ── Rate of Change ─────────────────────────────────────────────────────── df["roc1"] = close.pct_change(1) df["roc4"] = close.pct_change(4) df["roc8"] = close.pct_change(8) df["roc16"] = close.pct_change(16) # ── Candle features ────────────────────────────────────────────────────── body = (close - open_).abs() candle_rng = (high - low).replace(0.0, np.nan) df["body_ratio"] = body / candle_rng df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / candle_rng df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / candle_rng df["candle_dir"] = np.where(close >= open_, 1.0, -1.0) # ── Volume-like proxy: range relative to rolling average ──────────────── df["range_ratio"] = candle_rng / candle_rng.rolling(20).mean() # ── Lagged EMA diff features ───────────────────────────────────────────── for lag in [1, 2, 3, 4]: df[f"ema_diff_lag{lag}"] = df["ema_diff_norm"].shift(lag) # ── Lagged RSI ─────────────────────────────────────────────────────────── for lag in [1, 2, 4]: df[f"rsi14_lag{lag}"] = df["rsi14_norm"].shift(lag) # ── Rolling volatility (std of returns) ────────────────────────────────── ret = close.pct_change() df["vol_8"] = ret.rolling(8).std() df["vol_16"] = ret.rolling(16).std() df["vol_32"] = ret.rolling(32).std() # ── Trend strength: ADX-like (simplified) ──────────────────────────────── plus_dm = (high.diff()).clip(lower=0.0) minus_dm = (-low.diff()).clip(lower=0.0) overlap = pd.concat([plus_dm, minus_dm], axis=1).min(axis=1) plus_dm = plus_dm - overlap minus_dm = minus_dm - overlap smooth_tr = tr.ewm(com=13, adjust=False).mean() plus_di = 100.0 * plus_dm.ewm(com=13, adjust=False).mean() / smooth_tr.replace(0.0, np.nan) minus_di = 100.0 * minus_dm.ewm(com=13, adjust=False).mean() / smooth_tr.replace(0.0, np.nan) di_sum = (plus_di + minus_di).replace(0.0, np.nan) adx = ((plus_di - minus_di).abs() / di_sum * 100.0).ewm(com=13, adjust=False).mean() df["adx"] = adx / 100.0 df["plus_di"] = plus_di / 100.0 df["minus_di"] = minus_di / 100.0 # ── Session hour (UTC) ─────────────────────────────────────────────────── if hasattr(df.index, "hour"): df["hour_sin"] = np.sin(2.0 * np.pi * df.index.hour / 24.0) df["hour_cos"] = np.cos(2.0 * np.pi * df.index.hour / 24.0) else: df["hour_sin"] = 0.0 df["hour_cos"] = 1.0 # ── Fill any NaN from warm-up periods ──────────────────────────────────── df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "EMA 9/21 Crossover + MACD Momentum (XGBoost)", "model_type": "XGBClassifier", "model_params": { "n_estimators": 400, "max_depth": 4, "learning_rate": 0.04, "subsample": 0.80, "colsample_bytree": 0.75, "min_child_weight": 3, "gamma": 0.10, "reg_alpha": 0.05, "reg_lambda": 1.50, "objective": "binary:logistic", "tree_method": "hist", "random_state": 42, }, "signal_threshold": 0.55, "direction": "both", "stop_loss": 0.0030, "take_profit": 0.0060, "cooldown": 0, "max_positions": 1, "on_opposite": "reverse", "session_filter": [6, 20], "min_atr": None, "trend_filter": "sma_50", "target_horizon": 4, "objective": ( "Maximize Sharpe ratio on EUR/USD 15-min data. " "Core signal: EMA(9) vs EMA(21) crossover enriched with MACD, RSI, " "Bollinger %B, Stochastic, ATR, ADX, candle structure and rolling " "volatility. XGBoost with moderate depth (4) and strong regularisation " "(gamma, alpha, lambda) prevents overfitting on ~6 weeks of intraday data. " "A 0.55 probability threshold filters low-confidence signals. " "A 2:1 TP:SL ratio (30 bp SL / 60 bp TP) improves the reward-risk " "balance. Session filter [6,20] UTC keeps the model away from the thin " "Asian pre-open. trend_filter sma_50 aligns entries with the prevailing " "short-term trend to reduce chop. Cooldown=0 and reverse-on-opposite " "allow continuous participation in trending EMA crossover moves." ), "notes": ( "round-trip cost 2e-5 is accounted for by the framework. " "target_horizon=4 bars (1 hour ahead) suits EMA crossover which " "generates medium-frequency signals rather than tick-level scalps. " "All features are normalised or expressed as ratios to minimise " "scale sensitivity for the logistic-objective XGBoost." ), } # ── Framework v2: auto-generated wrapper ── def train_and_backtest(): _vd = VALIDATION_DATE if 'VALIDATION_DATE' in globals() else '' _ts = TRAIN_SPLIT if 'TRAIN_SPLIT' in globals() else 0.7 return run_strategy( feature_engineering, strategy_config, DATA_PATH, START_DATE, END_DATE, _vd, _ts, register_model_fn=register_model )
1.59	Bollinger reversion	M @malcolmtan	Bollin		47.9%—	+1.53%—	1.46—	0.67%0.67%	71—
# ╔══════════════════════════════════════════════════════════════╗ # ║ STRATEGY REQUEST LOG ║ # ╚══════════════════════════════════════════════════════════════╝ # Generated : 2026-05-25 02:29:29 # Model : XGBoost # Feature Eng. : buy when price closes below the lower Bollinger Band(20,2) and RSI(14) < 35, exit at the middle band + Auto-add features: ON # Signal / Entry : — # Optimization : — # Risk Mgmt : — # Risk Filter : — # ══════════════════════════════════════════════════════════════ # ============================================================ # Bollinger Band Mean-Reversion + RSI Filter (XGBoost, Sharpe) # ============================================================ # SECTION 0 — IMPORTS & CONSTANTS import numpy as np import pandas as pd # ── Inlined strategy_utils ── """ strategy_utils.py — Standard utility functions for generated strategies. Claude imports these instead of writing boilerplate from scratch. This ensures consistent behavior across all generated strategies. """ import numpy as np import pandas as pd from sklearn.preprocessing import LabelEncoder # Max backtest window per timeframe. A finer timeframe over a longer window # blows up the results dict / parquet load / Modal train time (the 2026-05-12 # OOM was a 1-min × multi-year sweep) — and a 1-min strategy gains nothing from # 2 years of 1-min bars. Enforced HERE because every training path (UI / API / # Modal) funnels through run_strategy → load_ohlc. Env-overridable so a future # "max plan" / dedicated-server tier can lift it. _TF_MAX_DAYS = { "1min": 30, "5min": 90, "15min": 365, "1h": 730, } def _fetch_ohlc_from_internal(symbol: str, tf: str, start: str, end: str): """Phase 3.2: fetch parquet bytes from Server A's /internal/ohlc endpoint instead of reading a local file. Used inside Modal containers / Mac worker pool (Phase 3.4) so every train sees the same source of truth as the chart. Returns: pd.DataFrame (parquet decoded), or raises on any failure so the caller can fall back / surface a clear error in the job. """ import hashlib as _hashlib, hmac as _hmac, io as _io, os as _os import urllib.request as _ur, urllib.parse as _urp base = (_os.environ.get("QM_INTERNAL_OHLC_BASE") or "").rstrip("/") secret = (_os.environ.get("INTERNAL_WS_SECRET") or "").strip() if not base: raise RuntimeError("QM_INTERNAL_OHLC_BASE not set") if not secret: raise RuntimeError("INTERNAL_WS_SECRET not set") msg = f"{symbol}\|{tf}\|{start}\|{end}".encode("utf-8") sig = _hmac.new(secret.encode("utf-8"), msg, _hashlib.sha256).hexdigest() qs = _urp.urlencode({ "symbol": symbol, "tf": tf, "start": start, "end": end, "sig": sig, }) url = f"{base}/internal/ohlc?{qs}" req = _ur.Request(url, headers={"User-Agent": "qm-worker/1.0"}) with _ur.urlopen(req, timeout=30) as resp: if resp.status != 200: raise RuntimeError(f"/internal/ohlc returned {resp.status}") payload = resp.read() print(f"[load_ohlc:internal] {symbol} {tf} fetched {len(payload)} bytes", flush=True) return pd.read_parquet(_io.BytesIO(payload)) def _parse_symbol_tf_from_path(data_path: str): """Pull SYMBOL + TF out of a path like .../EURUSD_1min.parquet.""" import os as _os, re as _re base = _os.path.basename(str(data_path)) m = _re.match(r"^([A-Z]{6})_(\d+min\|\d+h)\.parquet$", base) if not m: return None, None return m.group(1), m.group(2) def load_ohlc(data_path, start_date="", end_date=""): """Load OHLC parquet, sort index, filter dates. Always returns consistent format. The lower bound is clamped per timeframe (see _TF_MAX_DAYS) — a request for more history than the cap silently starts later. Phase 3.2: when env QM_USE_INTERNAL_OHLC=="1", fetch over HTTP from Server A's /internal/ohlc endpoint instead of pd.read_parquet on a local file (which on Modal is a stale Volume snapshot). The endpoint applies the same day-cap, so the local cap-check below is a defensive no-op in that path. Flag defaults to "0" → unchanged behavior. Returns: (df, close, open_, high, low) """ import os as _os, re as _re _use_internal = _os.environ.get("QM_USE_INTERNAL_OHLC", "0") == "1" if _use_internal: _sym, _tf = _parse_symbol_tf_from_path(data_path) if not _sym or not _tf: raise RuntimeError( f"QM_USE_INTERNAL_OHLC=1 but DATA_PATH basename does not match " f"SYMBOL_TF.parquet: {data_path}" ) df = _fetch_ohlc_from_internal(_sym, _tf, start_date or "", end_date or "") else: df = pd.read_parquet(data_path) df.index = pd.to_datetime(df.index) df = df.sort_index() # Per-timeframe window cap (timeframe inferred from the parquet filename). _m = _re.search(r"_(\d+min\|\d+h)\.parquet$", _os.path.basename(str(data_path))) _tf = _m.group(1) if _m else None _max_days = _TF_MAX_DAYS.get(_tf) if _max_days and _max_days > 0 and len(df): _env_override = _os.environ.get(f"QM_MAX_DAYS_{_tf.upper()}") if _env_override and _env_override.isdigit(): _max_days = int(_env_override) try: _eff_end = pd.Timestamp(end_date) if end_date else df.index.max() _eff_end = min(_eff_end, df.index.max()) _floor = _eff_end - pd.Timedelta(days=_max_days) _req_start = pd.Timestamp(start_date) if start_date else df.index.min() if _req_start < _floor: print(f"[load_ohlc] {_tf} backtest window capped to {_max_days}d: " f"start {_req_start.date()} -> {_floor.date()}", flush=True) start_date = _floor except Exception as _e: print(f"[load_ohlc] window-cap check skipped ({_e})", flush=True) if start_date: df = df[df.index >= start_date] if end_date: df = df[df.index <= end_date] return df, df["close"], df["open"], df["high"], df["low"] def make_target(close, horizon=4): """Create target: direction N bars ahead. Default 4 bars = 1 hour on 15-min data. Returns: target (pd.Series of -1, 0, 1) """ return np.sign(close.shift(-horizon) - close) def split_data(df, target, feature_cols, train_split=0.7, validation_date=""): """Train/test split. Handles both ratio and date-based splits. Drops NaN from target before splitting. Encodes labels to [0,1,2]. Returns: dict with keys: X_train, X_test, y_train, y_test, y_train_enc, y_test_enc, enc, close_train, close_test, split_idx, split_dt, n_train, n_test """ # Drop NaN from target mask = target.notna() df = df[mask].copy() target = target[mask] close = df["close"] # Build feature matrix X = df[feature_cols].copy() X = X.bfill().ffill() X = X.replace([np.inf, -np.inf], np.nan).fillna(0.0) # Split if validation_date: split_idx = len(df[df.index <= validation_date]) else: split_idx = int(len(df) * train_split) split_idx = max(1, min(split_idx, len(df) - 1)) X_train = X.iloc[:split_idx] X_test = X.iloc[split_idx:] y_train = target.iloc[:split_idx] y_test = target.iloc[split_idx:] close_train = close.iloc[:split_idx] close_test = close.iloc[split_idx:] split_dt = str(df.index[split_idx]) # Label encoding — always fit on [-1, 0, 1] enc = LabelEncoder() enc.fit([-1, 0, 1]) y_train_enc = enc.transform(y_train) y_test_enc = enc.transform(y_test) return { "df": df, "X_train": X_train, "X_test": X_test, "y_train": y_train, "y_test": y_test, "y_train_enc": y_train_enc, "y_test_enc": y_test_enc, "enc": enc, "close": close, "close_train": close_train, "close_test": close_test, "split_idx": split_idx, "split_dt": split_dt, "n_train": len(X_train), "n_test": len(X_test), } def compute_overlays(close, df_index): """Compute BB and MA overlays on full dataset. Always consistent. Returns: (bb_dict, ma_dict) """ bb_mid = close.rolling(20).mean() bb_std = close.rolling(20).std() bb_upper = bb_mid + 2 * bb_std bb_lower = bb_mid - 2 * bb_std ma50 = close.rolling(50).mean() ma100 = close.rolling(100).mean() ma200 = close.rolling(200).mean() def _safe(s): s = s.reindex(df_index).bfill().ffill() return [float(x) if (x is not None and not np.isnan(x) and not np.isinf(x)) else None for x in s.values] bb = {"upper": _safe(bb_upper), "mid": _safe(bb_mid), "lower": _safe(bb_lower)} ma = {"ma50": _safe(ma50), "ma100": _safe(ma100), "ma200": _safe(ma200)} return bb, ma def run_backtest(signal, close, capital=10000, cost=2e-5): """Run backtest with transaction costs. Uses price-based trade returns (same as webapp _compute_trades). Signal 0 = hold (keep current position), not close. Returns: dict with equity, trade_returns, long_returns, short_returns, bar_returns """ sig_arr = signal.values price_arr = close.values idx = signal.index n = len(price_arr) # Trade returns — price-based (matches webapp _compute_trades exactly) trade_returns = [] long_returns = [] short_returns = [] trade_log = [] last_dir = None entry_price = None entry_bar = None for i in range(n): s = sig_arr[i] c = price_arr[i] if s != 0.0 and s != last_dir: # Direction change — close previous trade, open new if last_dir is not None and entry_price is not None and entry_price != 0: ret = float(last_dir * (c - entry_price) / entry_price - cost) trade_returns.append(ret) if last_dir == 1: long_returns.append(ret) else: short_returns.append(ret) trade_log.append({ "type": "Buy" if last_dir == 1 else "Sell", "entry_time": str(idx[entry_bar]), "exit_time": str(idx[i]), "entry_price": round(entry_price, 5), "exit_price": round(c, 5), "pnl": round(last_dir * (c - entry_price), 5), "pnl_pct": round(ret * 100, 3), "exit_reason": "signal", }) entry_price = c entry_bar = i last_dir = s # Close last open trade if last_dir is not None and entry_price is not None and n > 0 and entry_price != 0: c = price_arr[-1] ret = float(last_dir * (c - entry_price) / entry_price - cost) trade_returns.append(ret) if last_dir == 1: long_returns.append(ret) else: short_returns.append(ret) trade_log.append({ "type": "Buy" if last_dir == 1 else "Sell", "entry_time": str(idx[entry_bar]), "exit_time": str(idx[-1]), "entry_price": round(entry_price, 5), "exit_price": round(c, 5), "pnl": round(last_dir * (c - entry_price), 5), "pnl_pct": round(ret * 100, 3), "exit_reason": "end", }) # Equity curve from trade returns cumret = 1.0 equity_vals = np.full(n, float(capital)) trade_idx = 0 in_trade = False t_entry_price = None t_dir = None for i in range(n): s = sig_arr[i] c = price_arr[i] if s != 0.0 and s != t_dir: if t_dir is not None and t_entry_price is not None and t_entry_price != 0: t_ret = t_dir * (c - t_entry_price) / t_entry_price - cost cumret = (1 + t_ret) t_entry_price = c t_dir = s equity_vals[i] = capital cumret # Bar returns for Sharpe bar_returns = np.zeros(n) for i in range(1, n): if price_arr[i - 1] != 0 and last_dir is not None: bar_returns[i] = sig_arr[i - 1] * (price_arr[i] - price_arr[i - 1]) / price_arr[i - 1] if sig_arr[i - 1] != 0 else 0.0 return { "equity": pd.Series(equity_vals, index=close.index), "trade_returns": trade_returns, "long_returns": long_returns, "short_returns": short_returns, "bar_returns": bar_returns, "trade_log": trade_log, } def compute_trade_stats(trades, capital=10000): """Single source of truth for trade statistics. Every display path reads from this — no recomputation anywhere. All values are rounded and JSON-safe (no inf/nan). """ if not trades: return {"n": 0, "wins": 0, "losses": 0, "wr": 0, "avg": 0, "best": 0, "worst": 0, "ret": 0, "np": 0, "mdd": 0, "pf": 0, "rr": 0, "expect": 0} w = [r for r in trades if r > 0] l = [r for r in trades if r < 0] cumret = 1.0 for r in trades: cumret = (1 + r) net_p = capital (cumret - 1) # Max drawdown eq = np.cumprod([1.0] + [1 + r for r in trades]) peak = np.maximum.accumulate(eq) mdd = float(((eq - peak) / peak).min()) if len(eq) > 1 else 0.0 # Profit Factor gross_w = sum(w) if w else 0 gross_l = abs(sum(l)) if l else 0 pf = gross_w / gross_l if gross_l > 0 else (9999.0 if gross_w > 0 else 0) # Risk:Reward avg_w = float(np.mean(w)) if w else 0 avg_l = abs(float(np.mean(l))) if l else 0 rr = avg_w / avg_l if avg_l > 0 else (9999.0 if avg_w > 0 else 0) # Expectancy expect = net_p / len(trades) return { "n": len(trades), "wins": len(w), "losses": len(l), "wr": round(len(w) / len(trades), 4), "avg": round(float(np.mean(trades)), 6), "best": round(max(w), 6) if w else 0, "worst": round(min(l), 6) if l else 0, "ret": round(cumret - 1, 6), "np": round(net_p, 2), "mdd": round(mdd, 6), "pf": round(pf, 2), "rr": round(rr, 2), "expect": round(expect, 2), } def compute_metrics(bt_result, close_test, capital=10000): """Compute all standard metrics from backtest result. Uses trade-level compounding (same as webapp _trade_stats) for accuracy. Returns: dict with total_ret, bh_ret, sharpe_strat, sharpe_bh, mdd, n_trades """ equity = bt_result["equity"] trade_returns = bt_result["trade_returns"] # Total return — trade-level compounding (matches webapp) if trade_returns: cumret = 1.0 for r in trade_returns: cumret = (1 + r) total_ret = cumret - 1 else: total_ret = 0.0 # Buy and hold bh_equity = capital (close_test / close_test.iloc[0]) bh_ret = (bh_equity.iloc[-1] - capital) / capital if capital != 0 else 0.0 # Sharpe ratio — trade-level (matches webapp: sqrt(25226) annualization) if len(trade_returns) >= 2 and float(np.std(trade_returns)) > 0: sharpe_strat = float(np.mean(trade_returns) / np.std(trade_returns) np.sqrt(252 * 26)) else: sharpe_strat = 0.0 bh_rets = bh_equity.pct_change().dropna() if len(bh_rets) > 1 and bh_rets.std() != 0: sharpe_bh = float((bh_rets.mean() / bh_rets.std()) * np.sqrt(252 * 24 * 4)) else: sharpe_bh = 0.0 # Max drawdown — trade-level (matches webapp) if trade_returns: eq = np.cumprod([1.0] + [1 + r for r in trade_returns]) peak = np.maximum.accumulate(eq) mdd = float(((eq - peak) / peak).min()) if len(eq) > 1 else 0.0 else: mdd = 0.0 return { "total_ret": float(total_ret), "bh_ret": float(bh_ret), "sharpe_strat": float(sharpe_strat) if not np.isnan(sharpe_strat) else 0.0, "sharpe_bh": float(sharpe_bh) if not np.isnan(sharpe_bh) else 0.0, "mdd": float(mdd), "n_trades": len(trade_returns), } # Diagnostics line/histogram series (equity / drawdown / rolling_acc / conf_hist) # only feed the small Diagnostics charts — they're never used by the price chart # or scroll-back. On a 1-min model trained over the (2.2-capped) window these are # still ~30k points each; downsample to a visually-identical resolution before the # dict leaves the trainer so it doesn't carry that into Server-A RAM / Postgres. _RESULTS_SERIES_MAX = 5000 def _downsample_idx(n, cap=_RESULTS_SERIES_MAX): """Evenly-spaced index list spanning [0, n-1] (first+last always kept), or None when no downsampling is needed (n <= cap).""" if n <= cap: return None return np.unique(np.linspace(0, n - 1, cap).astype(int)).tolist() def _take(arr, idx): """Subset a list by an index list (idx may be None → return arr unchanged).""" if idx is None or not isinstance(arr, list): return arr return [arr[i] for i in idx] # trade_log / train_trade_log are lists of per-trade dicts (display-only — the # Trade Log tab). They scale with TRADE count, not bar count, so the bar-window # cap (Phase 2.2) doesn't bound them — a degenerate near-every-bar model can put # 10k+ trade dicts in the blob (>3 MB). Cap each (independently — a small-N model # keeps every trade) to the most-recent N, recording `_total` + `_truncated` # so the true count is still reported. Real strategies have far fewer than # _TRADE_LOG_MAX trades, so this only ever bites pathological models. _TRADE_LOG_MAX = 5000 def _cap_trade_log(tl): """Return (capped_list, original_len, was_truncated).""" if not isinstance(tl, list) or len(tl) <= _TRADE_LOG_MAX: return tl, (len(tl) if isinstance(tl, list) else 0), False return tl[-_TRADE_LOG_MAX:], len(tl), True def build_return_dict(split_result, bt_result, metrics, model, feature_cols, signal_full, p_pos_test, p_neg_test, custom_figs=None, bt_train_result=None, pre_stats=None): """Assemble the complete return dict. Handles ALL serialization. Never returns Timestamps, numpy arrays, or non-JSON types. Returns: JSON-safe dict with all required keys """ df = split_result["df"] close = split_result["close"] close_test = split_result["close_test"] X_test = split_result["X_test"] y_test = split_result["y_test"] equity = bt_result["equity"] bar_returns = bt_result["bar_returns"] # OHLC ohlc_dates = [str(x) for x in df.index.tolist()] def _safe_list(arr): return [float(x) if (x is not None and not np.isnan(x) and not np.isinf(x)) else None for x in arr] # Overlays bb, ma = compute_overlays(close, df.index) # Buy and hold equity capital = equity.iloc[0] if len(equity) > 0 else 10000 bh_equity = capital * (close_test / close_test.iloc[0]) # Confusion matrix from sklearn.metrics import confusion_matrix pred_test = model.predict(X_test) y_test_arr = np.asarray(y_test) cm = confusion_matrix(y_test_arr, pred_test, labels=[-1, 0, 1]) # Rolling accuracy sig_arr = signal_full.reindex(close_test.index).values correct = pd.Series((pred_test == y_test_arr).astype(float), index=X_test.index) active_test = pd.Series(sig_arr != 0, index=close_test.index) if len(sig_arr) == len(close_test) else pd.Series(True, index=close_test.index) correct_active = correct.where(active_test, other=np.nan) rolling_acc = correct_active.rolling(30, min_periods=1).mean() # Feature importance importances = model.feature_importances_ fi_pairs = sorted(zip(feature_cols, importances), key=lambda x: x[1])[-15:] # Drawdown rolling_max = equity.cummax() drawdown = (equity - rolling_max) / rolling_max.replace(0, np.nan) drawdown = drawdown.fillna(0.0) # ── Downsample the Diagnostics-only series (see _downsample_idx) ────────── _eq_dates = [str(x) for x in close_test.index.tolist()] _eq_strat = _safe_list(equity.values) _eq_bh = _safe_list(bh_equity.values) _eq_idx = _downsample_idx(len(_eq_dates)) _eq_dates, _eq_strat, _eq_bh = _take(_eq_dates, _eq_idx), _take(_eq_strat, _eq_idx), _take(_eq_bh, _eq_idx) _ra_dates = [str(x) for x in rolling_acc.index.tolist()] _ra_vals = [float(x) if (not np.isnan(x) and not np.isinf(x)) else None for x in rolling_acc.values] _ra_idx = _downsample_idx(len(_ra_dates)) _ra_dates, _ra_vals = _take(_ra_dates, _ra_idx), _take(_ra_vals, _ra_idx) _dd_dates = [str(x) for x in drawdown.index.tolist()] _dd_vals = _safe_list(drawdown.values) _dd_idx = _downsample_idx(len(_dd_dates)) _dd_dates, _dd_vals = _take(_dd_dates, _dd_idx), _take(_dd_vals, _dd_idx) _cp_pos = [float(x) for x in (p_pos_test.tolist() if hasattr(p_pos_test, 'tolist') else list(p_pos_test))] _cp_neg = [float(x) for x in (p_neg_test.tolist() if hasattr(p_neg_test, 'tolist') else list(p_neg_test))] _cp_pos = _take(_cp_pos, _downsample_idx(len(_cp_pos))) _cp_neg = _take(_cp_neg, _downsample_idx(len(_cp_neg))) # ── Trade logs — display-only (Trade Log tab); cap to most-recent N with a # `_total` field so the true count is still reported (see _cap_trade_log). # NB: ret_dist arrays are left FULL — a downstream path in callbacks.py # recomputes n_trades/win-rate from len(ret_dist), so a sample would skew # the displayed counts; they're small anyway and gzip handles them. _tl_test, _tl_test_n, _tl_test_tr = _cap_trade_log(bt_result.get("trade_log", [])) _tl_tr, _tl_tr_n, _tl_tr_tr = _cap_trade_log(bt_train_result.get("trade_log", []) if bt_train_result else []) return { "ohlc": { "dates": ohlc_dates, "open": _safe_list(df["open"].values), "high": _safe_list(df["high"].values), "low": _safe_list(df["low"].values), "close": _safe_list(df["close"].values), }, "signals": { "dates": [str(x) for x in signal_full.index.tolist()], "values": [float(x) for x in signal_full.values], }, "bb": bb, "ma": ma, "equity": { "dates": _eq_dates, "strategy": _eq_strat, "bh": _eq_bh, }, "feature_importance": { "names": [p[0] for p in fi_pairs], "values": [float(p[1]) for p in fi_pairs], }, "conf_matrix": cm.tolist(), "conf_hist": { "p_pos": _cp_pos, "p_neg": _cp_neg, }, "rolling_acc": { "dates": _ra_dates, "values": _ra_vals, }, "drawdown": { "dates": _dd_dates, "values": _dd_vals, }, "ret_dist": [float(x) for x in bt_result["trade_returns"]], "ret_dist_long": [float(x) for x in bt_result["long_returns"]], "ret_dist_short": [float(x) for x in bt_result["short_returns"]], "train_ret_dist": [float(x) for x in bt_train_result["trade_returns"]] if bt_train_result else [], "train_ret_dist_long": [float(x) for x in bt_train_result["long_returns"]] if bt_train_result else [], "train_ret_dist_short": [float(x) for x in bt_train_result["short_returns"]] if bt_train_result else [], "trade_log": _tl_test, "train_trade_log": _tl_tr, "trade_log_total": _tl_test_n, "train_trade_log_total": _tl_tr_n, "trade_log_truncated": _tl_test_tr, "train_trade_log_truncated": _tl_tr_tr, (pre_stats or {}), "metrics": metrics, "split_dt": split_result["split_dt"], "split_idx": int(split_result["split_idx"]), "n_train": int(split_result["n_train"]), "n_test": int(split_result["n_test"]), "feature_cols": list(feature_cols), "custom_figs": custom_figs or [], } # ════════════════════════════════════════════════════════════════════════════ # STRATEGY FRAMEWORK v2 — Config-driven architecture # Claude writes feature_engineering() + strategy_config(). Framework does rest. # ════════════════════════════════════════════════════════════════════════════ import importlib _MODEL_REGISTRY = { "XGBClassifier": ("xgboost", "XGBClassifier"), "RandomForestClassifier": ("sklearn.ensemble", "RandomForestClassifier"), "GradientBoostingClassifier": ("sklearn.ensemble", "GradientBoostingClassifier"), "LogisticRegression": ("sklearn.linear_model", "LogisticRegression"), "ExtraTreesClassifier": ("sklearn.ensemble", "ExtraTreesClassifier"), "AdaBoostClassifier": ("sklearn.ensemble", "AdaBoostClassifier"), } def _build_model_from_config(config, X_train, y_train_enc): """Build, fit, and wrap a model from strategy_config dict.""" model_type = config.get("model_type", "RandomForestClassifier") model_params = dict(config.get("model_params", {})) if model_type not in _MODEL_REGISTRY: raise ValueError(f"Unknown model_type '{model_type}'. Valid: {list(_MODEL_REGISTRY.keys())}") module_path, class_name = _MODEL_REGISTRY[model_type] mod = importlib.import_module(module_path) cls = getattr(mod, class_name) # XGBoost defaults if class_name == "XGBClassifier": model_params.setdefault("use_label_encoder", False) model_params.setdefault("eval_metric", "mlogloss") model_params.setdefault("tree_method", "hist") # Determinism > speed (2026-05-25). XGBoost hist with n_jobs=-1 is # NON-reproducible even with random_state set — the parallel histogram # gradient-sum order varies across threads, so the SAME code + data # gives a slightly different model (and backtest) every run. Forcing # single-thread makes training bit-reproducible so: (a) a user who # copies a strategy and reruns it gets identical numbers, (b) the # community "Live" score matches a redeploy, (c) "same code, different # result" support reports go away. Cost: single-threaded XGB (a few # seconds slower on large windows; hist is fast so it's minor). FORCED # (not setdefault) so the guarantee can't be silently broken by a # strategy passing n_jobs. Exact reproducibility holds within the # platform (pinned versions / same Modal image); a user's own machine # with different xgboost/numpy/CPU can still differ in low-order bits. model_params["n_jobs"] = 1 # Common defaults model_params.setdefault("random_state", 42) from model_wrapper import ModelWrapper clf = cls(model_params) clf.fit(X_train, y_train_enc) enc = LabelEncoder() enc.fit([-1, 0, 1]) return ModelWrapper(clf, original_classes=enc.classes_, n_features=X_train.shape[1]) def _generate_signals(model, X, threshold): """Framework-owned signal generation. Deterministic threshold logic.""" proba = model.predict_proba(X) classes = list(model.classes_) idx_pos = classes.index(1) if 1 in classes else None idx_neg = classes.index(-1) if -1 in classes else None p_pos = proba[:, idx_pos] if idx_pos is not None else np.zeros(len(X)) p_neg = proba[:, idx_neg] if idx_neg is not None else np.zeros(len(X)) signal_vals = np.zeros(len(X)) signal_vals = np.where(p_pos >= threshold, 1.0, signal_vals) signal_vals = np.where(p_neg >= threshold, -1.0, signal_vals) # Both exceed: pick stronger both = (p_pos >= threshold) & (p_neg >= threshold) signal_vals[both] = np.where(p_pos[both] >= p_neg[both], 1.0, -1.0) return pd.Series(signal_vals, index=X.index), p_pos, p_neg # ── Filter functions (all no-ops when config value is None) ────────────── def _apply_direction_filter(signal, direction): """Zero out signals that don't match allowed direction.""" if direction is None or direction == "both": return signal s = signal.copy() if direction == "long": s[s < 0] = 0.0 elif direction == "short": s[s > 0] = 0.0 return s def _apply_session_filter(signal, index, session_hours): """Zero out signals outside session hours [start, end] UTC.""" if session_hours is None: return signal s = signal.copy() start_h, end_h = session_hours[0], session_hours[1] hours = index.hour if start_h <= end_h: mask = (hours >= start_h) & (hours < end_h) else: # wrap around midnight, e.g. [22, 6] mask = (hours >= start_h) \| (hours < end_h) s[~mask] = 0.0 return s def _apply_atr_filter(signal, close, high, low, min_atr): """Zero out signals when NATR(14) is below threshold.""" if min_atr is None: return signal hl = high - low hc = (high - close.shift(1)).abs() lc = (low - close.shift(1)).abs() tr = pd.concat([hl, hc, lc], axis=1).max(axis=1) atr14 = tr.ewm(com=13, adjust=False).mean() natr = atr14 / close.replace(0, np.nan) s = signal.copy() s[natr < min_atr] = 0.0 return s def _apply_trend_filter(signal, close, trend_filter): """Only allow signals aligned with trend. e.g. 'sma_50': longs above SMA, shorts below.""" if trend_filter is None: return signal # Parse: "sma_50" → SMA with period 50 parts = trend_filter.lower().replace("-", "_").split("_") if len(parts) >= 2 and parts[0] in ("sma", "ema"): period = int(parts[1]) else: return signal # unknown filter, skip if parts[0] == "sma": trend_line = close.rolling(period).mean() else: trend_line = close.ewm(span=period, adjust=False).mean() s = signal.copy() # Longs only above trend, shorts only below s[(s > 0) & (close < trend_line)] = 0.0 s[(s < 0) & (close > trend_line)] = 0.0 return s # ── run_backtest_v2: framework-owned SL/TP/cooldown/position management ── def run_backtest_v2(signal, close, high, low, config, capital=10000, cost=2e-5): """Backtest with SL/TP/cooldown/direction handling built into the engine. Unlike run_backtest (v1), this function handles position exits internally. Returns: same dict shape as run_backtest() """ stop_loss = config.get("stop_loss") take_profit = config.get("take_profit") cooldown = config.get("cooldown", 0) on_opposite = config.get("on_opposite", "reverse") sig_arr = signal.values close_arr = close.values high_arr = high.values low_arr = low.values idx = signal.index n = len(close_arr) trade_returns = [] long_returns = [] short_returns = [] trade_log = [] equity_vals = np.full(n, float(capital)) cumret = 1.0 position = 0.0 # current direction: 1.0, -1.0, or 0.0 (flat) entry_price = None entry_bar = None # index into arrays for entry time cooldown_remaining = 0 def _log_trade(exit_bar, exit_px, ret, reason): trade_log.append({ "type": "Buy" if position == 1.0 else "Sell", "entry_time": str(idx[entry_bar]), "exit_time": str(idx[exit_bar]), "entry_price": round(entry_price, 5), "exit_price": round(exit_px, 5), "pnl": round(position * (exit_px - entry_price), 5), "pnl_pct": round(ret * 100, 3), "exit_reason": reason, }) for i in range(n): c = close_arr[i] h = high_arr[i] lo = low_arr[i] s = sig_arr[i] # 1. Check SL/TP if in trade if position != 0.0 and entry_price is not None: hit_sl = False hit_tp = False exit_price = None if position == 1.0: # long if stop_loss is not None and lo <= entry_price * (1 - stop_loss): hit_sl = True exit_price = entry_price * (1 - stop_loss) elif take_profit is not None and h >= entry_price * (1 + take_profit): hit_tp = True exit_price = entry_price * (1 + take_profit) else: # short if stop_loss is not None and h >= entry_price * (1 + stop_loss): hit_sl = True exit_price = entry_price * (1 + stop_loss) elif take_profit is not None and lo <= entry_price * (1 - take_profit): hit_tp = True exit_price = entry_price * (1 - take_profit) if hit_sl or hit_tp: ret = float(position * (exit_price - entry_price) / entry_price - cost) trade_returns.append(ret) if position == 1.0: long_returns.append(ret) else: short_returns.append(ret) _log_trade(i, exit_price, ret, "SL" if hit_sl else "TP") cumret = (1 + ret) position = 0.0 entry_price = None entry_bar = None cooldown_remaining = cooldown equity_vals[i] = capital cumret continue # 2. Cooldown if cooldown_remaining > 0: cooldown_remaining -= 1 equity_vals[i] = capital * cumret continue # 3. Signal processing if s != 0.0: if position == 0.0: # Open new trade position = s entry_price = c entry_bar = i elif s != position: # Opposite signal if on_opposite == "reverse": # Close current + open opposite ret = float(position * (c - entry_price) / entry_price - cost) trade_returns.append(ret) if position == 1.0: long_returns.append(ret) else: short_returns.append(ret) _log_trade(i, c, ret, "signal") cumret = (1 + ret) position = s entry_price = c entry_bar = i else: # close_only # Close current, go flat ret = float(position (c - entry_price) / entry_price - cost) trade_returns.append(ret) if position == 1.0: long_returns.append(ret) else: short_returns.append(ret) _log_trade(i, c, ret, "close_only") cumret = (1 + ret) position = 0.0 entry_price = None entry_bar = None cooldown_remaining = cooldown equity_vals[i] = capital cumret # Close last open trade at final close if position != 0.0 and entry_price is not None and n > 0 and entry_price != 0: c = close_arr[-1] ret = float(position * (c - entry_price) / entry_price - cost) trade_returns.append(ret) if position == 1.0: long_returns.append(ret) else: short_returns.append(ret) _log_trade(n - 1, c, ret, "end") cumret = (1 + ret) equity_vals[-1] = capital cumret # Bar returns for Sharpe (approximate) bar_returns = np.zeros(n) for i in range(1, n): if close_arr[i - 1] != 0 and sig_arr[i - 1] != 0: bar_returns[i] = sig_arr[i - 1] * (close_arr[i] - close_arr[i - 1]) / close_arr[i - 1] return { "equity": pd.Series(equity_vals, index=close.index), "trade_returns": trade_returns, "long_returns": long_returns, "short_returns": short_returns, "bar_returns": bar_returns, "trade_log": trade_log, } # ── run_strategy: the v2 orchestrator ──────────────────────────────────── def run_strategy(feature_fn, config_fn, data_path, start_date="", end_date="", validation_date="", train_split=0.7, register_model_fn=None): """Config-driven strategy execution. Claude writes feature_fn + config_fn, framework does everything else. Returns: results dict (same format as webapp expects) """ config = config_fn() # Auto-correct SL/TP if Claude passed percentage instead of decimal for _key in ("stop_loss", "take_profit"): _val = config.get(_key) if _val is not None and _val > 0.1: # >10% is almost certainly a percentage config[_key] = _val / 100.0 print(f"[strategy] Auto-corrected {_key}: {_val} -> {config[_key]} (was percentage, converted to decimal)") # 1. Load data df, close, open_, high, low = load_ohlc(data_path, start_date, end_date) # 2. Feature engineering (Claude's function) df = feature_fn(df, close, open_, high, low) close = df["close"] open_ = df["open"] high = df["high"] low = df["low"] # 3. Warm-up detection: drop rows where features have NaN BEFORE any fill feature_cols = [c for c in df.columns if c not in ("open", "high", "low", "close")] raw_nans = df[feature_cols].isna().any(axis=1) valid_rows = ~raw_nans if valid_rows.any(): first_valid = valid_rows.idxmax() if raw_nans.loc[:first_valid].any(): df = df.loc[first_valid:].copy() close = df["close"] open_ = df["open"] high = df["high"] low = df["low"] # 4. Target horizon = config.get("target_horizon", 4) target = make_target(close, horizon=horizon) # 5. Split (ffill only within each partition — no bfill leak) mask = target.notna() df = df[mask].copy() target = target[mask] close = df["close"] high = df["high"] low = df["low"] X = df[feature_cols].copy() X = X.replace([np.inf, -np.inf], np.nan) if validation_date: split_idx = len(df[df.index <= validation_date]) else: split_idx = int(len(df) * train_split) split_idx = max(1, min(split_idx, len(df) - 1)) # ffill within train and test separately (no leak) X_train = X.iloc[:split_idx].ffill().fillna(0.0) X_test = X.iloc[split_idx:].ffill().fillna(0.0) X = pd.concat([X_train, X_test]) y_train = target.iloc[:split_idx] y_test = target.iloc[split_idx:] close_train = close.iloc[:split_idx] close_test = close.iloc[split_idx:] high_test = high.iloc[split_idx:] low_test = low.iloc[split_idx:] enc = LabelEncoder() enc.fit([-1, 0, 1]) y_train_enc = enc.transform(y_train) y_test_enc = enc.transform(y_test) split_dt = str(df.index[split_idx]) sp = { "df": df, "X_train": X_train, "X_test": X_test, "y_train": y_train, "y_test": y_test, "y_train_enc": y_train_enc, "y_test_enc": y_test_enc, "enc": enc, "close": close, "close_train": close_train, "close_test": close_test, "split_idx": split_idx, "split_dt": split_dt, "n_train": len(X_train), "n_test": len(X_test), } # 6. Build model from config model = _build_model_from_config(config, X_train, y_train_enc) # 7. Generate signals threshold = config.get("signal_threshold", 0.55) signal_train, p_pos_train, p_neg_train = _generate_signals(model, X_train, threshold) signal_test, p_pos_test, p_neg_test = _generate_signals(model, X_test, threshold) # 8. Apply filters (order: direction → session → ATR → trend) direction = config.get("direction", "both") signal_test = _apply_direction_filter(signal_test, direction) signal_train = _apply_direction_filter(signal_train, direction) session_filter = config.get("session_filter") signal_test = _apply_session_filter(signal_test, signal_test.index, session_filter) signal_train = _apply_session_filter(signal_train, signal_train.index, session_filter) min_atr = config.get("min_atr") if min_atr is not None: signal_test = _apply_atr_filter(signal_test, close_test, high_test, low_test, min_atr) trend_filter = config.get("trend_filter") if trend_filter is not None: signal_test = _apply_trend_filter(signal_test, close_test, trend_filter) signal_full = pd.concat([signal_train, signal_test]) # 9. Backtest with SL/TP/cooldown (test + train) high_train = high.iloc[:split_idx] low_train = low.iloc[:split_idx] has_risk = (config.get("stop_loss") is not None or config.get("take_profit") is not None or config.get("cooldown", 0) > 0 or config.get("on_opposite", "reverse") != "reverse") if has_risk: bt = run_backtest_v2(signal_test, close_test, high_test, low_test, config, capital=10000) bt_train = run_backtest_v2(signal_train, close_train, high_train, low_train, config, capital=10000) else: bt = run_backtest(signal_test, close_test, capital=10000) bt_train = run_backtest(signal_train, close_train, capital=10000) # 10. Metrics metrics = compute_metrics(bt, close_test, capital=10000) # 11. Pre-compute all trade stats (single source of truth) pre_stats = { "train_stats": compute_trade_stats(bt_train.get("trade_returns", []), capital=10000), "test_stats": compute_trade_stats(bt.get("trade_returns", []), capital=10000), "long_stats": compute_trade_stats(bt.get("long_returns", []), capital=10000), "short_stats": compute_trade_stats(bt.get("short_returns", []), capital=10000), } # 12. Register model if register_model_fn is not None: register_model_fn(model) # 13. Build return dict return build_return_dict(sp, bt, metrics, model, feature_cols, signal_full, p_pos_test, p_neg_test, custom_figs=[], bt_train_result=bt_train, pre_stats=pre_stats) # ── End strategy_utils ── DATA_PATH = '/root/Desktop/QuantifyMe/data/ohlc/AUDUSD_15min.parquet' START_DATE = '2026-04-15' END_DATE = '2026-05-25' VALIDATION_DATE = "" TRAIN_SPLIT = 0.7 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # ── Bollinger Bands (20, 2) ────────────────────────────────────────────── bb_period = 20 bb_std = 2.0 bb_mid = close.rolling(bb_period).mean() bb_sigma = close.rolling(bb_period).std(ddof=0) bb_upper = bb_mid + bb_std * bb_sigma bb_lower = bb_mid - bb_std * bb_sigma df["bb_mid"] = bb_mid df["bb_upper"] = bb_upper df["bb_lower"] = bb_lower # %B — position of close within the band (0 = lower, 1 = upper) bb_range = bb_upper - bb_lower df["bb_pct_b"] = np.where(bb_range > 0, (close - bb_lower) / bb_range, 0.5) # Bandwidth — normalised band width (regime filter) df["bb_bandwidth"] = np.where(bb_mid > 0, bb_range / bb_mid, 0.0) # Distance from each band (signed, normalised by sigma) df["dist_lower"] = np.where(bb_sigma > 0, (close - bb_lower) / bb_sigma, 0.0) df["dist_upper"] = np.where(bb_sigma > 0, (bb_upper - close) / bb_sigma, 0.0) df["dist_mid"] = np.where(bb_sigma > 0, (close - bb_mid) / bb_sigma, 0.0) # Below lower band flag df["below_lower"] = np.where(close < bb_lower, 1, 0) # Above upper band flag df["above_upper"] = np.where(close > bb_upper, 1, 0) # ── RSI (14) ───────────────────────────────────────────────────────────── rsi_period = 14 delta = close.diff() gain = delta.clip(lower=0) loss = (-delta).clip(lower=0) avg_gain = gain.ewm(com=rsi_period - 1, min_periods=rsi_period).mean() avg_loss = loss.ewm(com=rsi_period - 1, min_periods=rsi_period).mean() rs = np.where(avg_loss > 0, avg_gain / avg_loss, 100.0) rsi = 100.0 - 100.0 / (1.0 + rs) df["rsi"] = rsi # RSI-derived flags and distances df["rsi_oversold"] = np.where(rsi < 35, 1, 0) df["rsi_overbought"] = np.where(rsi > 65, 1, 0) df["rsi_dist_35"] = rsi - 35.0 # negative when oversold df["rsi_dist_65"] = rsi - 65.0 # positive when overbought df["rsi_norm"] = (rsi - 50.0) / 50.0 # centred, ±1 range # ── Core entry condition features ──────────────────────────────────────── # Buy setup: close < lower BB AND RSI < 35 df["long_setup"] = np.where((close < bb_lower) & (rsi < 35), 1, 0) # Sell setup: close > upper BB AND RSI > 65 df["short_setup"] = np.where((close > bb_upper) & (rsi > 65), 1, 0) # ── ATR (14) — volatility context ──────────────────────────────────────── atr_period = 14 hl = high - low hc = (high - close.shift(1)).abs() lc = (low - close.shift(1)).abs() tr = pd.concat([hl, hc, lc], axis=1).max(axis=1) atr = tr.ewm(com=atr_period - 1, min_periods=atr_period).mean() df["atr"] = atr df["natr"] = np.where(close > 0, atr / close, 0.0) # ── Momentum / Rate-of-Change ───────────────────────────────────────────── for n in [1, 3, 5, 10]: df[f"roc_{n}"] = np.where( close.shift(n) > 0, (close - close.shift(n)) / close.shift(n), 0.0 ) # ── EMA trend context (fast / slow) ────────────────────────────────────── ema_fast = close.ewm(span=9, min_periods=9).mean() ema_slow = close.ewm(span=21, min_periods=21).mean() df["ema_fast"] = ema_fast df["ema_slow"] = ema_slow df["ema_diff"] = np.where(ema_slow > 0, (ema_fast - ema_slow) / ema_slow, 0.0) df["ema_bull"] = np.where(ema_fast > ema_slow, 1, 0) # SMA-50 trend filter helper (used by framework trend_filter) df["sma_50"] = close.rolling(50).mean() # ── Candle body & wick features ─────────────────────────────────────────── body = (close - open_).abs() candle_rng = (high - low).replace(0, np.nan) df["body_ratio"] = (body / candle_rng).fillna(0.0) df["upper_wick"] = np.where(candle_rng.notna(), (high - close.clip(lower=open_)) / candle_rng.fillna(1), 0.0) df["lower_wick"] = np.where(candle_rng.notna(), (close.clip(upper=open_) - low) / candle_rng.fillna(1), 0.0) df["bull_candle"] = np.where(close > open_, 1, 0) # ── Volume-like proxy — true range z-score ──────────────────────────────── tr_mean = tr.rolling(20).mean() tr_std = tr.rolling(20).std(ddof=0).replace(0, np.nan) df["tr_zscore"] = ((tr - tr_mean) / tr_std).fillna(0.0) # ── Lagged RSI and %B (1, 2, 3 bars back) ──────────────────────────────── for lag in [1, 2, 3]: df[f"rsi_lag{lag}"] = df["rsi"].shift(lag) df[f"bb_pct_b_lag{lag}"] = df["bb_pct_b"].shift(lag) # ── RSI slope ──────────────────────────────────────────────────────────── df["rsi_slope3"] = df["rsi"] - df["rsi"].shift(3) # ── Mean-reversion proximity: how far price is from middle band ─────────── df["pct_to_mid"] = np.where(close > 0, (bb_mid - close) / close, 0.0) # ── Fill any NaNs from warm-up ──────────────────────────────────────────── df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "BB Mean-Reversion + RSI Oversold/Overbought (XGBoost)", "model_type": "XGBClassifier", "model_params": { "n_estimators": 500, "max_depth": 4, "learning_rate": 0.03, "subsample": 0.75, "colsample_bytree": 0.70, "min_child_weight": 5, "gamma": 0.1, "reg_alpha": 0.05, "reg_lambda": 1.5, "objective": "binary:logistic", "random_state": 42, "n_jobs": -1, }, "signal_threshold": 0.55, "direction": "both", "stop_loss": 0.0010, "take_profit": 0.0020, "cooldown": 0, "max_positions": 1, "on_opposite": "reverse", "session_filter": [7, 17], "min_atr": 0.00005, "trend_filter": None, "target_horizon": 4, "objective": ( "Maximize Sharpe ratio by exploiting Bollinger Band mean-reversion " "with RSI confirmation. Entry conditions (close < lower BB, RSI < 35 " "for longs; close > upper BB, RSI > 65 for shorts) are encoded as " "features together with momentum, ATR volatility, candle structure, " "and lagged indicators. XGBoost with strong regularisation " "(reg_lambda=1.5, gamma=0.1, min_child_weight=5) and a low learning " "rate avoids overfitting on the 6-week window. Session filter " "[7,17] UTC targets liquid London/NY overlap, reducing noise. " "TP:SL ratio of 2:1 supports positive expected value even at " "moderate win rates, pushing Sharpe higher." ), "notes": ( "Features: %B position, RSI (raw + flags + slope + lags), " "EMA cross, ATR/NATR, ROC(1/3/5/10), candle body/wick ratios, " "TR z-score, distance-to-midband, long/short setup flags. " "Round-trip cost ~2e-5 is implicitly absorbed by the 10-pip TP target. " "Cooldown=0 allows immediate re-entry after mean-reversion completes." ), } # ── Framework v2: auto-generated wrapper ── def train_and_backtest(): _vd = VALIDATION_DATE if 'VALIDATION_DATE' in globals() else '' _ts = TRAIN_SPLIT if 'TRAIN_SPLIT' in globals() else 0.7 return run_strategy( feature_engineering, strategy_config, DATA_PATH, START_DATE, END_DATE, _vd, _ts, register_model_fn=register_model )
1.06	NZD/USD Stoch+BB+RSI Gradient Boosting Mean-Revert Maximise risk-adjusted return (Sharpe / Calmar) on NZD/USD 15-min. GradientBoostingClassifier selected for its strong generalisation on stru…	C @candle_owl	NZDUSD	15min	59.1%61.0%	+3.83%+6.88%	1.101.24	4.90%4.90%	38159
# ╔══════════════════════════════════════════════════════════════╗ # ║ STRATEGY REQUEST LOG ║ # ╚══════════════════════════════════════════════════════════════╝ # Generated : 2026-05-06 01:54:56 # Model : Gradient Boosting # Feature Eng. : BB (20,2.0), RSI 14, Stochastic (14,3) + Auto-add features: ON # Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP # Optimization : Maximize risk-adjusted return # Risk Mgmt : Stop loss 0.5%, Take profit 1.0% # Risk Filter : — # ══════════════════════════════════════════════════════════════ # ============================================================ # SECTION 0 — IMPORTS & CONSTANTS import numpy as np import pandas as pd DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/NZDUSD_15min.parquet" START_DATE = "2025-04-24" END_DATE = "2026-04-24" VALIDATION_DATE = "" TRAIN_SPLIT = 0.7 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # ── Bollinger Bands (20, 2) ────────────────────────────────────────────── bb_period = 20 bb_std = 2.0 bb_mid = close.rolling(bb_period).mean() bb_std_ = close.rolling(bb_period).std(ddof=0) bb_upper = bb_mid + bb_std * bb_std_ bb_lower = bb_mid - bb_std * bb_std_ df["bb_mid"] = bb_mid df["bb_upper"] = bb_upper df["bb_lower"] = bb_lower df["bb_width"] = (bb_upper - bb_lower) / bb_mid df["bb_pct"] = (close - bb_lower) / (bb_upper - bb_lower) # ── RSI (14) ───────────────────────────────────────────────────────────── rsi_period = 14 delta = close.diff() gain = delta.clip(lower=0) loss = -delta.clip(upper=0) avg_gain = gain.ewm(com=rsi_period - 1, min_periods=rsi_period).mean() avg_loss = loss.ewm(com=rsi_period - 1, min_periods=rsi_period).mean() rs = avg_gain / avg_loss.replace(0, np.nan) df["rsi"] = 100 - (100 / (1 + rs)) # ── Stochastic Oscillator (K=14, D=3) ──────────────────────────────────── stoch_k_period = 14 stoch_d_period = 3 lowest_low = low.rolling(stoch_k_period).min() highest_high = high.rolling(stoch_k_period).max() stoch_range = (highest_high - lowest_low).replace(0, np.nan) df["stoch_k"] = 100 * (close - lowest_low) / stoch_range df["stoch_d"] = df["stoch_k"].rolling(stoch_d_period).mean() df["stoch_kd_diff"] = df["stoch_k"] - df["stoch_d"] # ── ATR (14) — for normalised volatility / min_atr filter ──────────────── atr_period = 14 tr = pd.concat([ high - low, (high - close.shift(1)).abs(), (low - close.shift(1)).abs() ], axis=1).max(axis=1) atr = tr.ewm(com=atr_period - 1, min_periods=atr_period).mean() df["atr"] = atr df["natr"] = atr / close # normalised ATR used by min_atr filter # ── Price momentum / rate-of-change ────────────────────────────────────── df["roc_4"] = close.pct_change(4) # 1-hour momentum on 15-min bars df["roc_8"] = close.pct_change(8) # 2-hour momentum df["roc_16"] = close.pct_change(16) # 4-hour momentum # ── EMA trend context ───────────────────────────────────────────────────── df["ema_20"] = close.ewm(span=20, adjust=False).mean() df["ema_50"] = close.ewm(span=50, adjust=False).mean() df["ema_100"] = close.ewm(span=100, adjust=False).mean() df["sma_50"] = close.rolling(50).mean() # used by trend_filter df["ema_cross_20_50"] = df["ema_20"] - df["ema_50"] df["ema_cross_50_100"] = df["ema_50"] - df["ema_100"] df["close_vs_ema20"] = (close - df["ema_20"]) / df["ema_20"] # ── Candlestick body / wick features ───────────────────────────────────── df["body"] = (close - open_).abs() df["candle_dir"] = np.where(close >= open_, 1.0, -1.0) df["upper_wick"] = high - pd.concat([close, open_], axis=1).max(axis=1) df["lower_wick"] = pd.concat([close, open_], axis=1).min(axis=1) - low df["body_ratio"] = df["body"] / (high - low).replace(0, np.nan) # ── Volume-proxy: realised range rolling stats ──────────────────────────── df["hl_range"] = high - low df["hl_range_ma8"] = df["hl_range"].rolling(8).mean() df["hl_range_ratio"]= df["hl_range"] / df["hl_range_ma8"] # ── RSI derived signals ─────────────────────────────────────────────────── df["rsi_overbought"] = np.where(df["rsi"] > 70, 1.0, 0.0) df["rsi_oversold"] = np.where(df["rsi"] < 30, 1.0, 0.0) df["rsi_momentum"] = df["rsi"].diff(4) # ── Stochastic derived signals ──────────────────────────────────────────── df["stoch_overbought"] = np.where(df["stoch_k"] > 80, 1.0, 0.0) df["stoch_oversold"] = np.where(df["stoch_k"] < 20, 1.0, 0.0) # ── BB squeeze: width vs rolling mean of width ──────────────────────────── df["bb_width_ma20"] = df["bb_width"].rolling(20).mean() df["bb_squeeze"] = np.where(df["bb_width"] < df["bb_width_ma20"], 1.0, 0.0) # ── Interaction features ────────────────────────────────────────────────── df["rsi_bb_pct"] = df["rsi"] * df["bb_pct"] df["stoch_k_bb_pct"] = df["stoch_k"] * df["bb_pct"] df["rsi_stoch_diff"] = df["rsi"] - df["stoch_k"] # ── Lagged features (avoids look-ahead) ────────────────────────────────── for lag in [1, 2, 3, 4]: df[f"rsi_lag{lag}"] = df["rsi"].shift(lag) df[f"stoch_k_lag{lag}"] = df["stoch_k"].shift(lag) df[f"bb_pct_lag{lag}"] = df["bb_pct"].shift(lag) df[f"roc4_lag{lag}"] = df["roc_4"].shift(lag) # ── Fill NaN from warm-up ───────────────────────────────────────────────── df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "NZD/USD Stoch+BB+RSI Gradient Boosting Mean-Revert", "model_type": "GradientBoostingClassifier", "model_params": { "n_estimators": 400, "max_depth": 4, "learning_rate": 0.04, "subsample": 0.8, "min_samples_leaf": 20, "max_features": "sqrt", "validation_fraction": 0.1, "n_iter_no_change": 30, "tol": 1e-4, "random_state": 42, }, "signal_threshold": 0.56, "direction": "both", "stop_loss": 0.005, "take_profit": 0.010, "cooldown": 0, "max_positions": 1, "on_opposite": "reverse", "session_filter": [7, 20], "min_atr": 0.0002, "trend_filter": None, "target_horizon": 4, "objective": ( "Maximise risk-adjusted return (Sharpe / Calmar) on NZD/USD 15-min. " "GradientBoostingClassifier selected for its strong generalisation on " "structured tabular data with noisy financial features. n_estimators=400 " "with early stopping (n_iter_no_change=30) prevents overfitting. " "max_depth=4 keeps trees shallow to reduce variance. subsample=0.8 + " "max_features=sqrt add stochasticity for robustness. SL 0.5% / TP 1.0% " "gives a minimum 2:1 reward-risk ratio. Session filter 07-20 UTC covers " "Sydney open through NY overlap, maximising NZD/USD liquidity. " "Reverse on opposite signal keeps the model continuously positioned in " "the highest-confidence direction. min_atr filter avoids flat/illiquid " "periods where the model edges degrade." ), "notes": ( "Features: Bollinger Bands (20,2) width & %B, RSI(14), Stochastic K/D " "(14,3), ATR(14)/NATR, EMA cross (20/50/100), SMA50 trend context, " "price ROC (4/8/16 bars), candlestick body/wick ratios, HL range " "normalisation, BB squeeze flag, RSI/Stoch overbought-oversold flags, " "interaction terms (RSI%B, StochK%B), and 4 lags each of RSI, StochK, " "%B and ROC4. Threshold 0.56 slightly above 0.50 to filter marginal " "signals without sacrificing too many trades." ), }
0.91	NZD/USD EMA Cross + ATR Gradient Boosting Maximize risk-adjusted return (Sharpe / Calmar). GradientBoostingClassifier with moderate depth (4) and low learning rate (0.03) to reduce o…	E @elastic-moose-350	NZDUSD	15min	63.3%60.2%	+9.88%+4.50%	1.181.16	3.44%3.44%	712103
# ╔══════════════════════════════════════════════════════════════╗ # ║ STRATEGY REQUEST LOG ║ # ╚══════════════════════════════════════════════════════════════╝ # Generated : 2026-05-06 03:16:00 # Model : Gradient Boosting # Feature Eng. : EMA (50,200), ATR 14 + Auto-add features: ON # Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP # Optimization : Maximize risk-adjusted return # Risk Mgmt : Stop loss 0.5%, Take profit 1.0% # Risk Filter : — # ══════════════════════════════════════════════════════════════ # ============================================================ # SECTION 0 — IMPORTS & CONSTANTS import numpy as np import pandas as pd DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/NZDUSD_15min.parquet" START_DATE = "2025-04-24" END_DATE = "2026-04-24" VALIDATION_DATE = "" TRAIN_SPLIT = 0.7 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # ── EMA 50 and EMA 200 ────────────────────────────────────────────────── ema_50 = close.ewm(span=50, adjust=False).mean() ema_200 = close.ewm(span=200, adjust=False).mean() df["ema_50"] = ema_50 df["ema_200"] = ema_200 df["dm_ema_50"] = (close - ema_50) / ema_50 df["dm_ema_200"] = (close - ema_200) / ema_200 # EMA cross signal: ema_50 vs ema_200 df["ema_cross"] = df["ema_50"] - df["ema_200"] # Cross direction: +1 when ema_50 > ema_200, -1 otherwise df["ema_cross_sign"] = np.where(df["ema_cross"] > 0, 1.0, -1.0) # Cross event: 1 when cross just happened (sign flip) prev_cross = df["ema_cross"].shift(1) df["ema_cross_event"] = np.where( (df["ema_cross"] * prev_cross) < 0, 1.0, 0.0 ) # ── ATR 14 ────────────────────────────────────────────────────────────── prev_close = close.shift(1) tr = pd.concat([ high - low, (high - prev_close).abs(), (low - prev_close).abs() ], axis=1).max(axis=1) atr = tr.ewm(span=14, adjust=False).mean() df["atr"] = atr df["natr"] = atr / close # ── RSI 14 ────────────────────────────────────────────────────────────── delta = close.diff() gain = delta.clip(lower=0) loss = (-delta).clip(lower=0) avg_gain = gain.ewm(span=14, adjust=False).mean() avg_loss = loss.ewm(span=14, adjust=False).mean() rs = avg_gain / (avg_loss + 1e-10) rsi = 100 - (100 / (1 + rs)) df["rsi_14"] = rsi df["rsi_norm"] = (rsi - 50) / 50 # centred and scaled # ── MACD ──────────────────────────────────────────────────────────────── ema_12 = close.ewm(span=12, adjust=False).mean() ema_26 = close.ewm(span=26, adjust=False).mean() macd = ema_12 - ema_26 signal = macd.ewm(span=9, adjust=False).mean() df["macd"] = macd df["macd_signal"] = signal df["macd_hist"] = macd - signal df["macd_norm"] = macd / close df["macd_hist_norm"] = (macd - signal) / close # ── Bollinger Bands (20, 2) ────────────────────────────────────────────── sma_20 = close.rolling(20).mean() std_20 = close.rolling(20).std() bb_upper = sma_20 + 2 * std_20 bb_lower = sma_20 - 2 * std_20 bb_width = (bb_upper - bb_lower) / (sma_20 + 1e-10) bb_pos = (close - bb_lower) / (bb_upper - bb_lower + 1e-10) df["bb_width"] = bb_width df["bb_pos"] = bb_pos # ── Momentum & Rate-of-Change ──────────────────────────────────────────── df["mom_4"] = close.pct_change(4) df["mom_8"] = close.pct_change(8) df["mom_16"] = close.pct_change(16) # ── Rolling volatility (realised vol over 20 bars) ────────────────────── log_ret = np.log(close / close.shift(1)) df["rvol_20"] = log_ret.rolling(20).std() # ── Stochastic Oscillator (14) ─────────────────────────────────────────── low_14 = low.rolling(14).min() high_14 = high.rolling(14).max() stoch_k = 100 * (close - low_14) / (high_14 - low_14 + 1e-10) stoch_d = stoch_k.rolling(3).mean() df["stoch_k"] = stoch_k df["stoch_d"] = stoch_d df["stoch_diff"] = stoch_k - stoch_d # ── Candle body / range features ──────────────────────────────────────── df["body"] = (close - open_).abs() / (high - low + 1e-10) df["upper_wick"] = (high - close.clip(lower=open_)) / (high - low + 1e-10) df["lower_wick"] = (close.clip(upper=open_) - low) / (high - low + 1e-10) df["bar_dir"] = np.where(close > open_, 1.0, -1.0) # ── Price position relative to EMAs ───────────────────────────────────── df["close_vs_ema50_sign"] = np.where(close > ema_50, 1.0, -1.0) df["close_vs_ema200_sign"] = np.where(close > ema_200, 1.0, -1.0) # ── Lagged features (1-bar and 2-bar lags on key signals) ─────────────── for col in ["rsi_norm", "macd_hist_norm", "mom_4", "ema_cross", "natr", "bb_pos"]: df[f"{col}_lag1"] = df[col].shift(1) df[f"{col}_lag2"] = df[col].shift(2) # ── Fill NaN from warm-up ──────────────────────────────────────────────── df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "NZD/USD EMA Cross + ATR Gradient Boosting", "model_type": "GradientBoostingClassifier", "model_params": { "n_estimators": 400, "max_depth": 4, "learning_rate": 0.03, "subsample": 0.8, "min_samples_leaf": 20, "max_features": "sqrt", "validation_fraction": 0.1, "n_iter_no_change": 30, "tol": 1e-4, }, "signal_threshold": 0.55, "direction": "both", "stop_loss": 0.005, "take_profit": 0.01, "cooldown": 0, "max_positions": 1, "on_opposite": "reverse", "session_filter": None, "min_atr": None, "trend_filter": None, "target_horizon": 4, "objective": ( "Maximize risk-adjusted return (Sharpe / Calmar). " "GradientBoostingClassifier with moderate depth (4) and low learning rate (0.03) " "to reduce overfitting on 15-min NZD/USD. SL=0.5%, TP=1.0% gives 1:2 RR. " "EMA 50/200 cross is the primary trend feature; ATR normalises volatility context. " "Supplementary RSI, MACD, Bollinger, Stochastic and candle-body features capture " "momentum and mean-reversion signals. Early stopping via n_iter_no_change guards " "against overfit on the training partition." ), "notes": ( "target_horizon=4 (1 hour) matches typical intraday swing on NZD/USD. " "reverse on opposite signal keeps the model responsive during trending regimes. " "No session filter applied — NZD/USD has reasonable liquidity around the clock. " "min_samples_leaf=20 and subsample=0.8 add regularisation without grid search." ), }
0.86	EUR/USD Stoch+BB+RSI Gradient Boosting Mean-Rev Maximize risk-adjusted return (Sharpe/Calmar) on EUR/USD 15-min data. GradientBoostingClassifier chosen for strong out-of-bag regularisation…	E @echo-quanta-127	EURUSD	15min	61.2%53.2%	+1.02%+3.69%	1.061.14	2.59%2.59%	21447
# ╔══════════════════════════════════════════════════════════════╗ # ║ STRATEGY REQUEST LOG ║ # ╚══════════════════════════════════════════════════════════════╝ # Generated : 2026-05-06 02:33:17 # Model : Gradient Boosting # Feature Eng. : BB (20,2.0), RSI 14, Stochastic (14,3) + Auto-add features: ON # Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP # Optimization : Maximize risk-adjusted return # Risk Mgmt : Stop loss 0.5%, Take profit 1.0% # Risk Filter : — # ══════════════════════════════════════════════════════════════ # ============================================================ # SECTION 0 — IMPORTS & CONSTANTS import numpy as np import pandas as pd DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/EURUSD_15min.parquet" START_DATE = "2025-04-24" END_DATE = "2026-04-24" VALIDATION_DATE = "" TRAIN_SPLIT = 0.7 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # ── Bollinger Bands (20, 2) ────────────────────────────────────────────── bb_period = 20 bb_std = 2.0 bb_mid = close.rolling(bb_period).mean() bb_std_v = close.rolling(bb_period).std(ddof=0) bb_upper = bb_mid + bb_std * bb_std_v bb_lower = bb_mid - bb_std * bb_std_v df["bb_mid"] = bb_mid df["bb_upper"] = bb_upper df["bb_lower"] = bb_lower df["bb_width"] = (bb_upper - bb_lower) / bb_mid df["bb_pct"] = (close - bb_lower) / (bb_upper - bb_lower) # ── RSI 14 ─────────────────────────────────────────────────────────────── rsi_period = 14 delta = close.diff() gain = delta.clip(lower=0) loss = -delta.clip(upper=0) avg_gain = gain.ewm(com=rsi_period - 1, min_periods=rsi_period).mean() avg_loss = loss.ewm(com=rsi_period - 1, min_periods=rsi_period).mean() rs = avg_gain / avg_loss.replace(0, np.nan) df["rsi"] = 100 - (100 / (1 + rs)) # ── Stochastic Oscillator (K=14, D=3) ──────────────────────────────────── stoch_k_period = 14 stoch_d_period = 3 lowest_low = low.rolling(stoch_k_period).min() highest_high = high.rolling(stoch_k_period).max() range_hl = (highest_high - lowest_low).replace(0, np.nan) df["stoch_k"] = 100 * (close - lowest_low) / range_hl df["stoch_d"] = df["stoch_k"].rolling(stoch_d_period).mean() df["stoch_kd_diff"] = df["stoch_k"] - df["stoch_d"] # ── ATR (14) ────────────────────────────────────────────────────────────── atr_period = 14 prev_close = close.shift(1) tr = pd.concat([ high - low, (high - prev_close).abs(), (low - prev_close).abs() ], axis=1).max(axis=1) df["atr"] = tr.ewm(com=atr_period - 1, min_periods=atr_period).mean() df["natr"] = df["atr"] / close # ── SMA filters ────────────────────────────────────────────────────────── df["sma_20"] = close.rolling(20).mean() df["sma_50"] = close.rolling(50).mean() df["sma_200"] = close.rolling(200).mean() df["price_vs_sma50"] = close / df["sma_50"] - 1 df["price_vs_sma200"] = close / df["sma_200"] - 1 # ── EMA cross ──────────────────────────────────────────────────────────── ema_fast = close.ewm(span=8, adjust=False).mean() ema_slow = close.ewm(span=21, adjust=False).mean() df["ema_cross"] = ema_fast - ema_slow # ── MACD ───────────────────────────────────────────────────────────────── ema12 = close.ewm(span=12, adjust=False).mean() ema26 = close.ewm(span=26, adjust=False).mean() macd_line = ema12 - ema26 macd_signal = macd_line.ewm(span=9, adjust=False).mean() df["macd"] = macd_line df["macd_sig"] = macd_signal df["macd_hist"] = macd_line - macd_signal # ── Momentum / Rate-of-change ──────────────────────────────────────────── df["roc_4"] = close.pct_change(4) df["roc_8"] = close.pct_change(8) df["roc_16"] = close.pct_change(16) # ── Candle features ─────────────────────────────────────────────────────── df["candle_body"] = (close - open_) / close df["candle_range"] = (high - low) / close df["upper_shadow"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / close df["lower_shadow"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / close # ── Volatility regime ───────────────────────────────────────────────────── df["vol_ratio"] = df["atr"] / df["atr"].rolling(50).mean() # ── RSI regime bins (np.where instead of pd.cut) ───────────────────────── df["rsi_oversold"] = np.where(df["rsi"] < 30, 1, 0) df["rsi_overbought"]= np.where(df["rsi"] > 70, 1, 0) df["rsi_mid"] = np.where((df["rsi"] >= 40) & (df["rsi"] <= 60), 1, 0) # ── Stochastic regime bins ──────────────────────────────────────────────── df["stoch_oversold"] = np.where(df["stoch_k"] < 20, 1, 0) df["stoch_overbought"] = np.where(df["stoch_k"] > 80, 1, 0) # ── BB regime bins ──────────────────────────────────────────────────────── df["bb_squeeze"] = np.where(df["bb_width"] < df["bb_width"].rolling(50).quantile(0.20), 1, 0) df["bb_expansion"] = np.where(df["bb_width"] > df["bb_width"].rolling(50).quantile(0.80), 1, 0) df["price_below_bb_lower"] = np.where(close < bb_lower, 1, 0) df["price_above_bb_upper"] = np.where(close > bb_upper, 1, 0) # ── Volume proxy — bar range z-score ────────────────────────────────────── range_series = high - low range_mean = range_series.rolling(20).mean() range_std = range_series.rolling(20).std(ddof=0) df["range_zscore"] = (range_series - range_mean) / range_std.replace(0, np.nan) # ── Lagged features ─────────────────────────────────────────────────────── for lag in [1, 2, 3, 4]: df[f"rsi_lag{lag}"] = df["rsi"].shift(lag) df[f"stoch_k_lag{lag}"] = df["stoch_k"].shift(lag) df[f"bb_pct_lag{lag}"] = df["bb_pct"].shift(lag) df[f"macd_hist_lag{lag}"] = df["macd_hist"].shift(lag) # ── Interaction features ────────────────────────────────────────────────── df["rsi_x_bb_pct"] = df["rsi"] * df["bb_pct"] df["stoch_x_bb_pct"] = df["stoch_k"] * df["bb_pct"] df["macd_x_ema_cross"] = df["macd_hist"] * df["ema_cross"] df["rsi_x_stoch_kd"] = df["rsi"] * df["stoch_kd_diff"] # ── Fill NaN from warm-up ───────────────────────────────────────────────── df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "EUR/USD Stoch+BB+RSI Gradient Boosting Mean-Rev", "model_type": "GradientBoostingClassifier", "model_params": { "n_estimators": 400, "max_depth": 4, "learning_rate": 0.04, "subsample": 0.75, "min_samples_leaf": 20, "max_features": "sqrt", "validation_fraction": 0.1, "n_iter_no_change": 30, "tol": 1e-4, "random_state": 42, }, "signal_threshold": 0.56, "direction": "both", "stop_loss": 0.005, "take_profit": 0.010, "cooldown": 0, "max_positions": 1, "on_opposite": "reverse", "session_filter": [6, 18], "min_atr": 0.0002, "trend_filter": None, "target_horizon": 4, "objective": ( "Maximize risk-adjusted return (Sharpe/Calmar) on EUR/USD 15-min data. " "GradientBoostingClassifier chosen for strong out-of-bag regularisation " "via subsample=0.75 and early stopping (n_iter_no_change=30). " "max_depth=4 limits overfitting on mean-reversion regime. " "learning_rate=0.04 with 400 trees balances bias-variance. " "Signal threshold 0.56 filters low-confidence signals for better precision. " "Session filter 06-18 UTC targets London+NY overlap with highest liquidity. " "SL=0.5%, TP=1.0% gives 1:2 R:R aligned with mean-reversion edge. " "Target horizon=4 bars (1 hour) captures short-term mean-reversion cycles." ), "notes": ( "Features: Stochastic(14,3), BB(20,2), RSI(14) as primary signals. " "Supplemented by MACD, EMA cross, ATR volatility filter, candle body/shadow, " "range z-score, lagged versions of key oscillators, and interaction terms. " "Regime bins (oversold/overbought/squeeze/expansion) add non-linear context. " "min_atr=0.0002 avoids trading during dead/illiquid periods." ), }
0.59	USD/JPY Multi-MA + RSI/BB XGBoost Sharpe Maximize Sharpe ratio on USD/JPY 1-min data using XGBoost with returns, RSI, Bollinger Bands, multiple MAs (50/100/200), MACD, ATR, and cand…	M @malcolmtan	USD/JP		60.7%—	+0.42%—	1.22—	0.53%0.53%	84—
# ╔══════════════════════════════════════════════════════════════╗ # ║ STRATEGY REQUEST LOG ║ # ╚══════════════════════════════════════════════════════════════╝ # Generated : 2026-05-08 02:08:02 # Model : XGBoost # Feature Eng. : Auto-add features: ON # Signal / Entry : — # Optimization : — # Risk Mgmt : — # Risk Filter : — # ══════════════════════════════════════════════════════════════ # ============================================================ # SECTION 0 — IMPORTS & CONSTANTS import numpy as np import pandas as pd DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/USDJPY_1min.parquet" START_DATE = "2026-05-04 00:00:00" END_DATE = "2026-05-07 00:00:00" VALIDATION_DATE = "" TRAIN_SPLIT = 0.6993736951983298 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # --- Returns over multiple horizons --- for n in [1, 3, 5, 10, 20]: df[f"ret_{n}"] = close.pct_change(n) # --- RSI 14 --- delta = close.diff() gain = delta.clip(lower=0) loss = -delta.clip(upper=0) avg_gain = gain.ewm(com=13, min_periods=14).mean() avg_loss = loss.ewm(com=13, min_periods=14).mean() rs = avg_gain / (avg_loss + 1e-10) df["rsi_14"] = 100.0 - (100.0 / (1.0 + rs)) # --- RSI derived features --- df["rsi_14_zscore"] = (df["rsi_14"] - df["rsi_14"].rolling(50).mean()) / (df["rsi_14"].rolling(50).std() + 1e-10) df["rsi_overbought"] = np.where(df["rsi_14"] > 70, 1, 0) df["rsi_oversold"] = np.where(df["rsi_14"] < 30, 1, 0) # --- Bollinger Bands 20, 2 --- bb_mid = close.rolling(20).mean() bb_std = close.rolling(20).std() bb_upper = bb_mid + 2.0 * bb_std bb_lower = bb_mid - 2.0 * bb_std df["bb_mid"] = bb_mid df["bb_upper"] = bb_upper df["bb_lower"] = bb_lower df["bb_width"] = (bb_upper - bb_lower) / (bb_mid + 1e-10) df["bb_pct_b"] = (close - bb_lower) / (bb_upper - bb_lower + 1e-10) df["bb_above"] = np.where(close > bb_upper, 1, 0) df["bb_below"] = np.where(close < bb_lower, 1, 0) # --- Moving Averages --- for w in [50, 100, 200]: df[f"sma_{w}"] = close.rolling(w).mean() df[f"price_vs_sma_{w}"] = (close - df[f"sma_{w}"]) / (df[f"sma_{w}"] + 1e-10) # --- MA crossover signals --- df["sma50_vs_sma100"] = np.where(df["sma_50"] > df["sma_100"], 1, -1) df["sma50_vs_sma200"] = np.where(df["sma_50"] > df["sma_200"], 1, -1) df["sma100_vs_sma200"] = np.where(df["sma_100"] > df["sma_200"], 1, -1) # --- ATR 14 --- tr = pd.concat([ high - low, (high - close.shift(1)).abs(), (low - close.shift(1)).abs() ], axis=1).max(axis=1) df["atr_14"] = tr.ewm(com=13, min_periods=14).mean() df["natr_14"] = df["atr_14"] / (close + 1e-10) # --- Momentum / rate of change --- for n in [5, 10, 20]: df[f"mom_{n}"] = close - close.shift(n) df[f"roc_{n}"] = (close - close.shift(n)) / (close.shift(n) + 1e-10) # --- Volume features (if volume exists) --- if "volume" in df.columns: vol = df["volume"].replace(0, np.nan) df["vol_sma_20"] = vol.rolling(20).mean() df["vol_ratio_20"] = vol / (df["vol_sma_20"] + 1e-10) else: df["vol_ratio_20"] = 1.0 # --- Price spread & body features --- df["hl_spread"] = (high - low) / (close + 1e-10) df["body_ratio"] = (close - open_).abs() / (high - low + 1e-10) df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / (high - low + 1e-10) df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / (high - low + 1e-10) df["bull_candle"] = np.where(close > open_, 1, 0) # --- Lagged returns for autocorrelation signal --- for lag in [1, 2, 3, 5]: df[f"ret1_lag{lag}"] = df["ret_1"].shift(lag) # --- Rolling volatility --- df["vol_10"] = df["ret_1"].rolling(10).std() df["vol_20"] = df["ret_1"].rolling(20).std() df["vol_50"] = df["ret_1"].rolling(50).std() # --- Z-score of close over 20 and 50 bars --- df["zscore_20"] = (close - close.rolling(20).mean()) / (close.rolling(20).std() + 1e-10) df["zscore_50"] = (close - close.rolling(50).mean()) / (close.rolling(50).std() + 1e-10) # --- Relative distance of price from BB bands --- df["dist_upper"] = (bb_upper - close) / (close + 1e-10) df["dist_lower"] = (close - bb_lower) / (close + 1e-10) # --- EMA 9 and 21 for short-term momentum --- df["ema_9"] = close.ewm(span=9, min_periods=9).mean() df["ema_21"] = close.ewm(span=21, min_periods=21).mean() df["ema9_vs_ema21"] = np.where(df["ema_9"] > df["ema_21"], 1, -1) df["price_vs_ema9"] = (close - df["ema_9"]) / (df["ema_9"] + 1e-10) df["price_vs_ema21"] = (close - df["ema_21"]) / (df["ema_21"] + 1e-10) # --- MACD-like signal --- ema_12 = close.ewm(span=12, min_periods=12).mean() ema_26 = close.ewm(span=26, min_periods=26).mean() macd_line = ema_12 - ema_26 signal_line = macd_line.ewm(span=9, min_periods=9).mean() df["macd"] = macd_line df["macd_signal"] = signal_line df["macd_hist"] = macd_line - signal_line df["macd_cross"] = np.where(macd_line > signal_line, 1, -1) # --- Fill NaN from warm-up periods --- df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "USD/JPY Multi-MA + RSI/BB XGBoost Sharpe", "model_type": "XGBClassifier", "model_params": { "n_estimators": 400, "max_depth": 4, "learning_rate": 0.04, "subsample": 0.75, "colsample_bytree": 0.75, "min_child_weight": 5, "gamma": 0.1, "reg_alpha": 0.1, "reg_lambda": 1.5, "objective": "binary:logistic", "tree_method": "hist", "random_state": 42, }, "signal_threshold": 0.55, "direction": "both", "stop_loss": 0.0008, "take_profit": 0.0016, "cooldown": 0, "max_positions": 1, "on_opposite": "close_only", "session_filter": None, "min_atr": None, "trend_filter": None, "target_horizon": 5, "objective": ( "Maximize Sharpe ratio on USD/JPY 1-min data using XGBoost with " "returns, RSI, Bollinger Bands, multiple MAs (50/100/200), MACD, " "ATR, and candle-body features. Stop-loss and take-profit set at " "a 1:2 risk/reward to filter noise and improve Sharpe. n_estimators " "and moderate depth balance bias-variance. Regularization (alpha/lambda) " "reduces overfitting on short date range." ), "notes": ( "Target horizon of 5 bars (5 minutes) is chosen to capture short-term " "directional moves on 1-min data without excessive label noise. " "colsample_bytree and subsample add stochasticity to reduce variance. " "close_only on opposite signal avoids whipsaw from rapid reversals. " "No session filter applied since USD/JPY has liquidity around the clock." ), }