Browse AI-generated trading strategies shared by the community. Fork, learn, and build on each other's work.
| Score▼ | Strategy | Author | Win Rate▼ | Return▼ | PF▼ | MDD▼ | Trades▼ | Actions | ||
|---|---|---|---|---|---|---|---|---|---|---|
|
🥇
|
USD/CAD BB Mean-Reversion + ATR XGBoost
Maximise risk-adjusted return (Sharpe/Calmar) on USD/CAD 15-min using Bollinger Band mean-reversion signals augmented by ATR, RSI, MACD, and…
|
C
@candle_owl
|
USDCAD | 15min | 59.1%72.1% | +4.84%+25.91% | 1.313.34 | 1.34%1.34% | 36268 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 02:36:58
# Model : XGBoost
# Feature Eng. : BB (20,2.0), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/USDCAD_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
# ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
bb_period = 20
bb_std = 2.0
bb_mid = close.rolling(bb_period).mean()
bb_sigma = close.rolling(bb_period).std(ddof=0)
bb_upper = bb_mid + bb_std * bb_sigma
bb_lower = bb_mid - bb_std * bb_sigma
df["bb_mid"] = bb_mid
df["bb_upper"] = bb_upper
df["bb_lower"] = bb_lower
df["bb_width"] = (bb_upper - bb_lower) / bb_mid
df["bb_pct"] = (close - bb_lower) / (bb_upper - bb_lower)
# ── ATR (14) & Normalised ATR ────────────────────────────────────────────
atr_period = 14
tr = pd.concat([
high - low,
(high - close.shift(1)).abs(),
(low - close.shift(1)).abs()
], axis=1).max(axis=1)
atr = tr.ewm(alpha=1.0 / atr_period, min_periods=atr_period, adjust=False).mean()
natr = atr / close
df["atr"] = atr
df["natr"] = natr
# ── Price momentum / returns ─────────────────────────────────────────────
df["ret_1"] = close.pct_change(1)
df["ret_4"] = close.pct_change(4)
df["ret_8"] = close.pct_change(8)
df["ret_16"] = close.pct_change(16)
# ── Distance from Bollinger mid / bands ──────────────────────────────────
df["close_minus_mid"] = (close - bb_mid) / bb_mid
df["close_minus_upper"] = (close - bb_upper) / bb_mid
df["close_minus_lower"] = (close - bb_lower) / bb_mid
# ── BB squeeze flag: width below rolling median ───────────────────────────
bb_width_med = df["bb_width"].rolling(50).median()
df["bb_squeeze"] = np.where(df["bb_width"] < bb_width_med, 1.0, 0.0)
# ── BB mean-reversion z-score ────────────────────────────────────────────
df["bb_z"] = (close - bb_mid) / (bb_sigma + 1e-12)
# ── Candle body / wick features ──────────────────────────────────────────
body = (close - open_).abs()
candle_rng = (high - low).replace(0, np.nan)
df["body_ratio"] = body / candle_rng
df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / candle_rng
df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / candle_rng
df["bull_candle"] = np.where(close > open_, 1.0, 0.0)
# ── RSI (14) built from scratch ──────────────────────────────────────────
rsi_period = 14
delta = close.diff()
gain = delta.clip(lower=0)
loss = (-delta).clip(lower=0)
avg_g = gain.ewm(alpha=1.0 / rsi_period, min_periods=rsi_period, adjust=False).mean()
avg_l = loss.ewm(alpha=1.0 / rsi_period, min_periods=rsi_period, adjust=False).mean()
rs = avg_g / (avg_l + 1e-12)
rsi = 100.0 - (100.0 / (1.0 + rs))
df["rsi_14"] = rsi
# RSI deviation from neutral 50
df["rsi_dev"] = (rsi - 50.0) / 50.0
# ── MACD (12, 26, 9) ─────────────────────────────────────────────────────
ema12 = close.ewm(span=12, adjust=False).mean()
ema26 = close.ewm(span=26, adjust=False).mean()
macd_line = ema12 - ema26
macd_sig = macd_line.ewm(span=9, adjust=False).mean()
df["macd"] = macd_line / close
df["macd_hist"] = (macd_line - macd_sig) / close
# ── Rolling volatility (realised over 20 bars) ───────────────────────────
df["vol_20"] = df["ret_1"].rolling(20).std()
# ── ATR z-score vs 50-bar rolling mean ───────────────────────────────────
atr_mean = atr.rolling(50).mean()
atr_std = atr.rolling(50).std(ddof=0)
df["atr_z"] = (atr - atr_mean) / (atr_std + 1e-12)
# ── Volume-of-BB-touches over last 10 bars ───────────────────────────────
near_upper = (close >= bb_upper * 0.998).astype(float)
near_lower = (close <= bb_lower * 1.002).astype(float)
df["touch_upper_10"] = near_upper.rolling(10).sum()
df["touch_lower_10"] = near_lower.rolling(10).sum()
# ── SMA 50 (trend filter helper) ─────────────────────────────────────────
df["sma_50"] = close.rolling(50).mean()
df["close_vs_sma"] = (close - df["sma_50"]) / df["sma_50"]
# ── EMA cross (9 / 21) ───────────────────────────────────────────────────
ema9 = close.ewm(span=9, adjust=False).mean()
ema21 = close.ewm(span=21, adjust=False).mean()
df["ema_cross"] = (ema9 - ema21) / close
# ── Bar-of-day / session ─────────────────────────────────────────────────
if hasattr(df.index, "hour"):
df["hour_sin"] = np.sin(2 * np.pi * df.index.hour / 24.0)
df["hour_cos"] = np.cos(2 * np.pi * df.index.hour / 24.0)
else:
df["hour_sin"] = 0.0
df["hour_cos"] = 1.0
# ── Lag features on bb_pct and rsi ───────────────────────────────────────
for lag in [1, 2, 4]:
df[f"bb_pct_lag{lag}"] = df["bb_pct"].shift(lag)
df[f"rsi_14_lag{lag}"] = df["rsi_14"].shift(lag)
df[f"macd_hist_lag{lag}"] = df["macd_hist"].shift(lag)
# ── Fill NaN from warm-up ─────────────────────────────────────────────────
df = df.bfill().ffill()
return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
return {
"title": "USD/CAD BB Mean-Reversion + ATR XGBoost",
"model_type": "XGBClassifier",
"model_params": {
"n_estimators": 400,
"max_depth": 4,
"learning_rate": 0.04,
"subsample": 0.75,
"colsample_bytree": 0.70,
"min_child_weight": 3,
"gamma": 0.15,
"reg_alpha": 0.10,
"reg_lambda": 1.50,
"objective": "binary:logistic",
"random_state": 42,
"n_jobs": -1,
},
"signal_threshold": 0.55,
"direction": "both",
"stop_loss": 0.005,
"take_profit": 0.010,
"cooldown": 0,
"max_positions": 1,
"on_opposite": "reverse",
"session_filter": [7, 20],
"min_atr": 0.0002,
"trend_filter": None,
"target_horizon": 4,
"objective": (
"Maximise risk-adjusted return (Sharpe/Calmar) on USD/CAD 15-min "
"using Bollinger Band mean-reversion signals augmented by ATR, RSI, "
"MACD, and EMA-cross features fed into a regularised XGBoost classifier. "
"SL=0.5% / TP=1.0% gives a 1:2 RR floor. Conservative depth (4) and "
"strong L1/L2 regularisation prevent overfitting on a single year of data."
),
"notes": (
"BB squeeze flag and bb_z capture regime; atr_z filters noisy bars. "
"Session filter 07-20 UTC covers London + NY overlap for tighter spreads. "
"min_atr=0.0002 avoids dead-market whipsaws. Lag features on bb_pct and "
"rsi_14 give the model short-term momentum context without look-ahead."
),
}
|
||||||||||
|
🥈
|
EMA Cross 50/200 + ATR Momentum (XGBoost)
Maximize risk-adjusted return (Sharpe/Calmar) on EUR/USD 15-min data. EMA 50/200 cross provides the primary trend regime filter. ATR 14 gate…
|
D
@delta-atlas-858
|
EURUSD | 15min | 48.5%64.7% | +8.52%+12.40% | 2.412.47 | 0.83%0.83% | 6817 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-05 10:27:10
# Model : XGBoost
# Feature Eng. : EMA (50,200), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/EURUSD_15min.parquet"
START_DATE = "2025-04-23"
END_DATE = "2026-04-23"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
# ── EMA 50 and EMA 200 ──────────────────────────────────────────────────
ema_50 = close.ewm(span=50, adjust=False).mean()
ema_200 = close.ewm(span=200, adjust=False).mean()
df["ema_50"] = ema_50
df["ema_200"] = ema_200
df["dm_ema_50"] = (close - ema_50) / ema_50
df["dm_ema_200"] = (close - ema_200) / ema_200
# EMA cross signal: positive when fast > slow
df["ema_cross"] = ema_50 - ema_200
df["ema_cross_norm"] = df["ema_cross"] / ema_200
# Cross direction change (momentum of the spread)
df["ema_cross_delta"] = df["ema_cross"].diff(1)
df["ema_cross_accel"] = df["ema_cross_delta"].diff(1)
# ── ATR 14 ──────────────────────────────────────────────────────────────
prev_close = close.shift(1)
tr = pd.concat([
high - low,
(high - prev_close).abs(),
(low - prev_close).abs()
], axis=1).max(axis=1)
atr = tr.ewm(span=14, adjust=False).mean()
df["atr"] = atr
df["natr"] = atr / close
# ── Price momentum features ─────────────────────────────────────────────
for lag in [1, 2, 4, 8, 16]:
df[f"ret_{lag}"] = close.pct_change(lag)
# ── Volatility regime ───────────────────────────────────────────────────
df["atr_ratio"] = atr / atr.rolling(50).mean() # ATR vs its own MA
df["natr_ma20"] = df["natr"].rolling(20).mean()
# ── RSI 14 ──────────────────────────────────────────────────────────────
delta = close.diff(1)
gain = delta.clip(lower=0)
loss = (-delta).clip(lower=0)
avg_g = gain.ewm(span=14, adjust=False).mean()
avg_l = loss.ewm(span=14, adjust=False).mean()
rs = avg_g / avg_l.replace(0, np.nan)
df["rsi_14"] = 100 - (100 / (1 + rs))
df["rsi_delta"] = df["rsi_14"].diff(1)
# ── MACD (12/26/9) ──────────────────────────────────────────────────────
ema_12 = close.ewm(span=12, adjust=False).mean()
ema_26 = close.ewm(span=26, adjust=False).mean()
macd = ema_12 - ema_26
signal = macd.ewm(span=9, adjust=False).mean()
df["macd"] = macd
df["macd_signal"] = signal
df["macd_hist"] = macd - signal
df["macd_hist_delta"] = df["macd_hist"].diff(1)
# ── Bollinger Bands (20, 2σ) ─────────────────────────────────────────────
bb_mid = close.rolling(20).mean()
bb_std = close.rolling(20).std()
bb_up = bb_mid + 2 * bb_std
bb_lo = bb_mid - 2 * bb_std
df["bb_pos"] = (close - bb_lo) / (bb_up - bb_lo).replace(0, np.nan)
df["bb_width"] = (bb_up - bb_lo) / bb_mid
# ── Stochastic %K / %D (14, 3) ──────────────────────────────────────────
low14 = low.rolling(14).min()
high14 = high.rolling(14).max()
stoch_k = 100 * (close - low14) / (high14 - low14).replace(0, np.nan)
stoch_d = stoch_k.rolling(3).mean()
df["stoch_k"] = stoch_k
df["stoch_d"] = stoch_d
df["stoch_kd"] = stoch_k - stoch_d
# ── Volume / body / wick features ───────────────────────────────────────
body = (close - open_).abs()
candle = (high - low).replace(0, np.nan)
df["body_ratio"] = body / candle
df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / candle
df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / candle
df["bull_candle"] = np.where(close > open_, 1, 0)
# ── Rolling z-score of close vs SMA 50 ──────────────────────────────────
sma_50 = close.rolling(50).mean()
sma_50_std = close.rolling(50).std()
df["zscore_50"] = (close - sma_50) / sma_50_std.replace(0, np.nan)
# ── High/Low breakout flags ──────────────────────────────────────────────
df["high_20_break"] = np.where(close > high.rolling(20).max().shift(1), 1, 0)
df["low_20_break"] = np.where(close < low.rolling(20).min().shift(1), 1, 0)
# ── Time-of-day features (cyclical encoding) ─────────────────────────────
if hasattr(df.index, 'hour'):
hour = df.index.hour + df.index.minute / 60.0
df["hour_sin"] = np.sin(2 * np.pi * hour / 24.0)
df["hour_cos"] = np.cos(2 * np.pi * hour / 24.0)
# ── Fill NaN from indicator warm-up ─────────────────────────────────────
df = df.bfill().ffill()
return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
return {
"title": "EMA Cross 50/200 + ATR Momentum (XGBoost)",
"model_type": "XGBClassifier",
"model_params": {
"n_estimators": 400,
"max_depth": 4,
"learning_rate": 0.04,
"subsample": 0.75,
"colsample_bytree": 0.70,
"min_child_weight": 3,
"gamma": 0.15,
"reg_alpha": 0.10,
"reg_lambda": 1.50,
"objective": "binary:logistic",
"random_state": 42,
"n_jobs": -1,
},
"signal_threshold": 0.55,
"direction": "both",
"stop_loss": 0.005,
"take_profit": 0.010,
"cooldown": 0,
"max_positions": 1,
"on_opposite": "reverse",
"session_filter": [6, 20],
"min_atr": 0.0002,
"trend_filter": "sma_50",
"target_horizon": 4,
"objective": (
"Maximize risk-adjusted return (Sharpe/Calmar) on EUR/USD 15-min data. "
"EMA 50/200 cross provides the primary trend regime filter. "
"ATR 14 gates entries by volatility (min_atr avoids dead-market noise). "
"XGBoost chosen for its ability to capture non-linear feature interactions. "
"Conservative depth=4 and regularisation (alpha/lambda) prevent overfitting "
"on the relatively short 1-year window. 2:1 reward/risk (SL=0.5%, TP=1.0%) "
"ensures positive expectancy even at modest hit-rates. Session filter 06-20 UTC "
"keeps the strategy in liquid London/NY hours only."
),
"notes": (
"Feature set combines trend (EMA cross, z-score), momentum (RSI, MACD, returns), "
"volatility (ATR ratio, BB width), and price structure (body/wick ratios, "
"stochastic). Cyclical hour encoding captures intraday seasonality without "
"introducing lookahead. bfill().ffill() handles EMA warm-up NaNs gracefully."
),
}
|
||||||||||
|
🥉
|
GBP/USD Gradient Boosting Trend + Mean-Reversion
Maximize risk-adjusted return (Sharpe/Calmar) on GBP/USD 15-min. GradientBoostingClassifier with 400 shallow trees (depth 4) and a conservat…
|
C
@candid-owl-125
|
GBPUSD | 15min | 53.5%58.0% | +3.70%+34.59% | 1.161.78 | 2.46%2.46% | 31269 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 01:41:21
# Model : Gradient Boosting
# Feature Eng. : SMA (20,50,200), BB (20,2.0), RSI 14, MACD (12,26,9), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/GBPUSD_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
# ── SMA 20, 50, 200 + distance from close ──────────────────────────────
for period in [20, 50, 200]:
sma = close.rolling(period).mean()
df[f"sma_{period}"] = sma
df[f"dm_sma_{period}"] = (close - sma) / sma
# ── Bollinger Bands (20, 2) ─────────────────────────────────────────────
bb_mid = close.rolling(20).mean()
bb_std = close.rolling(20).std(ddof=0)
bb_upper = bb_mid + 2.0 * bb_std
bb_lower = bb_mid - 2.0 * bb_std
df["bb_mid"] = bb_mid
df["bb_upper"] = bb_upper
df["bb_lower"] = bb_lower
df["bb_width"] = (bb_upper - bb_lower) / bb_mid
bb_range = bb_upper - bb_lower
df["bb_pct"] = np.where(bb_range != 0, (close - bb_lower) / bb_range, 0.5)
# ── RSI 14 ─────────────────────────────────────────────────────────────
delta = close.diff()
gain = delta.clip(lower=0)
loss = (-delta).clip(lower=0)
avg_gain = gain.ewm(com=13, min_periods=14, adjust=False).mean()
avg_loss = loss.ewm(com=13, min_periods=14, adjust=False).mean()
rs = np.where(avg_loss != 0, avg_gain / avg_loss, 100.0)
df["rsi_14"] = 100.0 - (100.0 / (1.0 + rs))
# ── MACD (12, 26, 9) ───────────────────────────────────────────────────
ema_12 = close.ewm(span=12, adjust=False).mean()
ema_26 = close.ewm(span=26, adjust=False).mean()
macd_line = ema_12 - ema_26
signal_line = macd_line.ewm(span=9, adjust=False).mean()
df["macd_line"] = macd_line
df["macd_signal"] = signal_line
df["macd_hist"] = macd_line - signal_line
# ── ATR 14 + NATR ──────────────────────────────────────────────────────
prev_close = close.shift(1)
tr = pd.concat([
high - low,
(high - prev_close).abs(),
(low - prev_close).abs()
], axis=1).max(axis=1)
atr = tr.ewm(com=13, min_periods=14, adjust=False).mean()
df["atr_14"] = atr
df["natr"] = np.where(close != 0, atr / close, 0.0)
# ── Price momentum (returns over multiple horizons) ────────────────────
for lag in [1, 4, 8, 16]:
df[f"ret_{lag}"] = close.pct_change(lag)
# ── Candle body & wick features ────────────────────────────────────────
body = (close - open_).abs()
candle_range = (high - low).replace(0, np.nan)
df["body_ratio"] = body / candle_range
df["upper_wick"] = np.where(
candle_range.notna(),
(high - close.combine(open_, max)) / candle_range,
0.0
)
df["lower_wick"] = np.where(
candle_range.notna(),
(close.combine(open_, min) - low) / candle_range,
0.0
)
df["candle_dir"] = np.where(close >= open_, 1.0, -1.0)
# ── Volume proxy: normalised candle range ──────────────────────────────
rolling_range = candle_range.rolling(20).mean()
df["norm_range"] = np.where(
rolling_range != 0,
(high - low) / rolling_range,
1.0
)
# ── RSI derived features ───────────────────────────────────────────────
df["rsi_ob"] = np.where(df["rsi_14"] > 70, 1.0, 0.0)
df["rsi_os"] = np.where(df["rsi_14"] < 30, 1.0, 0.0)
df["rsi_mid"] = df["rsi_14"] - 50.0
df["rsi_slope"] = df["rsi_14"].diff(3)
# ── MACD histogram slope ───────────────────────────────────────────────
df["macd_hist_slope"] = df["macd_hist"].diff(2)
df["macd_cross"] = np.where(
(df["macd_hist"] > 0) & (df["macd_hist"].shift(1) <= 0), 1.0,
np.where(
(df["macd_hist"] < 0) & (df["macd_hist"].shift(1) >= 0), -1.0,
0.0
)
)
# ── Bollinger squeeze (low volatility precursor) ───────────────────────
bb_width_ma = df["bb_width"].rolling(20).mean()
df["bb_squeeze"] = np.where(df["bb_width"] < bb_width_ma, 1.0, 0.0)
# ── SMA slope features ─────────────────────────────────────────────────
df["sma_20_slope"] = df["sma_20"].pct_change(4)
df["sma_50_slope"] = df["sma_50"].pct_change(8)
# ── Cross-SMA alignment (trend structure) ──────────────────────────────
df["sma20_above_50"] = np.where(df["sma_20"] > df["sma_50"], 1.0, 0.0)
df["sma50_above_200"] = np.where(df["sma_50"] > df["sma_200"], 1.0, 0.0)
df["close_above_200"] = np.where(close > df["sma_200"], 1.0, 0.0)
# ── Lagged close returns as additional features ────────────────────────
for lag in [1, 2, 3]:
df[f"close_lag_{lag}"] = close.shift(lag)
# ── Rolling volatility (std of returns) ────────────────────────────────
df["vol_10"] = close.pct_change().rolling(10).std()
df["vol_20"] = close.pct_change().rolling(20).std()
df["vol_ratio"] = np.where(
df["vol_20"] != 0,
df["vol_10"] / df["vol_20"],
1.0
)
# ── Hour-of-day (London/NY session proxy) ──────────────────────────────
if hasattr(df.index, "hour"):
df["hour_sin"] = np.sin(2 * np.pi * df.index.hour / 24.0)
df["hour_cos"] = np.cos(2 * np.pi * df.index.hour / 24.0)
else:
df["hour_sin"] = 0.0
df["hour_cos"] = 1.0
# ── Fill NaN from warm-up periods ──────────────────────────────────────
df = df.bfill().ffill()
return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
return {
"title": "GBP/USD Gradient Boosting Trend + Mean-Reversion",
"model_type": "GradientBoostingClassifier",
"model_params": {
"n_estimators": 400,
"max_depth": 4,
"learning_rate": 0.04,
"subsample": 0.8,
"min_samples_leaf": 20,
"max_features": "sqrt",
"validation_fraction": 0.1,
"n_iter_no_change": 30,
"tol": 1e-4,
"random_state": 42,
},
"signal_threshold": 0.55,
"direction": "both",
"stop_loss": 0.005,
"take_profit": 0.010,
"cooldown": 0,
"max_positions": 1,
"on_opposite": "reverse",
"session_filter": [6, 20],
"min_atr": 0.0002,
"trend_filter": None,
"target_horizon": 4,
"objective": (
"Maximize risk-adjusted return (Sharpe/Calmar) on GBP/USD 15-min. "
"GradientBoostingClassifier with 400 shallow trees (depth 4) and a "
"conservative learning rate of 0.04 avoids overfitting while capturing "
"non-linear interactions between trend (SMA alignment, slope), momentum "
"(MACD histogram, RSI), and volatility (ATR, BB squeeze) features. "
"SL=0.5% / TP=1.0% gives 1:2 R/R. Session filter 06-20 UTC covers "
"London open through NY close where GBP/USD liquidity is highest. "
"min_atr filter avoids flat/illiquid bars."
),
"notes": (
"Features include multi-period SMA distances, Bollinger Band pct/width, "
"RSI with overbought/oversold flags, MACD histogram slope and crossover, "
"ATR-normalised volatility, candle body/wick ratios, rolling vol ratio, "
"and hour-of-day cyclical encoding. target_horizon=4 (1-hour forward) "
"balances signal frequency against predictability at 15-min resolution."
),
}
|
||||||||||
|
7.95
|
USD/JPY BB Mean-Reversion + ATR Gradient Boost
Maximise Sharpe ratio via a Gradient Boosting classifier trained on Bollinger Band position (bb_pct), normalised bandwidth (bb_width), ATR/N…
|
R
@ratio_witch
|
USDJPY | 15min | 60.2%62.9% | +4.28%+14.16% | 1.232.07 | 2.32%2.32% | 16670 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 02:01:39
# Model : Gradient Boosting
# Feature Eng. : BB (20,2.0), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/USDJPY_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
# ── Bollinger Bands (period=20, std_dev=2.0) ──────────────────────────────
bb_period = 20
bb_std = 2.0
bb_mid = close.rolling(bb_period).mean()
bb_sigma = close.rolling(bb_period).std(ddof=0)
bb_upper = bb_mid + bb_std * bb_sigma
bb_lower = bb_mid - bb_std * bb_sigma
df["bb_mid"] = bb_mid
df["bb_upper"] = bb_upper
df["bb_lower"] = bb_lower
# bb_width: normalised band width (volatility proxy)
df["bb_width"] = (bb_upper - bb_lower) / bb_mid
# bb_pct: position of close within the band [0, 1]
band_range = bb_upper - bb_lower
df["bb_pct"] = (close - bb_lower) / band_range
# Distance from close to mid in units of band width
df["bb_dist_mid"] = (close - bb_mid) / bb_mid
# ── ATR (period=14) ───────────────────────────────────────────────────────
atr_period = 14
prev_close = close.shift(1)
tr = pd.concat([
high - low,
(high - prev_close).abs(),
(low - prev_close).abs()
], axis=1).max(axis=1)
atr = tr.ewm(span=atr_period, min_periods=atr_period, adjust=False).mean()
natr = atr / close
df["atr"] = atr
df["natr"] = natr
# ── Momentum / trend features ─────────────────────────────────────────────
# Rate of change at multiple horizons
for n in [1, 4, 8, 16]:
df[f"roc_{n}"] = close.pct_change(n)
# RSI (14)
rsi_period = 14
delta = close.diff()
gain = delta.clip(lower=0)
loss = (-delta).clip(lower=0)
avg_gain = gain.ewm(span=rsi_period, min_periods=rsi_period, adjust=False).mean()
avg_loss = loss.ewm(span=rsi_period, min_periods=rsi_period, adjust=False).mean()
rs = avg_gain / (avg_loss + 1e-10)
rsi = 100 - (100 / (1 + rs))
df["rsi_14"] = rsi
# RSI derived: distance from 50 (centred, normalised)
df["rsi_dev"] = (rsi - 50) / 50
# ── MACD (12, 26, 9) ──────────────────────────────────────────────────────
ema12 = close.ewm(span=12, adjust=False).mean()
ema26 = close.ewm(span=26, adjust=False).mean()
macd_line = ema12 - ema26
macd_signal = macd_line.ewm(span=9, adjust=False).mean()
macd_hist = macd_line - macd_signal
df["macd_line"] = macd_line
df["macd_signal"] = macd_signal
df["macd_hist"] = macd_hist
# ── Trend (SMA 50) ────────────────────────────────────────────────────────
sma50 = close.rolling(50).mean()
df["sma_50"] = sma50
df["close_vs_sma50"] = (close - sma50) / sma50 # normalised distance
# ── Volume / candle structure features ────────────────────────────────────
body = (close - open_).abs()
candle_rng = high - low
df["body_ratio"] = body / (candle_rng + 1e-10) # body as fraction of range
df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / (candle_rng + 1e-10)
df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / (candle_rng + 1e-10)
df["candle_dir"] = np.where(close > open_, 1.0, -1.0) # bullish / bearish bar
# ── Lagged bb_pct & rsi (to give the model recent history) ───────────────
for lag in [1, 2, 3]:
df[f"bb_pct_lag{lag}"] = df["bb_pct"].shift(lag)
df[f"rsi_lag{lag}"] = df["rsi_14"].shift(lag)
df[f"macd_hist_lag{lag}"] = df["macd_hist"].shift(lag)
# ── Volatility regime flag ────────────────────────────────────────────────
natr_ma = natr.rolling(50).mean()
df["vol_regime"] = np.where(natr > natr_ma, 1.0, 0.0) # 1 = high-vol regime
# ── BB squeeze detection ──────────────────────────────────────────────────
bb_width_ma = df["bb_width"].rolling(50).mean()
df["bb_squeeze"] = np.where(df["bb_width"] < bb_width_ma, 1.0, 0.0)
# ── Mean-reversion signal strength ────────────────────────────────────────
# Positive → oversold (close below lower band), Negative → overbought
df["mr_signal"] = 0.5 - df["bb_pct"] # centred: +0.5 at lower band, -0.5 at upper
# ── Fill NaN from warm-up ─────────────────────────────────────────────────
df = df.bfill().ffill()
return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
return {
"title": "USD/JPY BB Mean-Reversion + ATR Gradient Boost",
"model_type": "GradientBoostingClassifier",
"model_params": {
"n_estimators": 500,
"max_depth": 4,
"learning_rate": 0.03,
"subsample": 0.8,
"min_samples_leaf": 20,
"max_features": "sqrt",
"validation_fraction": 0.1,
"n_iter_no_change": 30,
"tol": 1e-4,
"random_state": 42,
},
"signal_threshold": 0.55,
"direction": "both",
"stop_loss": 0.005,
"take_profit": 0.010,
"cooldown": 0,
"max_positions": 1,
"on_opposite": "reverse",
"session_filter": None,
"min_atr": None,
"trend_filter": None,
"target_horizon": 4,
"objective": (
"Maximise Sharpe ratio via a Gradient Boosting classifier trained on "
"Bollinger Band position (bb_pct), normalised bandwidth (bb_width), "
"ATR/NATR, RSI, MACD histogram, candle structure, and lagged features. "
"GBM chosen for its ability to capture non-linear interactions between "
"volatility (ATR) and mean-reversion (BB) signals. n_iter_no_change "
"acts as early stopping to prevent overfitting on the 15-min USDJPY series. "
"SL=0.5% / TP=1.0% gives a 1:2 risk-reward; threshold=0.55 reduces noise trades."
),
"notes": (
"Bollinger Bands are the primary mean-reversion anchor; ATR/NATR filter "
"entries to adequate volatility bars. RSI and MACD provide momentum context "
"to avoid fading strong trends. Lagged features (up to 3 bars) give the model "
"short-term regime memory without look-ahead. vol_regime and bb_squeeze flags "
"allow the model to differentiate trending vs. ranging conditions automatically. "
"No session filter applied — USDJPY is liquid across Asian and European sessions."
),
}
|
||||||||||
|
5.21
|
GBP/USD SMA Trend Gradient Boosting Risk-Adj
Maximize risk-adjusted return (Sharpe/Calmar) on GBP/USD 15-min data. GradientBoostingClassifier chosen for its strong bias-variance tradeof…
|
R
@ratio_witch
|
GBPUSD | 15min | 43.1%46.2% | +6.85%+15.56% | 1.711.95 | 2.69%2.69% | 7213 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 01:47:56
# Model : Gradient Boosting
# Feature Eng. : SMA (20,50,200) + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/GBPUSD_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
# ── SMA features (required) ──────────────────────────────────────────────
for period in [20, 50, 200]:
sma = close.rolling(period).mean()
df[f"sma_{period}"] = sma
df[f"dm_sma_{period}"] = (close - sma) / sma
# ── SMA crossover signals ────────────────────────────────────────────────
sma_20 = close.rolling(20).mean()
sma_50 = close.rolling(50).mean()
sma_200 = close.rolling(200).mean()
df["sma_20_50_cross"] = np.where(sma_20 > sma_50, 1.0, -1.0)
df["sma_20_200_cross"] = np.where(sma_20 > sma_200, 1.0, -1.0)
df["sma_50_200_cross"] = np.where(sma_50 > sma_200, 1.0, -1.0)
# ── Price momentum features ──────────────────────────────────────────────
for lag in [1, 2, 4, 8, 16]:
df[f"ret_{lag}"] = close.pct_change(lag)
# ── Volatility: rolling std of returns ──────────────────────────────────
ret_1 = close.pct_change(1)
for window in [8, 20, 50]:
df[f"vol_{window}"] = ret_1.rolling(window).std()
# ── ATR (Average True Range) ─────────────────────────────────────────────
tr = pd.concat([
high - low,
(high - close.shift(1)).abs(),
(low - close.shift(1)).abs()
], axis=1).max(axis=1)
for atr_period in [14, 50]:
atr = tr.rolling(atr_period).mean()
df[f"atr_{atr_period}"] = atr
df[f"natr_{atr_period}"] = atr / close
# ── RSI ──────────────────────────────────────────────────────────────────
for rsi_period in [14, 28]:
delta = close.diff()
gain = delta.clip(lower=0).rolling(rsi_period).mean()
loss = (-delta.clip(upper=0)).rolling(rsi_period).mean()
rs = gain / (loss + 1e-10)
df[f"rsi_{rsi_period}"] = 100 - (100 / (1 + rs))
# ── MACD ─────────────────────────────────────────────────────────────────
ema_12 = close.ewm(span=12, adjust=False).mean()
ema_26 = close.ewm(span=26, adjust=False).mean()
macd_line = ema_12 - ema_26
signal_line = macd_line.ewm(span=9, adjust=False).mean()
df["macd"] = macd_line
df["macd_signal"] = signal_line
df["macd_hist"] = macd_line - signal_line
df["macd_hist_norm"] = (macd_line - signal_line) / (close + 1e-10)
# ── Bollinger Bands ───────────────────────────────────────────────────────
for bb_period in [20, 50]:
bb_mid = close.rolling(bb_period).mean()
bb_std = close.rolling(bb_period).std()
bb_upper = bb_mid + 2.0 * bb_std
bb_lower = bb_mid - 2.0 * bb_std
bb_width = (bb_upper - bb_lower) / (bb_mid + 1e-10)
bb_pos = (close - bb_lower) / (bb_upper - bb_lower + 1e-10)
df[f"bb_width_{bb_period}"] = bb_width
df[f"bb_pos_{bb_period}"] = bb_pos
# ── Stochastic Oscillator ────────────────────────────────────────────────
for stoch_period in [14, 28]:
lowest_low = low.rolling(stoch_period).min()
highest_high = high.rolling(stoch_period).max()
stoch_k = (close - lowest_low) / (highest_high - lowest_low + 1e-10) * 100
stoch_d = stoch_k.rolling(3).mean()
df[f"stoch_k_{stoch_period}"] = stoch_k
df[f"stoch_d_{stoch_period}"] = stoch_d
# ── Rate of Change (ROC) ──────────────────────────────────────────────────
for roc_period in [5, 10, 20]:
df[f"roc_{roc_period}"] = close.pct_change(roc_period)
# ── Candle body and shadow features ──────────────────────────────────────
body = (close - open_).abs()
candle_range = (high - low).abs()
df["body_ratio"] = body / (candle_range + 1e-10)
df["upper_shadow"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / (candle_range + 1e-10)
df["lower_shadow"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / (candle_range + 1e-10)
df["bullish_candle"] = np.where(close > open_, 1.0, -1.0)
# ── Volume-proxy: candle range as volatility proxy ────────────────────────
df["range_norm"] = candle_range / (close + 1e-10)
df["range_ma_ratio"] = candle_range / (candle_range.rolling(20).mean() + 1e-10)
# ── Lag features for return predictors ───────────────────────────────────
for col_lag in ["rsi_14", "macd_hist", "bb_pos_20"]:
for lag in [1, 2, 3]:
df[f"{col_lag}_lag{lag}"] = df[col_lag].shift(lag)
# ── Distance of close from recent high/low ────────────────────────────────
for lookback in [10, 20, 50]:
roll_high = high.rolling(lookback).max()
roll_low = low.rolling(lookback).min()
df[f"dist_high_{lookback}"] = (close - roll_high) / (roll_high + 1e-10)
df[f"dist_low_{lookback}"] = (close - roll_low) / (roll_low + 1e-10)
# ── Trend strength: ADX proxy ─────────────────────────────────────────────
adx_period = 14
tr_adx = tr.copy()
plus_dm = pd.Series(np.where((high.diff() > 0) & (high.diff() > -low.diff()), high.diff(), 0.0), index=close.index)
minus_dm = pd.Series(np.where((-low.diff() > 0) & (-low.diff() > high.diff()), -low.diff(), 0.0), index=close.index)
atr_adx = tr_adx.rolling(adx_period).mean()
plus_di = 100 * plus_dm.rolling(adx_period).mean() / (atr_adx + 1e-10)
minus_di = 100 * minus_dm.rolling(adx_period).mean() / (atr_adx + 1e-10)
dx = (100 * (plus_di - minus_di).abs() / (plus_di + minus_di + 1e-10))
df["adx"] = dx.rolling(adx_period).mean()
df["plus_di"] = plus_di
df["minus_di"] = minus_di
# ── Fill NaN from indicator warm-up ──────────────────────────────────────
df = df.bfill().ffill()
return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
return {
"title": "GBP/USD SMA Trend Gradient Boosting Risk-Adj",
"model_type": "GradientBoostingClassifier",
"model_params": {
"n_estimators": 400,
"max_depth": 4,
"learning_rate": 0.04,
"subsample": 0.75,
"min_samples_leaf": 20,
"max_features": "sqrt",
"n_iter_no_change": 30,
"validation_fraction": 0.1,
"tol": 1e-4,
"random_state": 42,
},
"signal_threshold": 0.57,
"direction": "both",
"stop_loss": 0.005,
"take_profit": 0.010,
"cooldown": 0,
"max_positions": 1,
"on_opposite": "reverse",
"session_filter": [6, 18],
"min_atr": 0.0002,
"trend_filter": "sma_50",
"target_horizon": 4,
"objective": (
"Maximize risk-adjusted return (Sharpe/Calmar) on GBP/USD 15-min data. "
"GradientBoostingClassifier chosen for its strong bias-variance tradeoff "
"on medium-sized tabular datasets without needing GPU. "
"Hyperparameters: moderate depth=4 prevents overfitting, learning_rate=0.04 "
"with 400 estimators balances convergence vs generalisation, subsample=0.75 "
"adds stochasticity to reduce variance, min_samples_leaf=20 enforces statistical "
"significance at each leaf. Early stopping via n_iter_no_change guards against "
"overfit on the training fold. Signal threshold 0.57 filters marginal signals "
"to improve precision. SL=0.5%, TP=1.0% gives 1:2 RR. Session filter 6-18 UTC "
"covers London+NY overlap — highest GBP/USD liquidity and tighter spreads. "
"sma_50 trend filter ensures we only trade in the direction of medium-term trend, "
"reducing whipsaw losses. target_horizon=4 bars (1 hour) gives the model enough "
"time for moves to develop while staying relevant for intraday trading."
),
"notes": (
"Features: SMA 20/50/200 with distance metrics (core requirement), RSI 14/28, "
"MACD, Bollinger Bands 20/50, Stochastic 14/28, ATR 14/50, NATR, ROC, ADX, "
"candle body/shadow ratios, lagged RSI/MACD/BB features, distance from rolling "
"high/low, SMA crossover signals, multi-lag return features. "
"All features are backward-looking only (no lookahead bias). "
"on_opposite=reverse for fast trend-following entries without missing reversals."
),
}
|
||||||||||
|
4.42
|
GBP/USD BB Squeeze Breakout (GradientBoosting)
Maximize risk-adjusted return (Sharpe / Calmar). GradientBoostingClassifier chosen for its strong performance on tabular financial data with…
|
E
@elastic-moose-350
|
GBPUSD | 15min | 53.4%55.6% | +1.03%+24.81% | 1.041.67 | 5.20%5.20% | 34854 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 01:53:28
# Model : Gradient Boosting
# Feature Eng. : BB (20,2.0), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# Bollinger Bands Squeeze Breakout — GBP/USD 15-min
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/GBPUSD_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
# ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
bb_period = 20
bb_std = 2.0
bb_mid = close.rolling(bb_period).mean()
bb_sigma = close.rolling(bb_period).std(ddof=0)
bb_upper = bb_mid + bb_std * bb_sigma
bb_lower = bb_mid - bb_std * bb_sigma
bb_width = (bb_upper - bb_lower) / bb_mid
bb_pct = (close - bb_lower) / (bb_upper - bb_lower)
df["bb_mid"] = bb_mid
df["bb_upper"] = bb_upper
df["bb_lower"] = bb_lower
df["bb_width"] = bb_width
df["bb_pct"] = bb_pct
# ── ATR (14) & NATR ─────────────────────────────────────────────────────
atr_period = 14
prev_close = close.shift(1)
tr = pd.concat([
high - low,
(high - prev_close).abs(),
(low - prev_close).abs()
], axis=1).max(axis=1)
atr = tr.ewm(span=atr_period, min_periods=atr_period, adjust=False).mean()
natr = atr / close
df["atr"] = atr
df["natr"] = natr
# ── Squeeze detection ────────────────────────────────────────────────────
# Squeeze = BB width is in the bottom quartile over a 50-bar lookback
bb_width_min = bb_width.rolling(50).min()
bb_width_max = bb_width.rolling(50).max()
bb_width_norm = (bb_width - bb_width_min) / (bb_width_max - bb_width_min + 1e-12)
df["bb_width_norm"] = bb_width_norm
df["squeeze"] = np.where(bb_width_norm < 0.25, 1.0, 0.0)
# Squeeze released: was in squeeze 1 bar ago, now width is expanding
bb_width_chg = bb_width.diff()
df["squeeze_release"] = np.where(
(df["squeeze"].shift(1) == 1.0) & (bb_width_chg > 0), 1.0, 0.0
)
# ── BB width momentum ────────────────────────────────────────────────────
df["bb_width_chg"] = bb_width_chg
df["bb_width_chg_2"] = bb_width.diff(2)
df["bb_width_chg_5"] = bb_width.diff(5)
# ── Price position relative to bands ─────────────────────────────────────
df["close_vs_mid"] = close - bb_mid
df["close_vs_upper"] = close - bb_upper
df["close_vs_lower"] = close - bb_lower
# ── Momentum & returns ───────────────────────────────────────────────────
df["ret_1"] = close.pct_change(1)
df["ret_3"] = close.pct_change(3)
df["ret_5"] = close.pct_change(5)
df["ret_10"] = close.pct_change(10)
df["ret_20"] = close.pct_change(20)
# ── RSI (14) ─────────────────────────────────────────────────────────────
rsi_period = 14
delta = close.diff()
gain = delta.clip(lower=0)
loss = (-delta).clip(lower=0)
avg_gain = gain.ewm(span=rsi_period, min_periods=rsi_period, adjust=False).mean()
avg_loss = loss.ewm(span=rsi_period, min_periods=rsi_period, adjust=False).mean()
rs = avg_gain / (avg_loss + 1e-12)
rsi = 100.0 - 100.0 / (1.0 + rs)
df["rsi"] = rsi
# RSI divergence proxy: price makes new low/high but RSI does not
df["rsi_5_min"] = rsi.rolling(5).min()
df["close_5_min"] = close.rolling(5).min()
df["rsi_5_max"] = rsi.rolling(5).max()
df["close_5_max"] = close.rolling(5).max()
# ── MACD ─────────────────────────────────────────────────────────────────
ema_fast = close.ewm(span=12, adjust=False).mean()
ema_slow = close.ewm(span=26, adjust=False).mean()
macd_line = ema_fast - ema_slow
macd_signal = macd_line.ewm(span=9, adjust=False).mean()
macd_hist = macd_line - macd_signal
df["macd_line"] = macd_line
df["macd_signal"] = macd_signal
df["macd_hist"] = macd_hist
df["macd_hist_chg"] = macd_hist.diff()
# ── Volume-like proxy: bar range ─────────────────────────────────────────
bar_range = high - low
df["bar_range"] = bar_range
df["bar_range_norm"] = bar_range / (atr + 1e-12)
# ── Candle body direction & size ─────────────────────────────────────────
body = close - open_
df["body"] = body
df["body_norm"] = body / (atr + 1e-12)
df["body_dir"] = np.where(body > 0, 1.0, np.where(body < 0, -1.0, 0.0))
# ── Upper / lower wick ───────────────────────────────────────────────────
df["upper_wick"] = high - pd.concat([close, open_], axis=1).max(axis=1)
df["lower_wick"] = pd.concat([close, open_], axis=1).min(axis=1) - low
# ── SMA trend context ─────────────────────────────────────────────────────
sma_50 = close.rolling(50).mean()
sma_200 = close.rolling(200).mean()
df["sma_50"] = sma_50
df["sma_200"] = sma_200
df["close_vs_sma50"] = (close - sma_50) / (sma_50 + 1e-12)
df["sma50_vs_sma200"] = (sma_50 - sma_200) / (sma_200 + 1e-12)
# ── Volatility regime ────────────────────────────────────────────────────
natr_ma = natr.rolling(50).mean()
df["natr_ratio"] = natr / (natr_ma + 1e-12) # >1 = above-avg vol
# ── Mean-reversion distance ───────────────────────────────────────────────
df["z_score_20"] = (close - bb_mid) / (bb_sigma + 1e-12)
# ── Rolling realized vol ─────────────────────────────────────────────────
df["rvol_10"] = df["ret_1"].rolling(10).std()
df["rvol_20"] = df["ret_1"].rolling(20).std()
# ── ATR-normalised returns ────────────────────────────────────────────────
df["ret_1_natr"] = df["ret_1"] / (natr + 1e-12)
# ── Lagged features ───────────────────────────────────────────────────────
for lag in [1, 2, 3, 5]:
df[f"bb_pct_lag{lag}"] = bb_pct.shift(lag)
df[f"bb_width_lag{lag}"] = bb_width.shift(lag)
df[f"rsi_lag{lag}"] = rsi.shift(lag)
df[f"macd_hist_lag{lag}"] = macd_hist.shift(lag)
# ── Hour-of-day (cyclical) ────────────────────────────────────────────────
hour = pd.Series(df.index.hour, index=df.index).astype(float)
df["hour_sin"] = np.sin(2 * np.pi * hour / 24.0)
df["hour_cos"] = np.cos(2 * np.pi * hour / 24.0)
# ── Day-of-week (cyclical) ────────────────────────────────────────────────
dow = pd.Series(df.index.dayofweek, index=df.index).astype(float)
df["dow_sin"] = np.sin(2 * np.pi * dow / 5.0)
df["dow_cos"] = np.cos(2 * np.pi * dow / 5.0)
# ── Fill NaN from warm-up ────────────────────────────────────────────────
df = df.bfill().ffill()
return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
return {
"title": "GBP/USD BB Squeeze Breakout (GradientBoosting)",
"model_type": "GradientBoostingClassifier",
"model_params": {
"n_estimators": 400,
"max_depth": 4,
"learning_rate": 0.04,
"subsample": 0.75,
"min_samples_leaf": 20,
"min_samples_split": 40,
"max_features": "sqrt",
"n_iter_no_change": 30,
"validation_fraction": 0.1,
"tol": 1e-4,
"random_state": 42,
},
"signal_threshold": 0.55,
"direction": "both",
"stop_loss": 0.005,
"take_profit": 0.010,
"cooldown": 0,
"max_positions": 1,
"on_opposite": "reverse",
"session_filter": [6, 20],
"min_atr": 0.0002,
"trend_filter": None,
"target_horizon": 4,
"objective": (
"Maximize risk-adjusted return (Sharpe / Calmar). "
"GradientBoostingClassifier chosen for its strong performance on "
"tabular financial data with noisy labels. Shallow trees (max_depth=4) "
"with shrinkage (lr=0.04) and subsample=0.75 reduce overfitting. "
"Early stopping (n_iter_no_change=30) prevents over-training. "
"SL=0.5%, TP=1.0% gives a 1:2 risk/reward ratio. "
"Session filter 06-20 UTC captures London + New York overlap for GBP/USD."
),
"notes": (
"Core signal: BB squeeze (narrow band width) followed by expansion "
"breakout, confirmed by MACD histogram direction and RSI. "
"ATR filter ensures minimum volatility for entries. "
"Lagged BB features capture the squeeze build-up dynamic. "
"Z-score and normalized returns give the model mean-reversion context. "
"Cyclical time features allow the model to learn intraday seasonality."
),
}
|
||||||||||
|
3.97
|
NZD/USD RSI-MACD Gradient Boost Risk-Adjusted
Maximize risk-adjusted return (Sharpe/Calmar) using a deep GradientBoostingClassifier with many slow-learning trees and aggressive regularis…
|
S
@silver-bull-130
|
NZDUSD | 15min | 60.9%0.0% | +18.36%+0.00% | 1.35— | 3.80%3.80% | 7320 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 01:35:33
# Model : Gradient Boosting
# Feature Eng. : RSI 14, MACD (12,26,9) + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/NZDUSD_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
# ── RSI 14 ──────────────────────────────────────────────────────────────
delta = close.diff()
gain = delta.clip(lower=0)
loss = (-delta).clip(lower=0)
avg_gain = gain.ewm(alpha=1/14, min_periods=14, adjust=False).mean()
avg_loss = loss.ewm(alpha=1/14, min_periods=14, adjust=False).mean()
rs = avg_gain / avg_loss.replace(0, np.nan)
df["rsi_14"] = 100 - (100 / (1 + rs))
# RSI derived signals
df["rsi_ob"] = np.where(df["rsi_14"] > 70, 1, 0) # overbought flag
df["rsi_os"] = np.where(df["rsi_14"] < 30, 1, 0) # oversold flag
df["rsi_mid"] = df["rsi_14"] - 50 # centred
df["rsi_slope"] = df["rsi_14"].diff(3) # momentum of RSI
df["rsi_accel"] = df["rsi_slope"].diff(2) # acceleration
# RSI regime: above/below 50
df["rsi_bull"] = np.where(df["rsi_14"] > 50, 1, -1)
# ── MACD (12, 26, 9) ────────────────────────────────────────────────────
ema12 = close.ewm(span=12, adjust=False).mean()
ema26 = close.ewm(span=26, adjust=False).mean()
macd_line = ema12 - ema26
signal_line = macd_line.ewm(span=9, adjust=False).mean()
macd_hist = macd_line - signal_line
df["macd_line"] = macd_line
df["macd_signal"] = signal_line
df["macd_hist"] = macd_hist
# MACD derived
df["macd_cross"] = np.where(macd_line > signal_line, 1, -1)
df["macd_hist_sign"] = np.where(macd_hist > 0, 1, -1)
df["macd_hist_chg"] = macd_hist.diff(1) # histogram change
df["macd_hist_accel"]= df["macd_hist_chg"].diff(1) # second derivative
df["macd_zero_cross"]= np.where(macd_line > 0, 1, -1)
# ── ATR 14 ──────────────────────────────────────────────────────────────
tr = pd.concat([
high - low,
(high - close.shift(1)).abs(),
(low - close.shift(1)).abs()
], axis=1).max(axis=1)
atr14 = tr.ewm(alpha=1/14, min_periods=14, adjust=False).mean()
df["atr_14"] = atr14
df["natr_14"] = atr14 / close # normalised ATR
df["atr_ratio"]= atr14 / atr14.rolling(50).mean() # current vs recent vol
# ── Volatility regime ───────────────────────────────────────────────────
df["vol_high"] = np.where(df["natr_14"] > df["natr_14"].rolling(100).median(), 1, 0)
# ── Price momentum ──────────────────────────────────────────────────────
df["ret_1"] = close.pct_change(1)
df["ret_3"] = close.pct_change(3)
df["ret_8"] = close.pct_change(8)
df["ret_16"] = close.pct_change(16)
# Scaled by ATR so the model sees normalised moves
df["ret_1_atr"] = df["ret_1"] / (atr14 / close).replace(0, np.nan)
df["ret_3_atr"] = df["ret_3"] / (atr14 / close).replace(0, np.nan)
df["ret_8_atr"] = df["ret_8"] / (atr14 / close).replace(0, np.nan)
# ── EMAs & trend structure ───────────────────────────────────────────────
ema8 = close.ewm(span=8, adjust=False).mean()
ema21 = close.ewm(span=21, adjust=False).mean()
ema50 = close.ewm(span=50, adjust=False).mean()
ema100= close.ewm(span=100,adjust=False).mean()
df["ema8_21_spread"] = (ema8 - ema21) / close
df["ema21_50_spread"]= (ema21 - ema50) / close
df["ema50_100_spread"]= (ema50 - ema100) / close
df["price_vs_ema21"] = (close - ema21) / close
df["price_vs_ema50"] = (close - ema50) / close
df["trend_align"] = np.where(
(ema8 > ema21) & (ema21 > ema50), 1,
np.where((ema8 < ema21) & (ema21 < ema50), -1, 0)
)
# ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
bb_mid = close.rolling(20).mean()
bb_std = close.rolling(20).std(ddof=0)
bb_up = bb_mid + 2 * bb_std
bb_lo = bb_mid - 2 * bb_std
bb_bw = (bb_up - bb_lo) / bb_mid # bandwidth
bb_pct = (close - bb_lo) / (bb_up - bb_lo) # %B
df["bb_pct"] = bb_pct
df["bb_bw"] = bb_bw
df["bb_bw_ratio"] = bb_bw / bb_bw.rolling(50).mean() # squeeze detector
df["bb_upper_touch"] = np.where(close >= bb_up, 1, 0)
df["bb_lower_touch"] = np.where(close <= bb_lo, 1, 0)
# ── Stochastic %K %D (14, 3) ────────────────────────────────────────────
lo14 = low.rolling(14).min()
hi14 = high.rolling(14).max()
stoch_k = 100 * (close - lo14) / (hi14 - lo14).replace(0, np.nan)
stoch_d = stoch_k.rolling(3).mean()
df["stoch_k"] = stoch_k
df["stoch_d"] = stoch_d
df["stoch_kd_diff"]= stoch_k - stoch_d
df["stoch_ob"] = np.where(stoch_k > 80, 1, 0)
df["stoch_os"] = np.where(stoch_k < 20, 1, 0)
# ── Candle structure ────────────────────────────────────────────────────
body = (close - open_).abs()
candle_rng= (high - low).replace(0, np.nan)
df["body_ratio"] = body / candle_rng # body vs full range
df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / candle_rng
df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / candle_rng
df["candle_dir"] = np.where(close > open_, 1, -1)
df["candle_dir_3"] = df["candle_dir"].rolling(3).sum() # short-term bias
# ── Volume-less momentum oscillator (Williams %R 14) ───────────────────
df["williams_r"] = -100 * (hi14 - close) / (hi14 - lo14).replace(0, np.nan)
# ── RSI x MACD composite signal ─────────────────────────────────────────
df["rsi_macd_bull"] = np.where(
(df["rsi_14"] > 50) & (macd_hist > 0), 1,
np.where((df["rsi_14"] < 50) & (macd_hist < 0), -1, 0)
)
# ── Divergence proxy: price vs RSI direction (3-bar) ────────────────────
price_dir3 = np.sign(close.diff(3))
rsi_dir3 = np.sign(df["rsi_14"].diff(3))
df["rsi_div"] = np.where(price_dir3 != rsi_dir3, 1, 0)
# ── Mean-reversion signal: distance from 50-bar mean normalised by ATR ──
sma50 = close.rolling(50).mean()
df["zscore_50"] = (close - sma50) / (close.rolling(50).std(ddof=0).replace(0, np.nan))
df["mean_rev_long"] = np.where(df["zscore_50"] < -1.5, 1, 0)
df["mean_rev_short"] = np.where(df["zscore_50"] > 1.5, 1, 0)
# ── Interaction features ─────────────────────────────────────────────────
df["rsi_bb_pct"] = df["rsi_14"] * df["bb_pct"]
df["macd_hist_rsi_mid"] = df["macd_hist"] * df["rsi_mid"]
df["stoch_rsi"] = df["stoch_k"] * df["rsi_14"] / 1e4 # normalised product
# ── Lag features (avoid lookahead) ──────────────────────────────────────
for lag in [1, 2, 4, 8]:
df[f"rsi_lag{lag}"] = df["rsi_14"].shift(lag)
df[f"macd_hist_lag{lag}"] = df["macd_hist"].shift(lag)
df[f"ret_lag{lag}"] = df["ret_1"].shift(lag)
# ── Hour-of-day & day-of-week cyclic encoding ───────────────────────────
if hasattr(df.index, "hour"):
hour = df.index.hour
dow = df.index.dayofweek
df["hour_sin"] = np.sin(2 * np.pi * hour / 24)
df["hour_cos"] = np.cos(2 * np.pi * hour / 24)
df["dow_sin"] = np.sin(2 * np.pi * dow / 5)
df["dow_cos"] = np.cos(2 * np.pi * dow / 5)
# ── Final fill ───────────────────────────────────────────────────────────
df = df.bfill().ffill()
return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
return {
"title": "NZD/USD RSI-MACD Gradient Boost Risk-Adjusted",
"model_type": "GradientBoostingClassifier",
"model_params": {
"n_estimators": 600,
"max_depth": 4,
"learning_rate": 0.03,
"subsample": 0.75,
"max_features": "sqrt",
"min_samples_leaf": 20,
"min_samples_split":40,
"warm_start": False,
},
"signal_threshold": 0.56,
"direction": "both",
"stop_loss": 0.005,
"take_profit": 0.010,
"cooldown": 0,
"max_positions": 1,
"on_opposite": "reverse",
"session_filter": [21, 21],
"min_atr": 0.0002,
"trend_filter": None,
"target_horizon": 4,
"objective": (
"Maximize risk-adjusted return (Sharpe/Calmar) using a deep "
"GradientBoostingClassifier with many slow-learning trees and "
"aggressive regularisation (min_samples_leaf=20, subsample=0.75). "
"Feature set deliberately differs from prior RSI+BB+Stoch attempts "
"by adding: ATR-normalised returns, z-score mean-reversion signals, "
"RSI divergence proxy, Williams %R, candle structure ratios, cyclic "
"time encoding, and interaction/lag features to give the model richer "
"multi-timeframe context. SL=0.5%/TP=1% gives 1:2 RR aligned with "
"maximising Sharpe."
),
"notes": (
"Prior PF=1.35 / ret=+18.36% used standard RSI+MACD+BB+Stoch without "
"ATR normalisation or divergence detection. This version adds z-score "
"mean-reversion context, candle structure, and temporal encoding to "
"reduce false positives. session_filter=[21,21] is intentionally "
"narrow — set to None if you want 24h coverage. min_atr=0.0002 "
"avoids dead-market signals."
),
}
|
||||||||||
|
3.95
|
USD/JPY BB Squeeze Breakout (GBM)
Maximize risk-adjusted return (Sharpe). GradientBoostingClassifier chosen for strong performance on tabular financial data with moderate fea…
|
V
@vol_drifter
|
USDJPY | 15min | 60.7%74.1% | +1.15%+7.28% | 1.062.29 | 3.13%3.13% | 20127 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 02:57:20
# Model : Gradient Boosting
# Feature Eng. : BB (20,2.0), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# Bollinger Bands Squeeze Breakout Strategy — USD/JPY 15-min
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/USDJPY_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
# ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
bb_period = 20
bb_std = 2.0
bb_mid = close.rolling(bb_period).mean()
bb_sigma = close.rolling(bb_period).std(ddof=0)
bb_upper = bb_mid + bb_std * bb_sigma
bb_lower = bb_mid - bb_std * bb_sigma
df["bb_mid"] = bb_mid
df["bb_upper"] = bb_upper
df["bb_lower"] = bb_lower
# Band width and %B — required features
df["bb_width"] = (bb_upper - bb_lower) / bb_mid
df["bb_pct"] = (close - bb_lower) / (bb_upper - bb_lower)
# ── ATR 14 & NATR ────────────────────────────────────────────────────────
atr_period = 14
prev_close = close.shift(1)
tr = pd.concat([
high - low,
(high - prev_close).abs(),
(low - prev_close).abs()
], axis=1).max(axis=1)
atr = tr.ewm(span=atr_period, min_periods=atr_period, adjust=False).mean()
natr = atr / close
df["atr"] = atr
df["natr"] = natr
# ── Squeeze detection ────────────────────────────────────────────────────
# Keltner Channel (EMA20 ± 1.5 × ATR) for squeeze comparison
kc_mid = close.ewm(span=20, adjust=False).mean()
kc_upper = kc_mid + 1.5 * atr
kc_lower = kc_mid - 1.5 * atr
df["squeeze"] = np.where(
(bb_upper < kc_upper) & (bb_lower > kc_lower), 1.0, 0.0
)
# Rolling squeeze count (bars in squeeze over last 10 bars)
df["squeeze_count"] = (
df["squeeze"].rolling(10).sum()
)
# Band-width z-score (how compressed is the width vs recent history)
bw_mean = df["bb_width"].rolling(50).mean()
bw_std = df["bb_width"].rolling(50).std(ddof=0)
df["bb_width_zscore"] = (df["bb_width"] - bw_mean) / (bw_std + 1e-10)
# ── Breakout momentum ────────────────────────────────────────────────────
# Price distance from bands, normalised by ATR
df["dist_upper"] = (close - bb_upper) / (atr + 1e-10)
df["dist_lower"] = (close - bb_lower) / (atr + 1e-10)
df["dist_mid"] = (close - bb_mid) / (atr + 1e-10)
# ── Rate of change ────────────────────────────────────────────────────────
for n in [1, 4, 8, 16]:
df[f"roc_{n}"] = close.pct_change(n)
# ── RSI 14 ───────────────────────────────────────────────────────────────
delta = close.diff()
gain = delta.clip(lower=0)
loss = (-delta).clip(lower=0)
avg_g = gain.ewm(span=14, min_periods=14, adjust=False).mean()
avg_l = loss.ewm(span=14, min_periods=14, adjust=False).mean()
rs = avg_g / (avg_l + 1e-10)
df["rsi_14"] = 100.0 - 100.0 / (1.0 + rs)
# RSI normalised to [-1, 1]
df["rsi_norm"] = (df["rsi_14"] - 50.0) / 50.0
# ── Momentum / trend context ──────────────────────────────────────────────
ema_9 = close.ewm(span=9, adjust=False).mean()
ema_21 = close.ewm(span=21, adjust=False).mean()
ema_50 = close.ewm(span=50, adjust=False).mean()
df["ema_9_21_diff"] = (ema_9 - ema_21) / (atr + 1e-10)
df["ema_21_50_diff"] = (ema_21 - ema_50) / (atr + 1e-10)
# Price position relative to EMAs
df["close_vs_ema9"] = (close - ema_9) / (atr + 1e-10)
df["close_vs_ema50"] = (close - ema_50) / (atr + 1e-10)
# ── Volume-proxy: ATR velocity ────────────────────────────────────────────
df["atr_roc"] = atr.pct_change(4)
# ── MACD-style oscillator ─────────────────────────────────────────────────
macd_line = close.ewm(span=12, adjust=False).mean() - close.ewm(span=26, adjust=False).mean()
macd_signal = macd_line.ewm(span=9, adjust=False).mean()
df["macd_hist"] = (macd_line - macd_signal) / (atr + 1e-10)
# ── Stochastic %K (14) ────────────────────────────────────────────────────
low_14 = low.rolling(14).min()
high_14 = high.rolling(14).max()
df["stoch_k"] = (close - low_14) / (high_14 - low_14 + 1e-10)
# ── Candle body / wick features ───────────────────────────────────────────
body = (close - open_).abs()
candle_range = (high - low).replace(0, np.nan)
df["body_ratio"] = body / candle_range
df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / (candle_range + 1e-10)
df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / (candle_range + 1e-10)
df["bull_candle"] = np.where(close > open_, 1.0, 0.0)
# ── Lagged bb_pct and bb_width ────────────────────────────────────────────
for lag in [1, 2, 4]:
df[f"bb_pct_lag{lag}"] = df["bb_pct"].shift(lag)
df[f"bb_width_lag{lag}"] = df["bb_width"].shift(lag)
# ── Band width momentum (is it expanding?) ────────────────────────────────
df["bb_width_chg1"] = df["bb_width"].diff(1)
df["bb_width_chg4"] = df["bb_width"].diff(4)
# ── Hour / session features ───────────────────────────────────────────────
df["hour_sin"] = np.sin(2 * np.pi * df.index.hour / 24)
df["hour_cos"] = np.cos(2 * np.pi * df.index.hour / 24)
df["dow_sin"] = np.sin(2 * np.pi * df.index.dayofweek / 5)
df["dow_cos"] = np.cos(2 * np.pi * df.index.dayofweek / 5)
# ── Fill NaN from warm-up ─────────────────────────────────────────────────
df = df.bfill().ffill()
return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
return {
"title": "USD/JPY BB Squeeze Breakout (GBM)",
"model_type": "GradientBoostingClassifier",
"model_params": {
"n_estimators": 400,
"max_depth": 4,
"learning_rate": 0.04,
"subsample": 0.75,
"min_samples_leaf": 20,
"max_features": "sqrt",
"validation_fraction": 0.1,
"n_iter_no_change": 30,
"tol": 1e-4,
"random_state": 42,
},
"signal_threshold": 0.55,
"direction": "both",
"stop_loss": 0.005,
"take_profit": 0.010,
"cooldown": 0,
"max_positions": 1,
"on_opposite": "reverse",
"session_filter": [6, 20],
"min_atr": 0.0003,
"trend_filter": None,
"target_horizon": 4,
"objective": (
"Maximize risk-adjusted return (Sharpe). "
"GradientBoostingClassifier chosen for strong performance on tabular "
"financial data with moderate feature counts. Deeper ensemble (400 "
"estimators, depth 4) with early stopping captures non-linear BB "
"squeeze patterns. Subsample=0.75 and sqrt features reduce overfitting. "
"SL 0.5% / TP 1.0% gives 1:2 R:R ratio. Session filter 06-20 UTC covers "
"London + NY sessions where USD/JPY liquidity is highest."
),
"notes": (
"Core signal: BB squeeze (narrow band width inside Keltner Channel) "
"followed by band expansion. Features include band width z-score, "
"breakout direction (dist_upper/lower), RSI, MACD histogram, "
"stochastic %K, EMA spreads, candle structure, and lagged BB features. "
"NATR used as min_atr filter to avoid low-volatility noise trades. "
"Horizon=4 bars (1 hour on 15-min data) aligns with typical "
"post-squeeze expansion duration."
),
}
|
||||||||||
|
2.10
|
USD/CAD BB + ATR Gradient Boosting Mean-Rev
Maximize risk-adjusted return (Sharpe/Calmar) on USD/CAD 15-min data. GradientBoostingClassifier chosen for strong generalisation on noisy F…
|
S
@silver-bull-130
|
USDCAD | 15min | 62.6%56.8% | +2.56%+4.87% | 1.151.33 | 1.75%1.75% | 35644 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 01:50:17
# Model : Gradient Boosting
# Feature Eng. : BB (20,2.0), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/USDCAD_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
# ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
bb_period = 20
bb_std = 2.0
bb_mid = close.rolling(bb_period).mean()
bb_std_s = close.rolling(bb_period).std(ddof=0)
bb_upper = bb_mid + bb_std * bb_std_s
bb_lower = bb_mid - bb_std * bb_std_s
df["bb_mid"] = bb_mid
df["bb_upper"] = bb_upper
df["bb_lower"] = bb_lower
df["bb_width"] = (bb_upper - bb_lower) / bb_mid
df["bb_pct"] = (close - bb_lower) / (bb_upper - bb_lower)
# ── ATR (14) & Normalised ATR ────────────────────────────────────────────
atr_period = 14
prev_close = close.shift(1)
tr = pd.concat([
high - low,
(high - prev_close).abs(),
(low - prev_close).abs()
], axis=1).max(axis=1)
atr = tr.ewm(span=atr_period, adjust=False).mean()
natr = atr / close
df["atr"] = atr
df["natr"] = natr
# ── RSI (14) ─────────────────────────────────────────────────────────────
rsi_period = 14
delta = close.diff()
gain = delta.clip(lower=0)
loss = (-delta).clip(lower=0)
avg_gain = gain.ewm(span=rsi_period, adjust=False).mean()
avg_loss = loss.ewm(span=rsi_period, adjust=False).mean()
rs = avg_gain / avg_loss.replace(0, np.nan)
df["rsi"] = 100 - (100 / (1 + rs))
# ── MACD (12, 26, 9) ─────────────────────────────────────────────────────
ema12 = close.ewm(span=12, adjust=False).mean()
ema26 = close.ewm(span=26, adjust=False).mean()
macd_line = ema12 - ema26
macd_signal = macd_line.ewm(span=9, adjust=False).mean()
df["macd"] = macd_line
df["macd_sig"] = macd_signal
df["macd_hist"]= macd_line - macd_signal
# ── SMA filters (50, 200) ────────────────────────────────────────────────
df["sma_20"] = close.rolling(20).mean()
df["sma_50"] = close.rolling(50).mean()
df["sma_200"] = close.rolling(200).mean()
# Price relative to moving averages
df["close_vs_sma20"] = (close - df["sma_20"]) / df["sma_20"]
df["close_vs_sma50"] = (close - df["sma_50"]) / df["sma_50"]
df["close_vs_sma200"] = (close - df["sma_200"]) / df["sma_200"]
# ── Price momentum / returns ─────────────────────────────────────────────
df["ret_1"] = close.pct_change(1)
df["ret_4"] = close.pct_change(4)
df["ret_8"] = close.pct_change(8)
df["ret_16"] = close.pct_change(16)
df["ret_32"] = close.pct_change(32)
# ── Candle body & wick features ──────────────────────────────────────────
body = (close - open_).abs()
candle_range = (high - low).replace(0, np.nan)
df["body_ratio"] = body / candle_range
df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / candle_range
df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / candle_range
df["close_dir"] = np.sign(close - open_)
# ── Volatility regime ────────────────────────────────────────────────────
df["vol_ratio"] = natr / natr.rolling(50).mean() # ATR vs its own average
df["bb_squeeze"] = np.where(df["bb_width"] < df["bb_width"].rolling(50).quantile(0.25), 1.0, 0.0)
# ── Stochastic %K / %D (14, 3) ───────────────────────────────────────────
low14 = low.rolling(14).min()
high14 = high.rolling(14).max()
stoch_k = 100 * (close - low14) / (high14 - low14).replace(0, np.nan)
stoch_d = stoch_k.rolling(3).mean()
df["stoch_k"] = stoch_k
df["stoch_d"] = stoch_d
# ── Rate-of-change ───────────────────────────────────────────────────────
df["roc_10"] = (close - close.shift(10)) / close.shift(10)
# ── Rolling z-score of close (20-bar) ────────────────────────────────────
roll_mean = close.rolling(20).mean()
roll_std = close.rolling(20).std(ddof=0).replace(0, np.nan)
df["zscore_20"] = (close - roll_mean) / roll_std
# ── Volume-related (if volume column exists) ─────────────────────────────
if "volume" in df.columns and df["volume"].sum() > 0:
vol_ma = df["volume"].rolling(20).mean().replace(0, np.nan)
df["vol_ratio_20"] = df["volume"] / vol_ma
# ── Fill NaNs from warm-up ───────────────────────────────────────────────
df = df.bfill().ffill()
return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
return {
"title": "USD/CAD BB + ATR Gradient Boosting Mean-Rev",
"model_type": "GradientBoostingClassifier",
"model_params": {
"n_estimators": 400,
"max_depth": 4,
"learning_rate": 0.04,
"subsample": 0.8,
"min_samples_leaf": 20,
"max_features": "sqrt",
"validation_fraction": 0.1,
"n_iter_no_change": 30,
"tol": 1e-4,
"random_state": 42,
},
"signal_threshold": 0.55,
"direction": "both",
"stop_loss": 0.005,
"take_profit": 0.010,
"cooldown": 0,
"max_positions": 1,
"on_opposite": "reverse",
"session_filter": [7, 20],
"min_atr": 0.0002,
"trend_filter": None,
"target_horizon": 4,
"objective": (
"Maximize risk-adjusted return (Sharpe/Calmar) on USD/CAD 15-min data. "
"GradientBoostingClassifier chosen for strong generalisation on noisy FX "
"price data; moderate depth (4) and learning rate (0.04) with early stopping "
"prevent overfitting. Features: Bollinger Bands (mean-reversion signal via "
"bb_pct and bb_width), ATR/NATR (volatility filter), RSI, MACD, Stochastic, "
"z-score, momentum returns, and candle-body ratios. 2:1 R:R (SL 0.5%, TP 1.0%) "
"with session filter (07-20 UTC) to avoid illiquid overnight hours."
),
"notes": (
"session_filter [7,20] captures London + NY overlap on USD/CAD. "
"min_atr 0.0002 avoids flat/choppy markets. on_opposite=reverse ensures "
"the model flips direction quickly when sentiment changes. "
"target_horizon=4 bars (1 hour) aligns with typical intraday FX moves."
),
}
|
||||||||||
|
1.93
|
EMA(9/21) trend
|
M
@malcolmtan
|
EMA(9/ | 50.0%— | +0.90%— | 2.15— | 0.39%0.39% | 10— |
|
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-25 02:27:36
# Model : XGBoost
# Feature Eng. : go long when EMA(9) crosses above EMA(21), exit when it crosses back below + Auto-add features: ON
# Signal / Entry : —
# Optimization : —
# Risk Mgmt : —
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# ── Inlined strategy_utils ──
"""
strategy_utils.py — Standard utility functions for generated strategies.
Claude imports these instead of writing boilerplate from scratch.
This ensures consistent behavior across all generated strategies.
"""
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
# Max backtest window per timeframe. A finer timeframe over a longer window
# blows up the results dict / parquet load / Modal train time (the 2026-05-12
# OOM was a 1-min × multi-year sweep) — and a 1-min strategy gains nothing from
# 2 years of 1-min bars. Enforced HERE because every training path (UI / API /
# Modal) funnels through run_strategy → load_ohlc. Env-overridable so a future
# "max plan" / dedicated-server tier can lift it.
_TF_MAX_DAYS = {
"1min": 30,
"5min": 90,
"15min": 365,
"1h": 730,
}
def _fetch_ohlc_from_internal(symbol: str, tf: str, start: str, end: str):
"""Phase 3.2: fetch parquet bytes from Server A's /internal/ohlc endpoint
instead of reading a local file. Used inside Modal containers / Mac worker
pool (Phase 3.4) so every train sees the same source of truth as the chart.
Returns: pd.DataFrame (parquet decoded), or raises on any failure so the
caller can fall back / surface a clear error in the job.
"""
import hashlib as _hashlib, hmac as _hmac, io as _io, os as _os
import urllib.request as _ur, urllib.parse as _urp
base = (_os.environ.get("QM_INTERNAL_OHLC_BASE") or "").rstrip("/")
secret = (_os.environ.get("INTERNAL_WS_SECRET") or "").strip()
if not base:
raise RuntimeError("QM_INTERNAL_OHLC_BASE not set")
if not secret:
raise RuntimeError("INTERNAL_WS_SECRET not set")
msg = f"{symbol}|{tf}|{start}|{end}".encode("utf-8")
sig = _hmac.new(secret.encode("utf-8"), msg, _hashlib.sha256).hexdigest()
qs = _urp.urlencode({
"symbol": symbol, "tf": tf,
"start": start, "end": end, "sig": sig,
})
url = f"{base}/internal/ohlc?{qs}"
req = _ur.Request(url, headers={"User-Agent": "qm-worker/1.0"})
with _ur.urlopen(req, timeout=30) as resp:
if resp.status != 200:
raise RuntimeError(f"/internal/ohlc returned {resp.status}")
payload = resp.read()
print(f"[load_ohlc:internal] {symbol} {tf} fetched {len(payload)} bytes", flush=True)
return pd.read_parquet(_io.BytesIO(payload))
def _parse_symbol_tf_from_path(data_path: str):
"""Pull SYMBOL + TF out of a path like .../EURUSD_1min.parquet."""
import os as _os, re as _re
base = _os.path.basename(str(data_path))
m = _re.match(r"^([A-Z]{6})_(\d+min|\d+h)\.parquet$", base)
if not m:
return None, None
return m.group(1), m.group(2)
def load_ohlc(data_path, start_date="", end_date=""):
"""Load OHLC parquet, sort index, filter dates. Always returns consistent format.
The lower bound is clamped per timeframe (see _TF_MAX_DAYS) — a request for
more history than the cap silently starts later.
Phase 3.2: when env QM_USE_INTERNAL_OHLC=="1", fetch over HTTP from
Server A's /internal/ohlc endpoint instead of pd.read_parquet on a local
file (which on Modal is a stale Volume snapshot). The endpoint applies the
same day-cap, so the local cap-check below is a defensive no-op in that
path. Flag defaults to "0" → unchanged behavior.
Returns: (df, close, open_, high, low)
"""
import os as _os, re as _re
_use_internal = _os.environ.get("QM_USE_INTERNAL_OHLC", "0") == "1"
if _use_internal:
_sym, _tf = _parse_symbol_tf_from_path(data_path)
if not _sym or not _tf:
raise RuntimeError(
f"QM_USE_INTERNAL_OHLC=1 but DATA_PATH basename does not match "
f"SYMBOL_TF.parquet: {data_path}"
)
df = _fetch_ohlc_from_internal(_sym, _tf, start_date or "", end_date or "")
else:
df = pd.read_parquet(data_path)
df.index = pd.to_datetime(df.index)
df = df.sort_index()
# Per-timeframe window cap (timeframe inferred from the parquet filename).
_m = _re.search(r"_(\d+min|\d+h)\.parquet$", _os.path.basename(str(data_path)))
_tf = _m.group(1) if _m else None
_max_days = _TF_MAX_DAYS.get(_tf)
if _max_days and _max_days > 0 and len(df):
_env_override = _os.environ.get(f"QM_MAX_DAYS_{_tf.upper()}")
if _env_override and _env_override.isdigit():
_max_days = int(_env_override)
try:
_eff_end = pd.Timestamp(end_date) if end_date else df.index.max()
_eff_end = min(_eff_end, df.index.max())
_floor = _eff_end - pd.Timedelta(days=_max_days)
_req_start = pd.Timestamp(start_date) if start_date else df.index.min()
if _req_start < _floor:
print(f"[load_ohlc] {_tf} backtest window capped to {_max_days}d: "
f"start {_req_start.date()} -> {_floor.date()}", flush=True)
start_date = _floor
except Exception as _e:
print(f"[load_ohlc] window-cap check skipped ({_e})", flush=True)
if start_date:
df = df[df.index >= start_date]
if end_date:
df = df[df.index <= end_date]
return df, df["close"], df["open"], df["high"], df["low"]
def make_target(close, horizon=4):
"""Create target: direction N bars ahead. Default 4 bars = 1 hour on 15-min data.
Returns: target (pd.Series of -1, 0, 1)
"""
return np.sign(close.shift(-horizon) - close)
def split_data(df, target, feature_cols, train_split=0.7, validation_date=""):
"""Train/test split. Handles both ratio and date-based splits.
Drops NaN from target before splitting. Encodes labels to [0,1,2].
Returns: dict with keys:
X_train, X_test, y_train, y_test,
y_train_enc, y_test_enc, enc,
close_train, close_test,
split_idx, split_dt, n_train, n_test
"""
# Drop NaN from target
mask = target.notna()
df = df[mask].copy()
target = target[mask]
close = df["close"]
# Build feature matrix
X = df[feature_cols].copy()
X = X.bfill().ffill()
X = X.replace([np.inf, -np.inf], np.nan).fillna(0.0)
# Split
if validation_date:
split_idx = len(df[df.index <= validation_date])
else:
split_idx = int(len(df) * train_split)
split_idx = max(1, min(split_idx, len(df) - 1))
X_train = X.iloc[:split_idx]
X_test = X.iloc[split_idx:]
y_train = target.iloc[:split_idx]
y_test = target.iloc[split_idx:]
close_train = close.iloc[:split_idx]
close_test = close.iloc[split_idx:]
split_dt = str(df.index[split_idx])
# Label encoding — always fit on [-1, 0, 1]
enc = LabelEncoder()
enc.fit([-1, 0, 1])
y_train_enc = enc.transform(y_train)
y_test_enc = enc.transform(y_test)
return {
"df": df, "X_train": X_train, "X_test": X_test,
"y_train": y_train, "y_test": y_test,
"y_train_enc": y_train_enc, "y_test_enc": y_test_enc,
"enc": enc,
"close": close, "close_train": close_train, "close_test": close_test,
"split_idx": split_idx, "split_dt": split_dt,
"n_train": len(X_train), "n_test": len(X_test),
}
def compute_overlays(close, df_index):
"""Compute BB and MA overlays on full dataset. Always consistent.
Returns: (bb_dict, ma_dict)
"""
bb_mid = close.rolling(20).mean()
bb_std = close.rolling(20).std()
bb_upper = bb_mid + 2 * bb_std
bb_lower = bb_mid - 2 * bb_std
ma50 = close.rolling(50).mean()
ma100 = close.rolling(100).mean()
ma200 = close.rolling(200).mean()
def _safe(s):
s = s.reindex(df_index).bfill().ffill()
return [float(x) if (x is not None and not np.isnan(x) and not np.isinf(x)) else None
for x in s.values]
bb = {"upper": _safe(bb_upper), "mid": _safe(bb_mid), "lower": _safe(bb_lower)}
ma = {"ma50": _safe(ma50), "ma100": _safe(ma100), "ma200": _safe(ma200)}
return bb, ma
def run_backtest(signal, close, capital=10000, cost=2e-5):
"""Run backtest with transaction costs.
Uses price-based trade returns (same as webapp _compute_trades).
Signal 0 = hold (keep current position), not close.
Returns: dict with equity, trade_returns, long_returns, short_returns, bar_returns
"""
sig_arr = signal.values
price_arr = close.values
idx = signal.index
n = len(price_arr)
# Trade returns — price-based (matches webapp _compute_trades exactly)
trade_returns = []
long_returns = []
short_returns = []
trade_log = []
last_dir = None
entry_price = None
entry_bar = None
for i in range(n):
s = sig_arr[i]
c = price_arr[i]
if s != 0.0 and s != last_dir:
# Direction change — close previous trade, open new
if last_dir is not None and entry_price is not None and entry_price != 0:
ret = float(last_dir * (c - entry_price) / entry_price - cost)
trade_returns.append(ret)
if last_dir == 1:
long_returns.append(ret)
else:
short_returns.append(ret)
trade_log.append({
"type": "Buy" if last_dir == 1 else "Sell",
"entry_time": str(idx[entry_bar]),
"exit_time": str(idx[i]),
"entry_price": round(entry_price, 5),
"exit_price": round(c, 5),
"pnl": round(last_dir * (c - entry_price), 5),
"pnl_pct": round(ret * 100, 3),
"exit_reason": "signal",
})
entry_price = c
entry_bar = i
last_dir = s
# Close last open trade
if last_dir is not None and entry_price is not None and n > 0 and entry_price != 0:
c = price_arr[-1]
ret = float(last_dir * (c - entry_price) / entry_price - cost)
trade_returns.append(ret)
if last_dir == 1:
long_returns.append(ret)
else:
short_returns.append(ret)
trade_log.append({
"type": "Buy" if last_dir == 1 else "Sell",
"entry_time": str(idx[entry_bar]),
"exit_time": str(idx[-1]),
"entry_price": round(entry_price, 5),
"exit_price": round(c, 5),
"pnl": round(last_dir * (c - entry_price), 5),
"pnl_pct": round(ret * 100, 3),
"exit_reason": "end",
})
# Equity curve from trade returns
cumret = 1.0
equity_vals = np.full(n, float(capital))
trade_idx = 0
in_trade = False
t_entry_price = None
t_dir = None
for i in range(n):
s = sig_arr[i]
c = price_arr[i]
if s != 0.0 and s != t_dir:
if t_dir is not None and t_entry_price is not None and t_entry_price != 0:
t_ret = t_dir * (c - t_entry_price) / t_entry_price - cost
cumret *= (1 + t_ret)
t_entry_price = c
t_dir = s
equity_vals[i] = capital * cumret
# Bar returns for Sharpe
bar_returns = np.zeros(n)
for i in range(1, n):
if price_arr[i - 1] != 0 and last_dir is not None:
bar_returns[i] = sig_arr[i - 1] * (price_arr[i] - price_arr[i - 1]) / price_arr[i - 1] if sig_arr[i - 1] != 0 else 0.0
return {
"equity": pd.Series(equity_vals, index=close.index),
"trade_returns": trade_returns,
"long_returns": long_returns,
"short_returns": short_returns,
"bar_returns": bar_returns,
"trade_log": trade_log,
}
def compute_trade_stats(trades, capital=10000):
"""Single source of truth for trade statistics.
Every display path reads from this — no recomputation anywhere.
All values are rounded and JSON-safe (no inf/nan).
"""
if not trades:
return {"n": 0, "wins": 0, "losses": 0, "wr": 0, "avg": 0,
"best": 0, "worst": 0, "ret": 0, "np": 0, "mdd": 0,
"pf": 0, "rr": 0, "expect": 0}
w = [r for r in trades if r > 0]
l = [r for r in trades if r < 0]
cumret = 1.0
for r in trades:
cumret *= (1 + r)
net_p = capital * (cumret - 1)
# Max drawdown
eq = np.cumprod([1.0] + [1 + r for r in trades])
peak = np.maximum.accumulate(eq)
mdd = float(((eq - peak) / peak).min()) if len(eq) > 1 else 0.0
# Profit Factor
gross_w = sum(w) if w else 0
gross_l = abs(sum(l)) if l else 0
pf = gross_w / gross_l if gross_l > 0 else (9999.0 if gross_w > 0 else 0)
# Risk:Reward
avg_w = float(np.mean(w)) if w else 0
avg_l = abs(float(np.mean(l))) if l else 0
rr = avg_w / avg_l if avg_l > 0 else (9999.0 if avg_w > 0 else 0)
# Expectancy
expect = net_p / len(trades)
return {
"n": len(trades), "wins": len(w), "losses": len(l),
"wr": round(len(w) / len(trades), 4),
"avg": round(float(np.mean(trades)), 6),
"best": round(max(w), 6) if w else 0,
"worst": round(min(l), 6) if l else 0,
"ret": round(cumret - 1, 6),
"np": round(net_p, 2),
"mdd": round(mdd, 6),
"pf": round(pf, 2),
"rr": round(rr, 2),
"expect": round(expect, 2),
}
def compute_metrics(bt_result, close_test, capital=10000):
"""Compute all standard metrics from backtest result.
Uses trade-level compounding (same as webapp _trade_stats) for accuracy.
Returns: dict with total_ret, bh_ret, sharpe_strat, sharpe_bh, mdd, n_trades
"""
equity = bt_result["equity"]
trade_returns = bt_result["trade_returns"]
# Total return — trade-level compounding (matches webapp)
if trade_returns:
cumret = 1.0
for r in trade_returns:
cumret *= (1 + r)
total_ret = cumret - 1
else:
total_ret = 0.0
# Buy and hold
bh_equity = capital * (close_test / close_test.iloc[0])
bh_ret = (bh_equity.iloc[-1] - capital) / capital if capital != 0 else 0.0
# Sharpe ratio — trade-level (matches webapp: sqrt(252*26) annualization)
if len(trade_returns) >= 2 and float(np.std(trade_returns)) > 0:
sharpe_strat = float(np.mean(trade_returns) / np.std(trade_returns) * np.sqrt(252 * 26))
else:
sharpe_strat = 0.0
bh_rets = bh_equity.pct_change().dropna()
if len(bh_rets) > 1 and bh_rets.std() != 0:
sharpe_bh = float((bh_rets.mean() / bh_rets.std()) * np.sqrt(252 * 24 * 4))
else:
sharpe_bh = 0.0
# Max drawdown — trade-level (matches webapp)
if trade_returns:
eq = np.cumprod([1.0] + [1 + r for r in trade_returns])
peak = np.maximum.accumulate(eq)
mdd = float(((eq - peak) / peak).min()) if len(eq) > 1 else 0.0
else:
mdd = 0.0
return {
"total_ret": float(total_ret),
"bh_ret": float(bh_ret),
"sharpe_strat": float(sharpe_strat) if not np.isnan(sharpe_strat) else 0.0,
"sharpe_bh": float(sharpe_bh) if not np.isnan(sharpe_bh) else 0.0,
"mdd": float(mdd),
"n_trades": len(trade_returns),
}
# Diagnostics line/histogram series (equity / drawdown / rolling_acc / conf_hist)
# only feed the small Diagnostics charts — they're never used by the price chart
# or scroll-back. On a 1-min model trained over the (2.2-capped) window these are
# still ~30k points each; downsample to a visually-identical resolution before the
# dict leaves the trainer so it doesn't carry that into Server-A RAM / Postgres.
_RESULTS_SERIES_MAX = 5000
def _downsample_idx(n, cap=_RESULTS_SERIES_MAX):
"""Evenly-spaced index list spanning [0, n-1] (first+last always kept), or
None when no downsampling is needed (n <= cap)."""
if n <= cap:
return None
return np.unique(np.linspace(0, n - 1, cap).astype(int)).tolist()
def _take(arr, idx):
"""Subset a list by an index list (idx may be None → return arr unchanged)."""
if idx is None or not isinstance(arr, list):
return arr
return [arr[i] for i in idx]
# trade_log / train_trade_log are lists of per-trade dicts (display-only — the
# Trade Log tab). They scale with TRADE count, not bar count, so the bar-window
# cap (Phase 2.2) doesn't bound them — a degenerate near-every-bar model can put
# 10k+ trade dicts in the blob (>3 MB). Cap each (independently — a small-N model
# keeps every trade) to the most-recent N, recording `*_total` + `*_truncated`
# so the true count is still reported. Real strategies have far fewer than
# _TRADE_LOG_MAX trades, so this only ever bites pathological models.
_TRADE_LOG_MAX = 5000
def _cap_trade_log(tl):
"""Return (capped_list, original_len, was_truncated)."""
if not isinstance(tl, list) or len(tl) <= _TRADE_LOG_MAX:
return tl, (len(tl) if isinstance(tl, list) else 0), False
return tl[-_TRADE_LOG_MAX:], len(tl), True
def build_return_dict(split_result, bt_result, metrics, model, feature_cols,
signal_full, p_pos_test, p_neg_test, custom_figs=None,
bt_train_result=None, pre_stats=None):
"""Assemble the complete return dict. Handles ALL serialization.
Never returns Timestamps, numpy arrays, or non-JSON types.
Returns: JSON-safe dict with all required keys
"""
df = split_result["df"]
close = split_result["close"]
close_test = split_result["close_test"]
X_test = split_result["X_test"]
y_test = split_result["y_test"]
equity = bt_result["equity"]
bar_returns = bt_result["bar_returns"]
# OHLC
ohlc_dates = [str(x) for x in df.index.tolist()]
def _safe_list(arr):
return [float(x) if (x is not None and not np.isnan(x) and not np.isinf(x)) else None
for x in arr]
# Overlays
bb, ma = compute_overlays(close, df.index)
# Buy and hold equity
capital = equity.iloc[0] if len(equity) > 0 else 10000
bh_equity = capital * (close_test / close_test.iloc[0])
# Confusion matrix
from sklearn.metrics import confusion_matrix
pred_test = model.predict(X_test)
y_test_arr = np.asarray(y_test)
cm = confusion_matrix(y_test_arr, pred_test, labels=[-1, 0, 1])
# Rolling accuracy
sig_arr = signal_full.reindex(close_test.index).values
correct = pd.Series((pred_test == y_test_arr).astype(float), index=X_test.index)
active_test = pd.Series(sig_arr != 0, index=close_test.index) if len(sig_arr) == len(close_test) else pd.Series(True, index=close_test.index)
correct_active = correct.where(active_test, other=np.nan)
rolling_acc = correct_active.rolling(30, min_periods=1).mean()
# Feature importance
importances = model.feature_importances_
fi_pairs = sorted(zip(feature_cols, importances), key=lambda x: x[1])[-15:]
# Drawdown
rolling_max = equity.cummax()
drawdown = (equity - rolling_max) / rolling_max.replace(0, np.nan)
drawdown = drawdown.fillna(0.0)
# ── Downsample the Diagnostics-only series (see _downsample_idx) ──────────
_eq_dates = [str(x) for x in close_test.index.tolist()]
_eq_strat = _safe_list(equity.values)
_eq_bh = _safe_list(bh_equity.values)
_eq_idx = _downsample_idx(len(_eq_dates))
_eq_dates, _eq_strat, _eq_bh = _take(_eq_dates, _eq_idx), _take(_eq_strat, _eq_idx), _take(_eq_bh, _eq_idx)
_ra_dates = [str(x) for x in rolling_acc.index.tolist()]
_ra_vals = [float(x) if (not np.isnan(x) and not np.isinf(x)) else None for x in rolling_acc.values]
_ra_idx = _downsample_idx(len(_ra_dates))
_ra_dates, _ra_vals = _take(_ra_dates, _ra_idx), _take(_ra_vals, _ra_idx)
_dd_dates = [str(x) for x in drawdown.index.tolist()]
_dd_vals = _safe_list(drawdown.values)
_dd_idx = _downsample_idx(len(_dd_dates))
_dd_dates, _dd_vals = _take(_dd_dates, _dd_idx), _take(_dd_vals, _dd_idx)
_cp_pos = [float(x) for x in (p_pos_test.tolist() if hasattr(p_pos_test, 'tolist') else list(p_pos_test))]
_cp_neg = [float(x) for x in (p_neg_test.tolist() if hasattr(p_neg_test, 'tolist') else list(p_neg_test))]
_cp_pos = _take(_cp_pos, _downsample_idx(len(_cp_pos)))
_cp_neg = _take(_cp_neg, _downsample_idx(len(_cp_neg)))
# ── Trade logs — display-only (Trade Log tab); cap to most-recent N with a
# `_total` field so the true count is still reported (see _cap_trade_log).
# NB: ret_dist arrays are left FULL — a downstream path in callbacks.py
# recomputes n_trades/win-rate from len(ret_dist), so a sample would skew
# the displayed counts; they're small anyway and gzip handles them.
_tl_test, _tl_test_n, _tl_test_tr = _cap_trade_log(bt_result.get("trade_log", []))
_tl_tr, _tl_tr_n, _tl_tr_tr = _cap_trade_log(bt_train_result.get("trade_log", []) if bt_train_result else [])
return {
"ohlc": {
"dates": ohlc_dates,
"open": _safe_list(df["open"].values),
"high": _safe_list(df["high"].values),
"low": _safe_list(df["low"].values),
"close": _safe_list(df["close"].values),
},
"signals": {
"dates": [str(x) for x in signal_full.index.tolist()],
"values": [float(x) for x in signal_full.values],
},
"bb": bb,
"ma": ma,
"equity": {
"dates": _eq_dates,
"strategy": _eq_strat,
"bh": _eq_bh,
},
"feature_importance": {
"names": [p[0] for p in fi_pairs],
"values": [float(p[1]) for p in fi_pairs],
},
"conf_matrix": cm.tolist(),
"conf_hist": {
"p_pos": _cp_pos,
"p_neg": _cp_neg,
},
"rolling_acc": {
"dates": _ra_dates,
"values": _ra_vals,
},
"drawdown": {
"dates": _dd_dates,
"values": _dd_vals,
},
"ret_dist": [float(x) for x in bt_result["trade_returns"]],
"ret_dist_long": [float(x) for x in bt_result["long_returns"]],
"ret_dist_short": [float(x) for x in bt_result["short_returns"]],
"train_ret_dist": [float(x) for x in bt_train_result["trade_returns"]] if bt_train_result else [],
"train_ret_dist_long": [float(x) for x in bt_train_result["long_returns"]] if bt_train_result else [],
"train_ret_dist_short": [float(x) for x in bt_train_result["short_returns"]] if bt_train_result else [],
"trade_log": _tl_test,
"train_trade_log": _tl_tr,
"trade_log_total": _tl_test_n,
"train_trade_log_total": _tl_tr_n,
"trade_log_truncated": _tl_test_tr,
"train_trade_log_truncated": _tl_tr_tr,
**(pre_stats or {}),
"metrics": metrics,
"split_dt": split_result["split_dt"],
"split_idx": int(split_result["split_idx"]),
"n_train": int(split_result["n_train"]),
"n_test": int(split_result["n_test"]),
"feature_cols": list(feature_cols),
"custom_figs": custom_figs or [],
}
# ════════════════════════════════════════════════════════════════════════════
# STRATEGY FRAMEWORK v2 — Config-driven architecture
# Claude writes feature_engineering() + strategy_config(). Framework does rest.
# ════════════════════════════════════════════════════════════════════════════
import importlib
_MODEL_REGISTRY = {
"XGBClassifier": ("xgboost", "XGBClassifier"),
"RandomForestClassifier": ("sklearn.ensemble", "RandomForestClassifier"),
"GradientBoostingClassifier": ("sklearn.ensemble", "GradientBoostingClassifier"),
"LogisticRegression": ("sklearn.linear_model", "LogisticRegression"),
"ExtraTreesClassifier": ("sklearn.ensemble", "ExtraTreesClassifier"),
"AdaBoostClassifier": ("sklearn.ensemble", "AdaBoostClassifier"),
}
def _build_model_from_config(config, X_train, y_train_enc):
"""Build, fit, and wrap a model from strategy_config dict."""
model_type = config.get("model_type", "RandomForestClassifier")
model_params = dict(config.get("model_params", {}))
if model_type not in _MODEL_REGISTRY:
raise ValueError(f"Unknown model_type '{model_type}'. Valid: {list(_MODEL_REGISTRY.keys())}")
module_path, class_name = _MODEL_REGISTRY[model_type]
mod = importlib.import_module(module_path)
cls = getattr(mod, class_name)
# XGBoost defaults
if class_name == "XGBClassifier":
model_params.setdefault("use_label_encoder", False)
model_params.setdefault("eval_metric", "mlogloss")
model_params.setdefault("tree_method", "hist")
# Determinism > speed (2026-05-25). XGBoost hist with n_jobs=-1 is
# NON-reproducible even with random_state set — the parallel histogram
# gradient-sum order varies across threads, so the SAME code + data
# gives a slightly different model (and backtest) every run. Forcing
# single-thread makes training bit-reproducible so: (a) a user who
# copies a strategy and reruns it gets identical numbers, (b) the
# community "Live" score matches a redeploy, (c) "same code, different
# result" support reports go away. Cost: single-threaded XGB (a few
# seconds slower on large windows; hist is fast so it's minor). FORCED
# (not setdefault) so the guarantee can't be silently broken by a
# strategy passing n_jobs. Exact reproducibility holds within the
# platform (pinned versions / same Modal image); a user's own machine
# with different xgboost/numpy/CPU can still differ in low-order bits.
model_params["n_jobs"] = 1
# Common defaults
model_params.setdefault("random_state", 42)
from model_wrapper import ModelWrapper
clf = cls(**model_params)
clf.fit(X_train, y_train_enc)
enc = LabelEncoder()
enc.fit([-1, 0, 1])
return ModelWrapper(clf, original_classes=enc.classes_, n_features=X_train.shape[1])
def _generate_signals(model, X, threshold):
"""Framework-owned signal generation. Deterministic threshold logic."""
proba = model.predict_proba(X)
classes = list(model.classes_)
idx_pos = classes.index(1) if 1 in classes else None
idx_neg = classes.index(-1) if -1 in classes else None
p_pos = proba[:, idx_pos] if idx_pos is not None else np.zeros(len(X))
p_neg = proba[:, idx_neg] if idx_neg is not None else np.zeros(len(X))
signal_vals = np.zeros(len(X))
signal_vals = np.where(p_pos >= threshold, 1.0, signal_vals)
signal_vals = np.where(p_neg >= threshold, -1.0, signal_vals)
# Both exceed: pick stronger
both = (p_pos >= threshold) & (p_neg >= threshold)
signal_vals[both] = np.where(p_pos[both] >= p_neg[both], 1.0, -1.0)
return pd.Series(signal_vals, index=X.index), p_pos, p_neg
# ── Filter functions (all no-ops when config value is None) ──────────────
def _apply_direction_filter(signal, direction):
"""Zero out signals that don't match allowed direction."""
if direction is None or direction == "both":
return signal
s = signal.copy()
if direction == "long":
s[s < 0] = 0.0
elif direction == "short":
s[s > 0] = 0.0
return s
def _apply_session_filter(signal, index, session_hours):
"""Zero out signals outside session hours [start, end] UTC."""
if session_hours is None:
return signal
s = signal.copy()
start_h, end_h = session_hours[0], session_hours[1]
hours = index.hour
if start_h <= end_h:
mask = (hours >= start_h) & (hours < end_h)
else: # wrap around midnight, e.g. [22, 6]
mask = (hours >= start_h) | (hours < end_h)
s[~mask] = 0.0
return s
def _apply_atr_filter(signal, close, high, low, min_atr):
"""Zero out signals when NATR(14) is below threshold."""
if min_atr is None:
return signal
hl = high - low
hc = (high - close.shift(1)).abs()
lc = (low - close.shift(1)).abs()
tr = pd.concat([hl, hc, lc], axis=1).max(axis=1)
atr14 = tr.ewm(com=13, adjust=False).mean()
natr = atr14 / close.replace(0, np.nan)
s = signal.copy()
s[natr < min_atr] = 0.0
return s
def _apply_trend_filter(signal, close, trend_filter):
"""Only allow signals aligned with trend. e.g. 'sma_50': longs above SMA, shorts below."""
if trend_filter is None:
return signal
# Parse: "sma_50" → SMA with period 50
parts = trend_filter.lower().replace("-", "_").split("_")
if len(parts) >= 2 and parts[0] in ("sma", "ema"):
period = int(parts[1])
else:
return signal # unknown filter, skip
if parts[0] == "sma":
trend_line = close.rolling(period).mean()
else:
trend_line = close.ewm(span=period, adjust=False).mean()
s = signal.copy()
# Longs only above trend, shorts only below
s[(s > 0) & (close < trend_line)] = 0.0
s[(s < 0) & (close > trend_line)] = 0.0
return s
# ── run_backtest_v2: framework-owned SL/TP/cooldown/position management ──
def run_backtest_v2(signal, close, high, low, config, capital=10000, cost=2e-5):
"""Backtest with SL/TP/cooldown/direction handling built into the engine.
Unlike run_backtest (v1), this function handles position exits internally.
Returns: same dict shape as run_backtest()
"""
stop_loss = config.get("stop_loss")
take_profit = config.get("take_profit")
cooldown = config.get("cooldown", 0)
on_opposite = config.get("on_opposite", "reverse")
sig_arr = signal.values
close_arr = close.values
high_arr = high.values
low_arr = low.values
idx = signal.index
n = len(close_arr)
trade_returns = []
long_returns = []
short_returns = []
trade_log = []
equity_vals = np.full(n, float(capital))
cumret = 1.0
position = 0.0 # current direction: 1.0, -1.0, or 0.0 (flat)
entry_price = None
entry_bar = None # index into arrays for entry time
cooldown_remaining = 0
def _log_trade(exit_bar, exit_px, ret, reason):
trade_log.append({
"type": "Buy" if position == 1.0 else "Sell",
"entry_time": str(idx[entry_bar]),
"exit_time": str(idx[exit_bar]),
"entry_price": round(entry_price, 5),
"exit_price": round(exit_px, 5),
"pnl": round(position * (exit_px - entry_price), 5),
"pnl_pct": round(ret * 100, 3),
"exit_reason": reason,
})
for i in range(n):
c = close_arr[i]
h = high_arr[i]
lo = low_arr[i]
s = sig_arr[i]
# 1. Check SL/TP if in trade
if position != 0.0 and entry_price is not None:
hit_sl = False
hit_tp = False
exit_price = None
if position == 1.0: # long
if stop_loss is not None and lo <= entry_price * (1 - stop_loss):
hit_sl = True
exit_price = entry_price * (1 - stop_loss)
elif take_profit is not None and h >= entry_price * (1 + take_profit):
hit_tp = True
exit_price = entry_price * (1 + take_profit)
else: # short
if stop_loss is not None and h >= entry_price * (1 + stop_loss):
hit_sl = True
exit_price = entry_price * (1 + stop_loss)
elif take_profit is not None and lo <= entry_price * (1 - take_profit):
hit_tp = True
exit_price = entry_price * (1 - take_profit)
if hit_sl or hit_tp:
ret = float(position * (exit_price - entry_price) / entry_price - cost)
trade_returns.append(ret)
if position == 1.0:
long_returns.append(ret)
else:
short_returns.append(ret)
_log_trade(i, exit_price, ret, "SL" if hit_sl else "TP")
cumret *= (1 + ret)
position = 0.0
entry_price = None
entry_bar = None
cooldown_remaining = cooldown
equity_vals[i] = capital * cumret
continue
# 2. Cooldown
if cooldown_remaining > 0:
cooldown_remaining -= 1
equity_vals[i] = capital * cumret
continue
# 3. Signal processing
if s != 0.0:
if position == 0.0:
# Open new trade
position = s
entry_price = c
entry_bar = i
elif s != position:
# Opposite signal
if on_opposite == "reverse":
# Close current + open opposite
ret = float(position * (c - entry_price) / entry_price - cost)
trade_returns.append(ret)
if position == 1.0:
long_returns.append(ret)
else:
short_returns.append(ret)
_log_trade(i, c, ret, "signal")
cumret *= (1 + ret)
position = s
entry_price = c
entry_bar = i
else: # close_only
# Close current, go flat
ret = float(position * (c - entry_price) / entry_price - cost)
trade_returns.append(ret)
if position == 1.0:
long_returns.append(ret)
else:
short_returns.append(ret)
_log_trade(i, c, ret, "close_only")
cumret *= (1 + ret)
position = 0.0
entry_price = None
entry_bar = None
cooldown_remaining = cooldown
equity_vals[i] = capital * cumret
# Close last open trade at final close
if position != 0.0 and entry_price is not None and n > 0 and entry_price != 0:
c = close_arr[-1]
ret = float(position * (c - entry_price) / entry_price - cost)
trade_returns.append(ret)
if position == 1.0:
long_returns.append(ret)
else:
short_returns.append(ret)
_log_trade(n - 1, c, ret, "end")
cumret *= (1 + ret)
equity_vals[-1] = capital * cumret
# Bar returns for Sharpe (approximate)
bar_returns = np.zeros(n)
for i in range(1, n):
if close_arr[i - 1] != 0 and sig_arr[i - 1] != 0:
bar_returns[i] = sig_arr[i - 1] * (close_arr[i] - close_arr[i - 1]) / close_arr[i - 1]
return {
"equity": pd.Series(equity_vals, index=close.index),
"trade_returns": trade_returns,
"long_returns": long_returns,
"short_returns": short_returns,
"bar_returns": bar_returns,
"trade_log": trade_log,
}
# ── run_strategy: the v2 orchestrator ────────────────────────────────────
def run_strategy(feature_fn, config_fn, data_path, start_date="", end_date="",
validation_date="", train_split=0.7, register_model_fn=None):
"""Config-driven strategy execution. Claude writes feature_fn + config_fn,
framework does everything else.
Returns: results dict (same format as webapp expects)
"""
config = config_fn()
# Auto-correct SL/TP if Claude passed percentage instead of decimal
for _key in ("stop_loss", "take_profit"):
_val = config.get(_key)
if _val is not None and _val > 0.1: # >10% is almost certainly a percentage
config[_key] = _val / 100.0
print(f"[strategy] Auto-corrected {_key}: {_val} -> {config[_key]} (was percentage, converted to decimal)")
# 1. Load data
df, close, open_, high, low = load_ohlc(data_path, start_date, end_date)
# 2. Feature engineering (Claude's function)
df = feature_fn(df, close, open_, high, low)
close = df["close"]
open_ = df["open"]
high = df["high"]
low = df["low"]
# 3. Warm-up detection: drop rows where features have NaN BEFORE any fill
feature_cols = [c for c in df.columns if c not in ("open", "high", "low", "close")]
raw_nans = df[feature_cols].isna().any(axis=1)
valid_rows = ~raw_nans
if valid_rows.any():
first_valid = valid_rows.idxmax()
if raw_nans.loc[:first_valid].any():
df = df.loc[first_valid:].copy()
close = df["close"]
open_ = df["open"]
high = df["high"]
low = df["low"]
# 4. Target
horizon = config.get("target_horizon", 4)
target = make_target(close, horizon=horizon)
# 5. Split (ffill only within each partition — no bfill leak)
mask = target.notna()
df = df[mask].copy()
target = target[mask]
close = df["close"]
high = df["high"]
low = df["low"]
X = df[feature_cols].copy()
X = X.replace([np.inf, -np.inf], np.nan)
if validation_date:
split_idx = len(df[df.index <= validation_date])
else:
split_idx = int(len(df) * train_split)
split_idx = max(1, min(split_idx, len(df) - 1))
# ffill within train and test separately (no leak)
X_train = X.iloc[:split_idx].ffill().fillna(0.0)
X_test = X.iloc[split_idx:].ffill().fillna(0.0)
X = pd.concat([X_train, X_test])
y_train = target.iloc[:split_idx]
y_test = target.iloc[split_idx:]
close_train = close.iloc[:split_idx]
close_test = close.iloc[split_idx:]
high_test = high.iloc[split_idx:]
low_test = low.iloc[split_idx:]
enc = LabelEncoder()
enc.fit([-1, 0, 1])
y_train_enc = enc.transform(y_train)
y_test_enc = enc.transform(y_test)
split_dt = str(df.index[split_idx])
sp = {
"df": df, "X_train": X_train, "X_test": X_test,
"y_train": y_train, "y_test": y_test,
"y_train_enc": y_train_enc, "y_test_enc": y_test_enc,
"enc": enc,
"close": close, "close_train": close_train, "close_test": close_test,
"split_idx": split_idx, "split_dt": split_dt,
"n_train": len(X_train), "n_test": len(X_test),
}
# 6. Build model from config
model = _build_model_from_config(config, X_train, y_train_enc)
# 7. Generate signals
threshold = config.get("signal_threshold", 0.55)
signal_train, p_pos_train, p_neg_train = _generate_signals(model, X_train, threshold)
signal_test, p_pos_test, p_neg_test = _generate_signals(model, X_test, threshold)
# 8. Apply filters (order: direction → session → ATR → trend)
direction = config.get("direction", "both")
signal_test = _apply_direction_filter(signal_test, direction)
signal_train = _apply_direction_filter(signal_train, direction)
session_filter = config.get("session_filter")
signal_test = _apply_session_filter(signal_test, signal_test.index, session_filter)
signal_train = _apply_session_filter(signal_train, signal_train.index, session_filter)
min_atr = config.get("min_atr")
if min_atr is not None:
signal_test = _apply_atr_filter(signal_test, close_test, high_test, low_test, min_atr)
trend_filter = config.get("trend_filter")
if trend_filter is not None:
signal_test = _apply_trend_filter(signal_test, close_test, trend_filter)
signal_full = pd.concat([signal_train, signal_test])
# 9. Backtest with SL/TP/cooldown (test + train)
high_train = high.iloc[:split_idx]
low_train = low.iloc[:split_idx]
has_risk = (config.get("stop_loss") is not None or
config.get("take_profit") is not None or
config.get("cooldown", 0) > 0 or
config.get("on_opposite", "reverse") != "reverse")
if has_risk:
bt = run_backtest_v2(signal_test, close_test, high_test, low_test, config, capital=10000)
bt_train = run_backtest_v2(signal_train, close_train, high_train, low_train, config, capital=10000)
else:
bt = run_backtest(signal_test, close_test, capital=10000)
bt_train = run_backtest(signal_train, close_train, capital=10000)
# 10. Metrics
metrics = compute_metrics(bt, close_test, capital=10000)
# 11. Pre-compute all trade stats (single source of truth)
pre_stats = {
"train_stats": compute_trade_stats(bt_train.get("trade_returns", []), capital=10000),
"test_stats": compute_trade_stats(bt.get("trade_returns", []), capital=10000),
"long_stats": compute_trade_stats(bt.get("long_returns", []), capital=10000),
"short_stats": compute_trade_stats(bt.get("short_returns", []), capital=10000),
}
# 12. Register model
if register_model_fn is not None:
register_model_fn(model)
# 13. Build return dict
return build_return_dict(sp, bt, metrics, model, feature_cols,
signal_full, p_pos_test, p_neg_test, custom_figs=[],
bt_train_result=bt_train, pre_stats=pre_stats)
# ── End strategy_utils ──
DATA_PATH = '/root/Desktop/QuantifyMe/data/ohlc/GBPUSD_15min.parquet'
START_DATE = '2026-04-15'
END_DATE = '2026-05-25'
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
# ── EMA crossover core signals ──────────────────────────────────────────
ema9 = close.ewm(span=9, adjust=False).mean()
ema21 = close.ewm(span=21, adjust=False).mean()
ema50 = close.ewm(span=50, adjust=False).mean()
ema200 = close.ewm(span=200, adjust=False).mean()
df["ema9"] = ema9
df["ema21"] = ema21
df["ema50"] = ema50
df["ema200"] = ema200
# Raw spread and normalised spread
df["ema_diff"] = ema9 - ema21
df["ema_diff_norm"] = (ema9 - ema21) / close
# Cross signal: +1 when ema9 > ema21, -1 otherwise
df["ema_cross_sign"] = np.where(ema9 > ema21, 1.0, -1.0)
# Momentum of the spread (rate of change of spread)
df["ema_diff_roc1"] = df["ema_diff"].diff(1)
df["ema_diff_roc3"] = df["ema_diff"].diff(3)
# Distance of price from ema50 and ema200 (normalised)
df["dist_ema50"] = (close - ema50) / close
df["dist_ema200"] = (close - ema200) / close
# ── RSI (14) ────────────────────────────────────────────────────────────
delta = close.diff()
gain = delta.clip(lower=0.0)
loss = (-delta).clip(lower=0.0)
avg_g = gain.ewm(com=13, adjust=False).mean()
avg_l = loss.ewm(com=13, adjust=False).mean()
rs = avg_g / avg_l.replace(0.0, np.nan)
rsi14 = 100.0 - 100.0 / (1.0 + rs)
df["rsi14"] = rsi14
# RSI normalised and centred
df["rsi14_norm"] = (rsi14 - 50.0) / 50.0
# ── MACD ────────────────────────────────────────────────────────────────
ema12 = close.ewm(span=12, adjust=False).mean()
ema26 = close.ewm(span=26, adjust=False).mean()
macd_line = ema12 - ema26
signal_ln = macd_line.ewm(span=9, adjust=False).mean()
macd_hist = macd_line - signal_ln
df["macd_line"] = macd_line / close
df["macd_signal"] = signal_ln / close
df["macd_hist"] = macd_hist / close
df["macd_cross"] = np.where(macd_line > signal_ln, 1.0, -1.0)
# ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
bb_mid = close.rolling(20).mean()
bb_std = close.rolling(20).std(ddof=0)
bb_upper = bb_mid + 2.0 * bb_std
bb_lower = bb_mid - 2.0 * bb_std
bb_width = (bb_upper - bb_lower) / bb_mid.replace(0.0, np.nan)
bb_pct = (close - bb_lower) / (bb_upper - bb_lower).replace(0.0, np.nan)
df["bb_width"] = bb_width
df["bb_pct"] = bb_pct
# ── ATR (14) ─────────────────────────────────────────────────────────────
tr = pd.concat([
high - low,
(high - close.shift(1)).abs(),
(low - close.shift(1)).abs()
], axis=1).max(axis=1)
atr14 = tr.ewm(com=13, adjust=False).mean()
df["atr14"] = atr14
df["natr14"] = atr14 / close # normalised ATR (volatility proxy)
# ── Stochastic %K / %D (14, 3) ──────────────────────────────────────────
low14 = low.rolling(14).min()
high14 = high.rolling(14).max()
stoch_k = 100.0 * (close - low14) / (high14 - low14).replace(0.0, np.nan)
stoch_d = stoch_k.rolling(3).mean()
df["stoch_k"] = stoch_k / 100.0
df["stoch_d"] = stoch_d / 100.0
df["stoch_diff"] = (stoch_k - stoch_d) / 100.0
# ── Rate of Change ───────────────────────────────────────────────────────
df["roc1"] = close.pct_change(1)
df["roc4"] = close.pct_change(4)
df["roc8"] = close.pct_change(8)
df["roc16"] = close.pct_change(16)
# ── Candle features ──────────────────────────────────────────────────────
body = (close - open_).abs()
candle_rng = (high - low).replace(0.0, np.nan)
df["body_ratio"] = body / candle_rng
df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / candle_rng
df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / candle_rng
df["candle_dir"] = np.where(close >= open_, 1.0, -1.0)
# ── Volume-like proxy: range relative to rolling average ────────────────
df["range_ratio"] = candle_rng / candle_rng.rolling(20).mean()
# ── Lagged EMA diff features ─────────────────────────────────────────────
for lag in [1, 2, 3, 4]:
df[f"ema_diff_lag{lag}"] = df["ema_diff_norm"].shift(lag)
# ── Lagged RSI ───────────────────────────────────────────────────────────
for lag in [1, 2, 4]:
df[f"rsi14_lag{lag}"] = df["rsi14_norm"].shift(lag)
# ── Rolling volatility (std of returns) ──────────────────────────────────
ret = close.pct_change()
df["vol_8"] = ret.rolling(8).std()
df["vol_16"] = ret.rolling(16).std()
df["vol_32"] = ret.rolling(32).std()
# ── Trend strength: ADX-like (simplified) ────────────────────────────────
plus_dm = (high.diff()).clip(lower=0.0)
minus_dm = (-low.diff()).clip(lower=0.0)
overlap = pd.concat([plus_dm, minus_dm], axis=1).min(axis=1)
plus_dm = plus_dm - overlap
minus_dm = minus_dm - overlap
smooth_tr = tr.ewm(com=13, adjust=False).mean()
plus_di = 100.0 * plus_dm.ewm(com=13, adjust=False).mean() / smooth_tr.replace(0.0, np.nan)
minus_di = 100.0 * minus_dm.ewm(com=13, adjust=False).mean() / smooth_tr.replace(0.0, np.nan)
di_sum = (plus_di + minus_di).replace(0.0, np.nan)
adx = ((plus_di - minus_di).abs() / di_sum * 100.0).ewm(com=13, adjust=False).mean()
df["adx"] = adx / 100.0
df["plus_di"] = plus_di / 100.0
df["minus_di"] = minus_di / 100.0
# ── Session hour (UTC) ───────────────────────────────────────────────────
if hasattr(df.index, "hour"):
df["hour_sin"] = np.sin(2.0 * np.pi * df.index.hour / 24.0)
df["hour_cos"] = np.cos(2.0 * np.pi * df.index.hour / 24.0)
else:
df["hour_sin"] = 0.0
df["hour_cos"] = 1.0
# ── Fill any NaN from warm-up periods ────────────────────────────────────
df = df.bfill().ffill()
return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
return {
"title": "EMA 9/21 Crossover + MACD Momentum (XGBoost)",
"model_type": "XGBClassifier",
"model_params": {
"n_estimators": 400,
"max_depth": 4,
"learning_rate": 0.04,
"subsample": 0.80,
"colsample_bytree": 0.75,
"min_child_weight": 3,
"gamma": 0.10,
"reg_alpha": 0.05,
"reg_lambda": 1.50,
"objective": "binary:logistic",
"tree_method": "hist",
"random_state": 42,
},
"signal_threshold": 0.55,
"direction": "both",
"stop_loss": 0.0030,
"take_profit": 0.0060,
"cooldown": 0,
"max_positions": 1,
"on_opposite": "reverse",
"session_filter": [6, 20],
"min_atr": None,
"trend_filter": "sma_50",
"target_horizon": 4,
"objective": (
"Maximize Sharpe ratio on EUR/USD 15-min data. "
"Core signal: EMA(9) vs EMA(21) crossover enriched with MACD, RSI, "
"Bollinger %B, Stochastic, ATR, ADX, candle structure and rolling "
"volatility. XGBoost with moderate depth (4) and strong regularisation "
"(gamma, alpha, lambda) prevents overfitting on ~6 weeks of intraday data. "
"A 0.55 probability threshold filters low-confidence signals. "
"A 2:1 TP:SL ratio (30 bp SL / 60 bp TP) improves the reward-risk "
"balance. Session filter [6,20] UTC keeps the model away from the thin "
"Asian pre-open. trend_filter sma_50 aligns entries with the prevailing "
"short-term trend to reduce chop. Cooldown=0 and reverse-on-opposite "
"allow continuous participation in trending EMA crossover moves."
),
"notes": (
"round-trip cost 2e-5 is accounted for by the framework. "
"target_horizon=4 bars (1 hour ahead) suits EMA crossover which "
"generates medium-frequency signals rather than tick-level scalps. "
"All features are normalised or expressed as ratios to minimise "
"scale sensitivity for the logistic-objective XGBoost."
),
}
# ── Framework v2: auto-generated wrapper ──
def train_and_backtest():
_vd = VALIDATION_DATE if 'VALIDATION_DATE' in globals() else ''
_ts = TRAIN_SPLIT if 'TRAIN_SPLIT' in globals() else 0.7
return run_strategy(
feature_engineering, strategy_config,
DATA_PATH, START_DATE, END_DATE,
_vd, _ts,
register_model_fn=register_model
)
|
||||||||||
|
1.59
|
Bollinger reversion
|
M
@malcolmtan
|
Bollin | 47.9%— | +1.53%— | 1.46— | 0.67%0.67% | 71— |
|
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-25 02:29:29
# Model : XGBoost
# Feature Eng. : buy when price closes below the lower Bollinger Band(20,2) and RSI(14) < 35, exit at the middle band + Auto-add features: ON
# Signal / Entry : —
# Optimization : —
# Risk Mgmt : —
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# Bollinger Band Mean-Reversion + RSI Filter (XGBoost, Sharpe)
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# ── Inlined strategy_utils ──
"""
strategy_utils.py — Standard utility functions for generated strategies.
Claude imports these instead of writing boilerplate from scratch.
This ensures consistent behavior across all generated strategies.
"""
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
# Max backtest window per timeframe. A finer timeframe over a longer window
# blows up the results dict / parquet load / Modal train time (the 2026-05-12
# OOM was a 1-min × multi-year sweep) — and a 1-min strategy gains nothing from
# 2 years of 1-min bars. Enforced HERE because every training path (UI / API /
# Modal) funnels through run_strategy → load_ohlc. Env-overridable so a future
# "max plan" / dedicated-server tier can lift it.
_TF_MAX_DAYS = {
"1min": 30,
"5min": 90,
"15min": 365,
"1h": 730,
}
def _fetch_ohlc_from_internal(symbol: str, tf: str, start: str, end: str):
"""Phase 3.2: fetch parquet bytes from Server A's /internal/ohlc endpoint
instead of reading a local file. Used inside Modal containers / Mac worker
pool (Phase 3.4) so every train sees the same source of truth as the chart.
Returns: pd.DataFrame (parquet decoded), or raises on any failure so the
caller can fall back / surface a clear error in the job.
"""
import hashlib as _hashlib, hmac as _hmac, io as _io, os as _os
import urllib.request as _ur, urllib.parse as _urp
base = (_os.environ.get("QM_INTERNAL_OHLC_BASE") or "").rstrip("/")
secret = (_os.environ.get("INTERNAL_WS_SECRET") or "").strip()
if not base:
raise RuntimeError("QM_INTERNAL_OHLC_BASE not set")
if not secret:
raise RuntimeError("INTERNAL_WS_SECRET not set")
msg = f"{symbol}|{tf}|{start}|{end}".encode("utf-8")
sig = _hmac.new(secret.encode("utf-8"), msg, _hashlib.sha256).hexdigest()
qs = _urp.urlencode({
"symbol": symbol, "tf": tf,
"start": start, "end": end, "sig": sig,
})
url = f"{base}/internal/ohlc?{qs}"
req = _ur.Request(url, headers={"User-Agent": "qm-worker/1.0"})
with _ur.urlopen(req, timeout=30) as resp:
if resp.status != 200:
raise RuntimeError(f"/internal/ohlc returned {resp.status}")
payload = resp.read()
print(f"[load_ohlc:internal] {symbol} {tf} fetched {len(payload)} bytes", flush=True)
return pd.read_parquet(_io.BytesIO(payload))
def _parse_symbol_tf_from_path(data_path: str):
"""Pull SYMBOL + TF out of a path like .../EURUSD_1min.parquet."""
import os as _os, re as _re
base = _os.path.basename(str(data_path))
m = _re.match(r"^([A-Z]{6})_(\d+min|\d+h)\.parquet$", base)
if not m:
return None, None
return m.group(1), m.group(2)
def load_ohlc(data_path, start_date="", end_date=""):
"""Load OHLC parquet, sort index, filter dates. Always returns consistent format.
The lower bound is clamped per timeframe (see _TF_MAX_DAYS) — a request for
more history than the cap silently starts later.
Phase 3.2: when env QM_USE_INTERNAL_OHLC=="1", fetch over HTTP from
Server A's /internal/ohlc endpoint instead of pd.read_parquet on a local
file (which on Modal is a stale Volume snapshot). The endpoint applies the
same day-cap, so the local cap-check below is a defensive no-op in that
path. Flag defaults to "0" → unchanged behavior.
Returns: (df, close, open_, high, low)
"""
import os as _os, re as _re
_use_internal = _os.environ.get("QM_USE_INTERNAL_OHLC", "0") == "1"
if _use_internal:
_sym, _tf = _parse_symbol_tf_from_path(data_path)
if not _sym or not _tf:
raise RuntimeError(
f"QM_USE_INTERNAL_OHLC=1 but DATA_PATH basename does not match "
f"SYMBOL_TF.parquet: {data_path}"
)
df = _fetch_ohlc_from_internal(_sym, _tf, start_date or "", end_date or "")
else:
df = pd.read_parquet(data_path)
df.index = pd.to_datetime(df.index)
df = df.sort_index()
# Per-timeframe window cap (timeframe inferred from the parquet filename).
_m = _re.search(r"_(\d+min|\d+h)\.parquet$", _os.path.basename(str(data_path)))
_tf = _m.group(1) if _m else None
_max_days = _TF_MAX_DAYS.get(_tf)
if _max_days and _max_days > 0 and len(df):
_env_override = _os.environ.get(f"QM_MAX_DAYS_{_tf.upper()}")
if _env_override and _env_override.isdigit():
_max_days = int(_env_override)
try:
_eff_end = pd.Timestamp(end_date) if end_date else df.index.max()
_eff_end = min(_eff_end, df.index.max())
_floor = _eff_end - pd.Timedelta(days=_max_days)
_req_start = pd.Timestamp(start_date) if start_date else df.index.min()
if _req_start < _floor:
print(f"[load_ohlc] {_tf} backtest window capped to {_max_days}d: "
f"start {_req_start.date()} -> {_floor.date()}", flush=True)
start_date = _floor
except Exception as _e:
print(f"[load_ohlc] window-cap check skipped ({_e})", flush=True)
if start_date:
df = df[df.index >= start_date]
if end_date:
df = df[df.index <= end_date]
return df, df["close"], df["open"], df["high"], df["low"]
def make_target(close, horizon=4):
"""Create target: direction N bars ahead. Default 4 bars = 1 hour on 15-min data.
Returns: target (pd.Series of -1, 0, 1)
"""
return np.sign(close.shift(-horizon) - close)
def split_data(df, target, feature_cols, train_split=0.7, validation_date=""):
"""Train/test split. Handles both ratio and date-based splits.
Drops NaN from target before splitting. Encodes labels to [0,1,2].
Returns: dict with keys:
X_train, X_test, y_train, y_test,
y_train_enc, y_test_enc, enc,
close_train, close_test,
split_idx, split_dt, n_train, n_test
"""
# Drop NaN from target
mask = target.notna()
df = df[mask].copy()
target = target[mask]
close = df["close"]
# Build feature matrix
X = df[feature_cols].copy()
X = X.bfill().ffill()
X = X.replace([np.inf, -np.inf], np.nan).fillna(0.0)
# Split
if validation_date:
split_idx = len(df[df.index <= validation_date])
else:
split_idx = int(len(df) * train_split)
split_idx = max(1, min(split_idx, len(df) - 1))
X_train = X.iloc[:split_idx]
X_test = X.iloc[split_idx:]
y_train = target.iloc[:split_idx]
y_test = target.iloc[split_idx:]
close_train = close.iloc[:split_idx]
close_test = close.iloc[split_idx:]
split_dt = str(df.index[split_idx])
# Label encoding — always fit on [-1, 0, 1]
enc = LabelEncoder()
enc.fit([-1, 0, 1])
y_train_enc = enc.transform(y_train)
y_test_enc = enc.transform(y_test)
return {
"df": df, "X_train": X_train, "X_test": X_test,
"y_train": y_train, "y_test": y_test,
"y_train_enc": y_train_enc, "y_test_enc": y_test_enc,
"enc": enc,
"close": close, "close_train": close_train, "close_test": close_test,
"split_idx": split_idx, "split_dt": split_dt,
"n_train": len(X_train), "n_test": len(X_test),
}
def compute_overlays(close, df_index):
"""Compute BB and MA overlays on full dataset. Always consistent.
Returns: (bb_dict, ma_dict)
"""
bb_mid = close.rolling(20).mean()
bb_std = close.rolling(20).std()
bb_upper = bb_mid + 2 * bb_std
bb_lower = bb_mid - 2 * bb_std
ma50 = close.rolling(50).mean()
ma100 = close.rolling(100).mean()
ma200 = close.rolling(200).mean()
def _safe(s):
s = s.reindex(df_index).bfill().ffill()
return [float(x) if (x is not None and not np.isnan(x) and not np.isinf(x)) else None
for x in s.values]
bb = {"upper": _safe(bb_upper), "mid": _safe(bb_mid), "lower": _safe(bb_lower)}
ma = {"ma50": _safe(ma50), "ma100": _safe(ma100), "ma200": _safe(ma200)}
return bb, ma
def run_backtest(signal, close, capital=10000, cost=2e-5):
"""Run backtest with transaction costs.
Uses price-based trade returns (same as webapp _compute_trades).
Signal 0 = hold (keep current position), not close.
Returns: dict with equity, trade_returns, long_returns, short_returns, bar_returns
"""
sig_arr = signal.values
price_arr = close.values
idx = signal.index
n = len(price_arr)
# Trade returns — price-based (matches webapp _compute_trades exactly)
trade_returns = []
long_returns = []
short_returns = []
trade_log = []
last_dir = None
entry_price = None
entry_bar = None
for i in range(n):
s = sig_arr[i]
c = price_arr[i]
if s != 0.0 and s != last_dir:
# Direction change — close previous trade, open new
if last_dir is not None and entry_price is not None and entry_price != 0:
ret = float(last_dir * (c - entry_price) / entry_price - cost)
trade_returns.append(ret)
if last_dir == 1:
long_returns.append(ret)
else:
short_returns.append(ret)
trade_log.append({
"type": "Buy" if last_dir == 1 else "Sell",
"entry_time": str(idx[entry_bar]),
"exit_time": str(idx[i]),
"entry_price": round(entry_price, 5),
"exit_price": round(c, 5),
"pnl": round(last_dir * (c - entry_price), 5),
"pnl_pct": round(ret * 100, 3),
"exit_reason": "signal",
})
entry_price = c
entry_bar = i
last_dir = s
# Close last open trade
if last_dir is not None and entry_price is not None and n > 0 and entry_price != 0:
c = price_arr[-1]
ret = float(last_dir * (c - entry_price) / entry_price - cost)
trade_returns.append(ret)
if last_dir == 1:
long_returns.append(ret)
else:
short_returns.append(ret)
trade_log.append({
"type": "Buy" if last_dir == 1 else "Sell",
"entry_time": str(idx[entry_bar]),
"exit_time": str(idx[-1]),
"entry_price": round(entry_price, 5),
"exit_price": round(c, 5),
"pnl": round(last_dir * (c - entry_price), 5),
"pnl_pct": round(ret * 100, 3),
"exit_reason": "end",
})
# Equity curve from trade returns
cumret = 1.0
equity_vals = np.full(n, float(capital))
trade_idx = 0
in_trade = False
t_entry_price = None
t_dir = None
for i in range(n):
s = sig_arr[i]
c = price_arr[i]
if s != 0.0 and s != t_dir:
if t_dir is not None and t_entry_price is not None and t_entry_price != 0:
t_ret = t_dir * (c - t_entry_price) / t_entry_price - cost
cumret *= (1 + t_ret)
t_entry_price = c
t_dir = s
equity_vals[i] = capital * cumret
# Bar returns for Sharpe
bar_returns = np.zeros(n)
for i in range(1, n):
if price_arr[i - 1] != 0 and last_dir is not None:
bar_returns[i] = sig_arr[i - 1] * (price_arr[i] - price_arr[i - 1]) / price_arr[i - 1] if sig_arr[i - 1] != 0 else 0.0
return {
"equity": pd.Series(equity_vals, index=close.index),
"trade_returns": trade_returns,
"long_returns": long_returns,
"short_returns": short_returns,
"bar_returns": bar_returns,
"trade_log": trade_log,
}
def compute_trade_stats(trades, capital=10000):
"""Single source of truth for trade statistics.
Every display path reads from this — no recomputation anywhere.
All values are rounded and JSON-safe (no inf/nan).
"""
if not trades:
return {"n": 0, "wins": 0, "losses": 0, "wr": 0, "avg": 0,
"best": 0, "worst": 0, "ret": 0, "np": 0, "mdd": 0,
"pf": 0, "rr": 0, "expect": 0}
w = [r for r in trades if r > 0]
l = [r for r in trades if r < 0]
cumret = 1.0
for r in trades:
cumret *= (1 + r)
net_p = capital * (cumret - 1)
# Max drawdown
eq = np.cumprod([1.0] + [1 + r for r in trades])
peak = np.maximum.accumulate(eq)
mdd = float(((eq - peak) / peak).min()) if len(eq) > 1 else 0.0
# Profit Factor
gross_w = sum(w) if w else 0
gross_l = abs(sum(l)) if l else 0
pf = gross_w / gross_l if gross_l > 0 else (9999.0 if gross_w > 0 else 0)
# Risk:Reward
avg_w = float(np.mean(w)) if w else 0
avg_l = abs(float(np.mean(l))) if l else 0
rr = avg_w / avg_l if avg_l > 0 else (9999.0 if avg_w > 0 else 0)
# Expectancy
expect = net_p / len(trades)
return {
"n": len(trades), "wins": len(w), "losses": len(l),
"wr": round(len(w) / len(trades), 4),
"avg": round(float(np.mean(trades)), 6),
"best": round(max(w), 6) if w else 0,
"worst": round(min(l), 6) if l else 0,
"ret": round(cumret - 1, 6),
"np": round(net_p, 2),
"mdd": round(mdd, 6),
"pf": round(pf, 2),
"rr": round(rr, 2),
"expect": round(expect, 2),
}
def compute_metrics(bt_result, close_test, capital=10000):
"""Compute all standard metrics from backtest result.
Uses trade-level compounding (same as webapp _trade_stats) for accuracy.
Returns: dict with total_ret, bh_ret, sharpe_strat, sharpe_bh, mdd, n_trades
"""
equity = bt_result["equity"]
trade_returns = bt_result["trade_returns"]
# Total return — trade-level compounding (matches webapp)
if trade_returns:
cumret = 1.0
for r in trade_returns:
cumret *= (1 + r)
total_ret = cumret - 1
else:
total_ret = 0.0
# Buy and hold
bh_equity = capital * (close_test / close_test.iloc[0])
bh_ret = (bh_equity.iloc[-1] - capital) / capital if capital != 0 else 0.0
# Sharpe ratio — trade-level (matches webapp: sqrt(252*26) annualization)
if len(trade_returns) >= 2 and float(np.std(trade_returns)) > 0:
sharpe_strat = float(np.mean(trade_returns) / np.std(trade_returns) * np.sqrt(252 * 26))
else:
sharpe_strat = 0.0
bh_rets = bh_equity.pct_change().dropna()
if len(bh_rets) > 1 and bh_rets.std() != 0:
sharpe_bh = float((bh_rets.mean() / bh_rets.std()) * np.sqrt(252 * 24 * 4))
else:
sharpe_bh = 0.0
# Max drawdown — trade-level (matches webapp)
if trade_returns:
eq = np.cumprod([1.0] + [1 + r for r in trade_returns])
peak = np.maximum.accumulate(eq)
mdd = float(((eq - peak) / peak).min()) if len(eq) > 1 else 0.0
else:
mdd = 0.0
return {
"total_ret": float(total_ret),
"bh_ret": float(bh_ret),
"sharpe_strat": float(sharpe_strat) if not np.isnan(sharpe_strat) else 0.0,
"sharpe_bh": float(sharpe_bh) if not np.isnan(sharpe_bh) else 0.0,
"mdd": float(mdd),
"n_trades": len(trade_returns),
}
# Diagnostics line/histogram series (equity / drawdown / rolling_acc / conf_hist)
# only feed the small Diagnostics charts — they're never used by the price chart
# or scroll-back. On a 1-min model trained over the (2.2-capped) window these are
# still ~30k points each; downsample to a visually-identical resolution before the
# dict leaves the trainer so it doesn't carry that into Server-A RAM / Postgres.
_RESULTS_SERIES_MAX = 5000
def _downsample_idx(n, cap=_RESULTS_SERIES_MAX):
"""Evenly-spaced index list spanning [0, n-1] (first+last always kept), or
None when no downsampling is needed (n <= cap)."""
if n <= cap:
return None
return np.unique(np.linspace(0, n - 1, cap).astype(int)).tolist()
def _take(arr, idx):
"""Subset a list by an index list (idx may be None → return arr unchanged)."""
if idx is None or not isinstance(arr, list):
return arr
return [arr[i] for i in idx]
# trade_log / train_trade_log are lists of per-trade dicts (display-only — the
# Trade Log tab). They scale with TRADE count, not bar count, so the bar-window
# cap (Phase 2.2) doesn't bound them — a degenerate near-every-bar model can put
# 10k+ trade dicts in the blob (>3 MB). Cap each (independently — a small-N model
# keeps every trade) to the most-recent N, recording `*_total` + `*_truncated`
# so the true count is still reported. Real strategies have far fewer than
# _TRADE_LOG_MAX trades, so this only ever bites pathological models.
_TRADE_LOG_MAX = 5000
def _cap_trade_log(tl):
"""Return (capped_list, original_len, was_truncated)."""
if not isinstance(tl, list) or len(tl) <= _TRADE_LOG_MAX:
return tl, (len(tl) if isinstance(tl, list) else 0), False
return tl[-_TRADE_LOG_MAX:], len(tl), True
def build_return_dict(split_result, bt_result, metrics, model, feature_cols,
signal_full, p_pos_test, p_neg_test, custom_figs=None,
bt_train_result=None, pre_stats=None):
"""Assemble the complete return dict. Handles ALL serialization.
Never returns Timestamps, numpy arrays, or non-JSON types.
Returns: JSON-safe dict with all required keys
"""
df = split_result["df"]
close = split_result["close"]
close_test = split_result["close_test"]
X_test = split_result["X_test"]
y_test = split_result["y_test"]
equity = bt_result["equity"]
bar_returns = bt_result["bar_returns"]
# OHLC
ohlc_dates = [str(x) for x in df.index.tolist()]
def _safe_list(arr):
return [float(x) if (x is not None and not np.isnan(x) and not np.isinf(x)) else None
for x in arr]
# Overlays
bb, ma = compute_overlays(close, df.index)
# Buy and hold equity
capital = equity.iloc[0] if len(equity) > 0 else 10000
bh_equity = capital * (close_test / close_test.iloc[0])
# Confusion matrix
from sklearn.metrics import confusion_matrix
pred_test = model.predict(X_test)
y_test_arr = np.asarray(y_test)
cm = confusion_matrix(y_test_arr, pred_test, labels=[-1, 0, 1])
# Rolling accuracy
sig_arr = signal_full.reindex(close_test.index).values
correct = pd.Series((pred_test == y_test_arr).astype(float), index=X_test.index)
active_test = pd.Series(sig_arr != 0, index=close_test.index) if len(sig_arr) == len(close_test) else pd.Series(True, index=close_test.index)
correct_active = correct.where(active_test, other=np.nan)
rolling_acc = correct_active.rolling(30, min_periods=1).mean()
# Feature importance
importances = model.feature_importances_
fi_pairs = sorted(zip(feature_cols, importances), key=lambda x: x[1])[-15:]
# Drawdown
rolling_max = equity.cummax()
drawdown = (equity - rolling_max) / rolling_max.replace(0, np.nan)
drawdown = drawdown.fillna(0.0)
# ── Downsample the Diagnostics-only series (see _downsample_idx) ──────────
_eq_dates = [str(x) for x in close_test.index.tolist()]
_eq_strat = _safe_list(equity.values)
_eq_bh = _safe_list(bh_equity.values)
_eq_idx = _downsample_idx(len(_eq_dates))
_eq_dates, _eq_strat, _eq_bh = _take(_eq_dates, _eq_idx), _take(_eq_strat, _eq_idx), _take(_eq_bh, _eq_idx)
_ra_dates = [str(x) for x in rolling_acc.index.tolist()]
_ra_vals = [float(x) if (not np.isnan(x) and not np.isinf(x)) else None for x in rolling_acc.values]
_ra_idx = _downsample_idx(len(_ra_dates))
_ra_dates, _ra_vals = _take(_ra_dates, _ra_idx), _take(_ra_vals, _ra_idx)
_dd_dates = [str(x) for x in drawdown.index.tolist()]
_dd_vals = _safe_list(drawdown.values)
_dd_idx = _downsample_idx(len(_dd_dates))
_dd_dates, _dd_vals = _take(_dd_dates, _dd_idx), _take(_dd_vals, _dd_idx)
_cp_pos = [float(x) for x in (p_pos_test.tolist() if hasattr(p_pos_test, 'tolist') else list(p_pos_test))]
_cp_neg = [float(x) for x in (p_neg_test.tolist() if hasattr(p_neg_test, 'tolist') else list(p_neg_test))]
_cp_pos = _take(_cp_pos, _downsample_idx(len(_cp_pos)))
_cp_neg = _take(_cp_neg, _downsample_idx(len(_cp_neg)))
# ── Trade logs — display-only (Trade Log tab); cap to most-recent N with a
# `_total` field so the true count is still reported (see _cap_trade_log).
# NB: ret_dist arrays are left FULL — a downstream path in callbacks.py
# recomputes n_trades/win-rate from len(ret_dist), so a sample would skew
# the displayed counts; they're small anyway and gzip handles them.
_tl_test, _tl_test_n, _tl_test_tr = _cap_trade_log(bt_result.get("trade_log", []))
_tl_tr, _tl_tr_n, _tl_tr_tr = _cap_trade_log(bt_train_result.get("trade_log", []) if bt_train_result else [])
return {
"ohlc": {
"dates": ohlc_dates,
"open": _safe_list(df["open"].values),
"high": _safe_list(df["high"].values),
"low": _safe_list(df["low"].values),
"close": _safe_list(df["close"].values),
},
"signals": {
"dates": [str(x) for x in signal_full.index.tolist()],
"values": [float(x) for x in signal_full.values],
},
"bb": bb,
"ma": ma,
"equity": {
"dates": _eq_dates,
"strategy": _eq_strat,
"bh": _eq_bh,
},
"feature_importance": {
"names": [p[0] for p in fi_pairs],
"values": [float(p[1]) for p in fi_pairs],
},
"conf_matrix": cm.tolist(),
"conf_hist": {
"p_pos": _cp_pos,
"p_neg": _cp_neg,
},
"rolling_acc": {
"dates": _ra_dates,
"values": _ra_vals,
},
"drawdown": {
"dates": _dd_dates,
"values": _dd_vals,
},
"ret_dist": [float(x) for x in bt_result["trade_returns"]],
"ret_dist_long": [float(x) for x in bt_result["long_returns"]],
"ret_dist_short": [float(x) for x in bt_result["short_returns"]],
"train_ret_dist": [float(x) for x in bt_train_result["trade_returns"]] if bt_train_result else [],
"train_ret_dist_long": [float(x) for x in bt_train_result["long_returns"]] if bt_train_result else [],
"train_ret_dist_short": [float(x) for x in bt_train_result["short_returns"]] if bt_train_result else [],
"trade_log": _tl_test,
"train_trade_log": _tl_tr,
"trade_log_total": _tl_test_n,
"train_trade_log_total": _tl_tr_n,
"trade_log_truncated": _tl_test_tr,
"train_trade_log_truncated": _tl_tr_tr,
**(pre_stats or {}),
"metrics": metrics,
"split_dt": split_result["split_dt"],
"split_idx": int(split_result["split_idx"]),
"n_train": int(split_result["n_train"]),
"n_test": int(split_result["n_test"]),
"feature_cols": list(feature_cols),
"custom_figs": custom_figs or [],
}
# ════════════════════════════════════════════════════════════════════════════
# STRATEGY FRAMEWORK v2 — Config-driven architecture
# Claude writes feature_engineering() + strategy_config(). Framework does rest.
# ════════════════════════════════════════════════════════════════════════════
import importlib
_MODEL_REGISTRY = {
"XGBClassifier": ("xgboost", "XGBClassifier"),
"RandomForestClassifier": ("sklearn.ensemble", "RandomForestClassifier"),
"GradientBoostingClassifier": ("sklearn.ensemble", "GradientBoostingClassifier"),
"LogisticRegression": ("sklearn.linear_model", "LogisticRegression"),
"ExtraTreesClassifier": ("sklearn.ensemble", "ExtraTreesClassifier"),
"AdaBoostClassifier": ("sklearn.ensemble", "AdaBoostClassifier"),
}
def _build_model_from_config(config, X_train, y_train_enc):
"""Build, fit, and wrap a model from strategy_config dict."""
model_type = config.get("model_type", "RandomForestClassifier")
model_params = dict(config.get("model_params", {}))
if model_type not in _MODEL_REGISTRY:
raise ValueError(f"Unknown model_type '{model_type}'. Valid: {list(_MODEL_REGISTRY.keys())}")
module_path, class_name = _MODEL_REGISTRY[model_type]
mod = importlib.import_module(module_path)
cls = getattr(mod, class_name)
# XGBoost defaults
if class_name == "XGBClassifier":
model_params.setdefault("use_label_encoder", False)
model_params.setdefault("eval_metric", "mlogloss")
model_params.setdefault("tree_method", "hist")
# Determinism > speed (2026-05-25). XGBoost hist with n_jobs=-1 is
# NON-reproducible even with random_state set — the parallel histogram
# gradient-sum order varies across threads, so the SAME code + data
# gives a slightly different model (and backtest) every run. Forcing
# single-thread makes training bit-reproducible so: (a) a user who
# copies a strategy and reruns it gets identical numbers, (b) the
# community "Live" score matches a redeploy, (c) "same code, different
# result" support reports go away. Cost: single-threaded XGB (a few
# seconds slower on large windows; hist is fast so it's minor). FORCED
# (not setdefault) so the guarantee can't be silently broken by a
# strategy passing n_jobs. Exact reproducibility holds within the
# platform (pinned versions / same Modal image); a user's own machine
# with different xgboost/numpy/CPU can still differ in low-order bits.
model_params["n_jobs"] = 1
# Common defaults
model_params.setdefault("random_state", 42)
from model_wrapper import ModelWrapper
clf = cls(**model_params)
clf.fit(X_train, y_train_enc)
enc = LabelEncoder()
enc.fit([-1, 0, 1])
return ModelWrapper(clf, original_classes=enc.classes_, n_features=X_train.shape[1])
def _generate_signals(model, X, threshold):
"""Framework-owned signal generation. Deterministic threshold logic."""
proba = model.predict_proba(X)
classes = list(model.classes_)
idx_pos = classes.index(1) if 1 in classes else None
idx_neg = classes.index(-1) if -1 in classes else None
p_pos = proba[:, idx_pos] if idx_pos is not None else np.zeros(len(X))
p_neg = proba[:, idx_neg] if idx_neg is not None else np.zeros(len(X))
signal_vals = np.zeros(len(X))
signal_vals = np.where(p_pos >= threshold, 1.0, signal_vals)
signal_vals = np.where(p_neg >= threshold, -1.0, signal_vals)
# Both exceed: pick stronger
both = (p_pos >= threshold) & (p_neg >= threshold)
signal_vals[both] = np.where(p_pos[both] >= p_neg[both], 1.0, -1.0)
return pd.Series(signal_vals, index=X.index), p_pos, p_neg
# ── Filter functions (all no-ops when config value is None) ──────────────
def _apply_direction_filter(signal, direction):
"""Zero out signals that don't match allowed direction."""
if direction is None or direction == "both":
return signal
s = signal.copy()
if direction == "long":
s[s < 0] = 0.0
elif direction == "short":
s[s > 0] = 0.0
return s
def _apply_session_filter(signal, index, session_hours):
"""Zero out signals outside session hours [start, end] UTC."""
if session_hours is None:
return signal
s = signal.copy()
start_h, end_h = session_hours[0], session_hours[1]
hours = index.hour
if start_h <= end_h:
mask = (hours >= start_h) & (hours < end_h)
else: # wrap around midnight, e.g. [22, 6]
mask = (hours >= start_h) | (hours < end_h)
s[~mask] = 0.0
return s
def _apply_atr_filter(signal, close, high, low, min_atr):
"""Zero out signals when NATR(14) is below threshold."""
if min_atr is None:
return signal
hl = high - low
hc = (high - close.shift(1)).abs()
lc = (low - close.shift(1)).abs()
tr = pd.concat([hl, hc, lc], axis=1).max(axis=1)
atr14 = tr.ewm(com=13, adjust=False).mean()
natr = atr14 / close.replace(0, np.nan)
s = signal.copy()
s[natr < min_atr] = 0.0
return s
def _apply_trend_filter(signal, close, trend_filter):
"""Only allow signals aligned with trend. e.g. 'sma_50': longs above SMA, shorts below."""
if trend_filter is None:
return signal
# Parse: "sma_50" → SMA with period 50
parts = trend_filter.lower().replace("-", "_").split("_")
if len(parts) >= 2 and parts[0] in ("sma", "ema"):
period = int(parts[1])
else:
return signal # unknown filter, skip
if parts[0] == "sma":
trend_line = close.rolling(period).mean()
else:
trend_line = close.ewm(span=period, adjust=False).mean()
s = signal.copy()
# Longs only above trend, shorts only below
s[(s > 0) & (close < trend_line)] = 0.0
s[(s < 0) & (close > trend_line)] = 0.0
return s
# ── run_backtest_v2: framework-owned SL/TP/cooldown/position management ──
def run_backtest_v2(signal, close, high, low, config, capital=10000, cost=2e-5):
"""Backtest with SL/TP/cooldown/direction handling built into the engine.
Unlike run_backtest (v1), this function handles position exits internally.
Returns: same dict shape as run_backtest()
"""
stop_loss = config.get("stop_loss")
take_profit = config.get("take_profit")
cooldown = config.get("cooldown", 0)
on_opposite = config.get("on_opposite", "reverse")
sig_arr = signal.values
close_arr = close.values
high_arr = high.values
low_arr = low.values
idx = signal.index
n = len(close_arr)
trade_returns = []
long_returns = []
short_returns = []
trade_log = []
equity_vals = np.full(n, float(capital))
cumret = 1.0
position = 0.0 # current direction: 1.0, -1.0, or 0.0 (flat)
entry_price = None
entry_bar = None # index into arrays for entry time
cooldown_remaining = 0
def _log_trade(exit_bar, exit_px, ret, reason):
trade_log.append({
"type": "Buy" if position == 1.0 else "Sell",
"entry_time": str(idx[entry_bar]),
"exit_time": str(idx[exit_bar]),
"entry_price": round(entry_price, 5),
"exit_price": round(exit_px, 5),
"pnl": round(position * (exit_px - entry_price), 5),
"pnl_pct": round(ret * 100, 3),
"exit_reason": reason,
})
for i in range(n):
c = close_arr[i]
h = high_arr[i]
lo = low_arr[i]
s = sig_arr[i]
# 1. Check SL/TP if in trade
if position != 0.0 and entry_price is not None:
hit_sl = False
hit_tp = False
exit_price = None
if position == 1.0: # long
if stop_loss is not None and lo <= entry_price * (1 - stop_loss):
hit_sl = True
exit_price = entry_price * (1 - stop_loss)
elif take_profit is not None and h >= entry_price * (1 + take_profit):
hit_tp = True
exit_price = entry_price * (1 + take_profit)
else: # short
if stop_loss is not None and h >= entry_price * (1 + stop_loss):
hit_sl = True
exit_price = entry_price * (1 + stop_loss)
elif take_profit is not None and lo <= entry_price * (1 - take_profit):
hit_tp = True
exit_price = entry_price * (1 - take_profit)
if hit_sl or hit_tp:
ret = float(position * (exit_price - entry_price) / entry_price - cost)
trade_returns.append(ret)
if position == 1.0:
long_returns.append(ret)
else:
short_returns.append(ret)
_log_trade(i, exit_price, ret, "SL" if hit_sl else "TP")
cumret *= (1 + ret)
position = 0.0
entry_price = None
entry_bar = None
cooldown_remaining = cooldown
equity_vals[i] = capital * cumret
continue
# 2. Cooldown
if cooldown_remaining > 0:
cooldown_remaining -= 1
equity_vals[i] = capital * cumret
continue
# 3. Signal processing
if s != 0.0:
if position == 0.0:
# Open new trade
position = s
entry_price = c
entry_bar = i
elif s != position:
# Opposite signal
if on_opposite == "reverse":
# Close current + open opposite
ret = float(position * (c - entry_price) / entry_price - cost)
trade_returns.append(ret)
if position == 1.0:
long_returns.append(ret)
else:
short_returns.append(ret)
_log_trade(i, c, ret, "signal")
cumret *= (1 + ret)
position = s
entry_price = c
entry_bar = i
else: # close_only
# Close current, go flat
ret = float(position * (c - entry_price) / entry_price - cost)
trade_returns.append(ret)
if position == 1.0:
long_returns.append(ret)
else:
short_returns.append(ret)
_log_trade(i, c, ret, "close_only")
cumret *= (1 + ret)
position = 0.0
entry_price = None
entry_bar = None
cooldown_remaining = cooldown
equity_vals[i] = capital * cumret
# Close last open trade at final close
if position != 0.0 and entry_price is not None and n > 0 and entry_price != 0:
c = close_arr[-1]
ret = float(position * (c - entry_price) / entry_price - cost)
trade_returns.append(ret)
if position == 1.0:
long_returns.append(ret)
else:
short_returns.append(ret)
_log_trade(n - 1, c, ret, "end")
cumret *= (1 + ret)
equity_vals[-1] = capital * cumret
# Bar returns for Sharpe (approximate)
bar_returns = np.zeros(n)
for i in range(1, n):
if close_arr[i - 1] != 0 and sig_arr[i - 1] != 0:
bar_returns[i] = sig_arr[i - 1] * (close_arr[i] - close_arr[i - 1]) / close_arr[i - 1]
return {
"equity": pd.Series(equity_vals, index=close.index),
"trade_returns": trade_returns,
"long_returns": long_returns,
"short_returns": short_returns,
"bar_returns": bar_returns,
"trade_log": trade_log,
}
# ── run_strategy: the v2 orchestrator ────────────────────────────────────
def run_strategy(feature_fn, config_fn, data_path, start_date="", end_date="",
validation_date="", train_split=0.7, register_model_fn=None):
"""Config-driven strategy execution. Claude writes feature_fn + config_fn,
framework does everything else.
Returns: results dict (same format as webapp expects)
"""
config = config_fn()
# Auto-correct SL/TP if Claude passed percentage instead of decimal
for _key in ("stop_loss", "take_profit"):
_val = config.get(_key)
if _val is not None and _val > 0.1: # >10% is almost certainly a percentage
config[_key] = _val / 100.0
print(f"[strategy] Auto-corrected {_key}: {_val} -> {config[_key]} (was percentage, converted to decimal)")
# 1. Load data
df, close, open_, high, low = load_ohlc(data_path, start_date, end_date)
# 2. Feature engineering (Claude's function)
df = feature_fn(df, close, open_, high, low)
close = df["close"]
open_ = df["open"]
high = df["high"]
low = df["low"]
# 3. Warm-up detection: drop rows where features have NaN BEFORE any fill
feature_cols = [c for c in df.columns if c not in ("open", "high", "low", "close")]
raw_nans = df[feature_cols].isna().any(axis=1)
valid_rows = ~raw_nans
if valid_rows.any():
first_valid = valid_rows.idxmax()
if raw_nans.loc[:first_valid].any():
df = df.loc[first_valid:].copy()
close = df["close"]
open_ = df["open"]
high = df["high"]
low = df["low"]
# 4. Target
horizon = config.get("target_horizon", 4)
target = make_target(close, horizon=horizon)
# 5. Split (ffill only within each partition — no bfill leak)
mask = target.notna()
df = df[mask].copy()
target = target[mask]
close = df["close"]
high = df["high"]
low = df["low"]
X = df[feature_cols].copy()
X = X.replace([np.inf, -np.inf], np.nan)
if validation_date:
split_idx = len(df[df.index <= validation_date])
else:
split_idx = int(len(df) * train_split)
split_idx = max(1, min(split_idx, len(df) - 1))
# ffill within train and test separately (no leak)
X_train = X.iloc[:split_idx].ffill().fillna(0.0)
X_test = X.iloc[split_idx:].ffill().fillna(0.0)
X = pd.concat([X_train, X_test])
y_train = target.iloc[:split_idx]
y_test = target.iloc[split_idx:]
close_train = close.iloc[:split_idx]
close_test = close.iloc[split_idx:]
high_test = high.iloc[split_idx:]
low_test = low.iloc[split_idx:]
enc = LabelEncoder()
enc.fit([-1, 0, 1])
y_train_enc = enc.transform(y_train)
y_test_enc = enc.transform(y_test)
split_dt = str(df.index[split_idx])
sp = {
"df": df, "X_train": X_train, "X_test": X_test,
"y_train": y_train, "y_test": y_test,
"y_train_enc": y_train_enc, "y_test_enc": y_test_enc,
"enc": enc,
"close": close, "close_train": close_train, "close_test": close_test,
"split_idx": split_idx, "split_dt": split_dt,
"n_train": len(X_train), "n_test": len(X_test),
}
# 6. Build model from config
model = _build_model_from_config(config, X_train, y_train_enc)
# 7. Generate signals
threshold = config.get("signal_threshold", 0.55)
signal_train, p_pos_train, p_neg_train = _generate_signals(model, X_train, threshold)
signal_test, p_pos_test, p_neg_test = _generate_signals(model, X_test, threshold)
# 8. Apply filters (order: direction → session → ATR → trend)
direction = config.get("direction", "both")
signal_test = _apply_direction_filter(signal_test, direction)
signal_train = _apply_direction_filter(signal_train, direction)
session_filter = config.get("session_filter")
signal_test = _apply_session_filter(signal_test, signal_test.index, session_filter)
signal_train = _apply_session_filter(signal_train, signal_train.index, session_filter)
min_atr = config.get("min_atr")
if min_atr is not None:
signal_test = _apply_atr_filter(signal_test, close_test, high_test, low_test, min_atr)
trend_filter = config.get("trend_filter")
if trend_filter is not None:
signal_test = _apply_trend_filter(signal_test, close_test, trend_filter)
signal_full = pd.concat([signal_train, signal_test])
# 9. Backtest with SL/TP/cooldown (test + train)
high_train = high.iloc[:split_idx]
low_train = low.iloc[:split_idx]
has_risk = (config.get("stop_loss") is not None or
config.get("take_profit") is not None or
config.get("cooldown", 0) > 0 or
config.get("on_opposite", "reverse") != "reverse")
if has_risk:
bt = run_backtest_v2(signal_test, close_test, high_test, low_test, config, capital=10000)
bt_train = run_backtest_v2(signal_train, close_train, high_train, low_train, config, capital=10000)
else:
bt = run_backtest(signal_test, close_test, capital=10000)
bt_train = run_backtest(signal_train, close_train, capital=10000)
# 10. Metrics
metrics = compute_metrics(bt, close_test, capital=10000)
# 11. Pre-compute all trade stats (single source of truth)
pre_stats = {
"train_stats": compute_trade_stats(bt_train.get("trade_returns", []), capital=10000),
"test_stats": compute_trade_stats(bt.get("trade_returns", []), capital=10000),
"long_stats": compute_trade_stats(bt.get("long_returns", []), capital=10000),
"short_stats": compute_trade_stats(bt.get("short_returns", []), capital=10000),
}
# 12. Register model
if register_model_fn is not None:
register_model_fn(model)
# 13. Build return dict
return build_return_dict(sp, bt, metrics, model, feature_cols,
signal_full, p_pos_test, p_neg_test, custom_figs=[],
bt_train_result=bt_train, pre_stats=pre_stats)
# ── End strategy_utils ──
DATA_PATH = '/root/Desktop/QuantifyMe/data/ohlc/AUDUSD_15min.parquet'
START_DATE = '2026-04-15'
END_DATE = '2026-05-25'
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
# ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
bb_period = 20
bb_std = 2.0
bb_mid = close.rolling(bb_period).mean()
bb_sigma = close.rolling(bb_period).std(ddof=0)
bb_upper = bb_mid + bb_std * bb_sigma
bb_lower = bb_mid - bb_std * bb_sigma
df["bb_mid"] = bb_mid
df["bb_upper"] = bb_upper
df["bb_lower"] = bb_lower
# %B — position of close within the band (0 = lower, 1 = upper)
bb_range = bb_upper - bb_lower
df["bb_pct_b"] = np.where(bb_range > 0, (close - bb_lower) / bb_range, 0.5)
# Bandwidth — normalised band width (regime filter)
df["bb_bandwidth"] = np.where(bb_mid > 0, bb_range / bb_mid, 0.0)
# Distance from each band (signed, normalised by sigma)
df["dist_lower"] = np.where(bb_sigma > 0, (close - bb_lower) / bb_sigma, 0.0)
df["dist_upper"] = np.where(bb_sigma > 0, (bb_upper - close) / bb_sigma, 0.0)
df["dist_mid"] = np.where(bb_sigma > 0, (close - bb_mid) / bb_sigma, 0.0)
# Below lower band flag
df["below_lower"] = np.where(close < bb_lower, 1, 0)
# Above upper band flag
df["above_upper"] = np.where(close > bb_upper, 1, 0)
# ── RSI (14) ─────────────────────────────────────────────────────────────
rsi_period = 14
delta = close.diff()
gain = delta.clip(lower=0)
loss = (-delta).clip(lower=0)
avg_gain = gain.ewm(com=rsi_period - 1, min_periods=rsi_period).mean()
avg_loss = loss.ewm(com=rsi_period - 1, min_periods=rsi_period).mean()
rs = np.where(avg_loss > 0, avg_gain / avg_loss, 100.0)
rsi = 100.0 - 100.0 / (1.0 + rs)
df["rsi"] = rsi
# RSI-derived flags and distances
df["rsi_oversold"] = np.where(rsi < 35, 1, 0)
df["rsi_overbought"] = np.where(rsi > 65, 1, 0)
df["rsi_dist_35"] = rsi - 35.0 # negative when oversold
df["rsi_dist_65"] = rsi - 65.0 # positive when overbought
df["rsi_norm"] = (rsi - 50.0) / 50.0 # centred, ±1 range
# ── Core entry condition features ────────────────────────────────────────
# Buy setup: close < lower BB AND RSI < 35
df["long_setup"] = np.where((close < bb_lower) & (rsi < 35), 1, 0)
# Sell setup: close > upper BB AND RSI > 65
df["short_setup"] = np.where((close > bb_upper) & (rsi > 65), 1, 0)
# ── ATR (14) — volatility context ────────────────────────────────────────
atr_period = 14
hl = high - low
hc = (high - close.shift(1)).abs()
lc = (low - close.shift(1)).abs()
tr = pd.concat([hl, hc, lc], axis=1).max(axis=1)
atr = tr.ewm(com=atr_period - 1, min_periods=atr_period).mean()
df["atr"] = atr
df["natr"] = np.where(close > 0, atr / close, 0.0)
# ── Momentum / Rate-of-Change ─────────────────────────────────────────────
for n in [1, 3, 5, 10]:
df[f"roc_{n}"] = np.where(
close.shift(n) > 0,
(close - close.shift(n)) / close.shift(n),
0.0
)
# ── EMA trend context (fast / slow) ──────────────────────────────────────
ema_fast = close.ewm(span=9, min_periods=9).mean()
ema_slow = close.ewm(span=21, min_periods=21).mean()
df["ema_fast"] = ema_fast
df["ema_slow"] = ema_slow
df["ema_diff"] = np.where(ema_slow > 0, (ema_fast - ema_slow) / ema_slow, 0.0)
df["ema_bull"] = np.where(ema_fast > ema_slow, 1, 0)
# SMA-50 trend filter helper (used by framework trend_filter)
df["sma_50"] = close.rolling(50).mean()
# ── Candle body & wick features ───────────────────────────────────────────
body = (close - open_).abs()
candle_rng = (high - low).replace(0, np.nan)
df["body_ratio"] = (body / candle_rng).fillna(0.0)
df["upper_wick"] = np.where(candle_rng.notna(), (high - close.clip(lower=open_)) / candle_rng.fillna(1), 0.0)
df["lower_wick"] = np.where(candle_rng.notna(), (close.clip(upper=open_) - low) / candle_rng.fillna(1), 0.0)
df["bull_candle"] = np.where(close > open_, 1, 0)
# ── Volume-like proxy — true range z-score ────────────────────────────────
tr_mean = tr.rolling(20).mean()
tr_std = tr.rolling(20).std(ddof=0).replace(0, np.nan)
df["tr_zscore"] = ((tr - tr_mean) / tr_std).fillna(0.0)
# ── Lagged RSI and %B (1, 2, 3 bars back) ────────────────────────────────
for lag in [1, 2, 3]:
df[f"rsi_lag{lag}"] = df["rsi"].shift(lag)
df[f"bb_pct_b_lag{lag}"] = df["bb_pct_b"].shift(lag)
# ── RSI slope ────────────────────────────────────────────────────────────
df["rsi_slope3"] = df["rsi"] - df["rsi"].shift(3)
# ── Mean-reversion proximity: how far price is from middle band ───────────
df["pct_to_mid"] = np.where(close > 0, (bb_mid - close) / close, 0.0)
# ── Fill any NaNs from warm-up ────────────────────────────────────────────
df = df.bfill().ffill()
return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
return {
"title": "BB Mean-Reversion + RSI Oversold/Overbought (XGBoost)",
"model_type": "XGBClassifier",
"model_params": {
"n_estimators": 500,
"max_depth": 4,
"learning_rate": 0.03,
"subsample": 0.75,
"colsample_bytree": 0.70,
"min_child_weight": 5,
"gamma": 0.1,
"reg_alpha": 0.05,
"reg_lambda": 1.5,
"objective": "binary:logistic",
"random_state": 42,
"n_jobs": -1,
},
"signal_threshold": 0.55,
"direction": "both",
"stop_loss": 0.0010,
"take_profit": 0.0020,
"cooldown": 0,
"max_positions": 1,
"on_opposite": "reverse",
"session_filter": [7, 17],
"min_atr": 0.00005,
"trend_filter": None,
"target_horizon": 4,
"objective": (
"Maximize Sharpe ratio by exploiting Bollinger Band mean-reversion "
"with RSI confirmation. Entry conditions (close < lower BB, RSI < 35 "
"for longs; close > upper BB, RSI > 65 for shorts) are encoded as "
"features together with momentum, ATR volatility, candle structure, "
"and lagged indicators. XGBoost with strong regularisation "
"(reg_lambda=1.5, gamma=0.1, min_child_weight=5) and a low learning "
"rate avoids overfitting on the 6-week window. Session filter "
"[7,17] UTC targets liquid London/NY overlap, reducing noise. "
"TP:SL ratio of 2:1 supports positive expected value even at "
"moderate win rates, pushing Sharpe higher."
),
"notes": (
"Features: %B position, RSI (raw + flags + slope + lags), "
"EMA cross, ATR/NATR, ROC(1/3/5/10), candle body/wick ratios, "
"TR z-score, distance-to-midband, long/short setup flags. "
"Round-trip cost ~2e-5 is implicitly absorbed by the 10-pip TP target. "
"Cooldown=0 allows immediate re-entry after mean-reversion completes."
),
}
# ── Framework v2: auto-generated wrapper ──
def train_and_backtest():
_vd = VALIDATION_DATE if 'VALIDATION_DATE' in globals() else ''
_ts = TRAIN_SPLIT if 'TRAIN_SPLIT' in globals() else 0.7
return run_strategy(
feature_engineering, strategy_config,
DATA_PATH, START_DATE, END_DATE,
_vd, _ts,
register_model_fn=register_model
)
|
||||||||||
|
1.06
|
NZD/USD Stoch+BB+RSI Gradient Boosting Mean-Revert
Maximise risk-adjusted return (Sharpe / Calmar) on NZD/USD 15-min. GradientBoostingClassifier selected for its strong generalisation on stru…
|
C
@candle_owl
|
NZDUSD | 15min | 59.1%61.0% | +3.83%+6.88% | 1.101.24 | 4.90%4.90% | 38159 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 01:54:56
# Model : Gradient Boosting
# Feature Eng. : BB (20,2.0), RSI 14, Stochastic (14,3) + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/NZDUSD_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
# ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
bb_period = 20
bb_std = 2.0
bb_mid = close.rolling(bb_period).mean()
bb_std_ = close.rolling(bb_period).std(ddof=0)
bb_upper = bb_mid + bb_std * bb_std_
bb_lower = bb_mid - bb_std * bb_std_
df["bb_mid"] = bb_mid
df["bb_upper"] = bb_upper
df["bb_lower"] = bb_lower
df["bb_width"] = (bb_upper - bb_lower) / bb_mid
df["bb_pct"] = (close - bb_lower) / (bb_upper - bb_lower)
# ── RSI (14) ─────────────────────────────────────────────────────────────
rsi_period = 14
delta = close.diff()
gain = delta.clip(lower=0)
loss = -delta.clip(upper=0)
avg_gain = gain.ewm(com=rsi_period - 1, min_periods=rsi_period).mean()
avg_loss = loss.ewm(com=rsi_period - 1, min_periods=rsi_period).mean()
rs = avg_gain / avg_loss.replace(0, np.nan)
df["rsi"] = 100 - (100 / (1 + rs))
# ── Stochastic Oscillator (K=14, D=3) ────────────────────────────────────
stoch_k_period = 14
stoch_d_period = 3
lowest_low = low.rolling(stoch_k_period).min()
highest_high = high.rolling(stoch_k_period).max()
stoch_range = (highest_high - lowest_low).replace(0, np.nan)
df["stoch_k"] = 100 * (close - lowest_low) / stoch_range
df["stoch_d"] = df["stoch_k"].rolling(stoch_d_period).mean()
df["stoch_kd_diff"] = df["stoch_k"] - df["stoch_d"]
# ── ATR (14) — for normalised volatility / min_atr filter ────────────────
atr_period = 14
tr = pd.concat([
high - low,
(high - close.shift(1)).abs(),
(low - close.shift(1)).abs()
], axis=1).max(axis=1)
atr = tr.ewm(com=atr_period - 1, min_periods=atr_period).mean()
df["atr"] = atr
df["natr"] = atr / close # normalised ATR used by min_atr filter
# ── Price momentum / rate-of-change ──────────────────────────────────────
df["roc_4"] = close.pct_change(4) # 1-hour momentum on 15-min bars
df["roc_8"] = close.pct_change(8) # 2-hour momentum
df["roc_16"] = close.pct_change(16) # 4-hour momentum
# ── EMA trend context ─────────────────────────────────────────────────────
df["ema_20"] = close.ewm(span=20, adjust=False).mean()
df["ema_50"] = close.ewm(span=50, adjust=False).mean()
df["ema_100"] = close.ewm(span=100, adjust=False).mean()
df["sma_50"] = close.rolling(50).mean() # used by trend_filter
df["ema_cross_20_50"] = df["ema_20"] - df["ema_50"]
df["ema_cross_50_100"] = df["ema_50"] - df["ema_100"]
df["close_vs_ema20"] = (close - df["ema_20"]) / df["ema_20"]
# ── Candlestick body / wick features ─────────────────────────────────────
df["body"] = (close - open_).abs()
df["candle_dir"] = np.where(close >= open_, 1.0, -1.0)
df["upper_wick"] = high - pd.concat([close, open_], axis=1).max(axis=1)
df["lower_wick"] = pd.concat([close, open_], axis=1).min(axis=1) - low
df["body_ratio"] = df["body"] / (high - low).replace(0, np.nan)
# ── Volume-proxy: realised range rolling stats ────────────────────────────
df["hl_range"] = high - low
df["hl_range_ma8"] = df["hl_range"].rolling(8).mean()
df["hl_range_ratio"]= df["hl_range"] / df["hl_range_ma8"]
# ── RSI derived signals ───────────────────────────────────────────────────
df["rsi_overbought"] = np.where(df["rsi"] > 70, 1.0, 0.0)
df["rsi_oversold"] = np.where(df["rsi"] < 30, 1.0, 0.0)
df["rsi_momentum"] = df["rsi"].diff(4)
# ── Stochastic derived signals ────────────────────────────────────────────
df["stoch_overbought"] = np.where(df["stoch_k"] > 80, 1.0, 0.0)
df["stoch_oversold"] = np.where(df["stoch_k"] < 20, 1.0, 0.0)
# ── BB squeeze: width vs rolling mean of width ────────────────────────────
df["bb_width_ma20"] = df["bb_width"].rolling(20).mean()
df["bb_squeeze"] = np.where(df["bb_width"] < df["bb_width_ma20"], 1.0, 0.0)
# ── Interaction features ──────────────────────────────────────────────────
df["rsi_bb_pct"] = df["rsi"] * df["bb_pct"]
df["stoch_k_bb_pct"] = df["stoch_k"] * df["bb_pct"]
df["rsi_stoch_diff"] = df["rsi"] - df["stoch_k"]
# ── Lagged features (avoids look-ahead) ──────────────────────────────────
for lag in [1, 2, 3, 4]:
df[f"rsi_lag{lag}"] = df["rsi"].shift(lag)
df[f"stoch_k_lag{lag}"] = df["stoch_k"].shift(lag)
df[f"bb_pct_lag{lag}"] = df["bb_pct"].shift(lag)
df[f"roc4_lag{lag}"] = df["roc_4"].shift(lag)
# ── Fill NaN from warm-up ─────────────────────────────────────────────────
df = df.bfill().ffill()
return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
return {
"title": "NZD/USD Stoch+BB+RSI Gradient Boosting Mean-Revert",
"model_type": "GradientBoostingClassifier",
"model_params": {
"n_estimators": 400,
"max_depth": 4,
"learning_rate": 0.04,
"subsample": 0.8,
"min_samples_leaf": 20,
"max_features": "sqrt",
"validation_fraction": 0.1,
"n_iter_no_change": 30,
"tol": 1e-4,
"random_state": 42,
},
"signal_threshold": 0.56,
"direction": "both",
"stop_loss": 0.005,
"take_profit": 0.010,
"cooldown": 0,
"max_positions": 1,
"on_opposite": "reverse",
"session_filter": [7, 20],
"min_atr": 0.0002,
"trend_filter": None,
"target_horizon": 4,
"objective": (
"Maximise risk-adjusted return (Sharpe / Calmar) on NZD/USD 15-min. "
"GradientBoostingClassifier selected for its strong generalisation on "
"structured tabular data with noisy financial features. n_estimators=400 "
"with early stopping (n_iter_no_change=30) prevents overfitting. "
"max_depth=4 keeps trees shallow to reduce variance. subsample=0.8 + "
"max_features=sqrt add stochasticity for robustness. SL 0.5% / TP 1.0% "
"gives a minimum 2:1 reward-risk ratio. Session filter 07-20 UTC covers "
"Sydney open through NY overlap, maximising NZD/USD liquidity. "
"Reverse on opposite signal keeps the model continuously positioned in "
"the highest-confidence direction. min_atr filter avoids flat/illiquid "
"periods where the model edges degrade."
),
"notes": (
"Features: Bollinger Bands (20,2) width & %B, RSI(14), Stochastic K/D "
"(14,3), ATR(14)/NATR, EMA cross (20/50/100), SMA50 trend context, "
"price ROC (4/8/16 bars), candlestick body/wick ratios, HL range "
"normalisation, BB squeeze flag, RSI/Stoch overbought-oversold flags, "
"interaction terms (RSI*%B, StochK*%B), and 4 lags each of RSI, StochK, "
"%B and ROC4. Threshold 0.56 slightly above 0.50 to filter marginal "
"signals without sacrificing too many trades."
),
}
|
||||||||||
|
0.91
|
NZD/USD EMA Cross + ATR Gradient Boosting
Maximize risk-adjusted return (Sharpe / Calmar). GradientBoostingClassifier with moderate depth (4) and low learning rate (0.03) to reduce o…
|
E
@elastic-moose-350
|
NZDUSD | 15min | 63.3%60.2% | +9.88%+4.50% | 1.181.16 | 3.44%3.44% | 712103 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 03:16:00
# Model : Gradient Boosting
# Feature Eng. : EMA (50,200), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/NZDUSD_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
# ── EMA 50 and EMA 200 ──────────────────────────────────────────────────
ema_50 = close.ewm(span=50, adjust=False).mean()
ema_200 = close.ewm(span=200, adjust=False).mean()
df["ema_50"] = ema_50
df["ema_200"] = ema_200
df["dm_ema_50"] = (close - ema_50) / ema_50
df["dm_ema_200"] = (close - ema_200) / ema_200
# EMA cross signal: ema_50 vs ema_200
df["ema_cross"] = df["ema_50"] - df["ema_200"]
# Cross direction: +1 when ema_50 > ema_200, -1 otherwise
df["ema_cross_sign"] = np.where(df["ema_cross"] > 0, 1.0, -1.0)
# Cross event: 1 when cross just happened (sign flip)
prev_cross = df["ema_cross"].shift(1)
df["ema_cross_event"] = np.where(
(df["ema_cross"] * prev_cross) < 0, 1.0, 0.0
)
# ── ATR 14 ──────────────────────────────────────────────────────────────
prev_close = close.shift(1)
tr = pd.concat([
high - low,
(high - prev_close).abs(),
(low - prev_close).abs()
], axis=1).max(axis=1)
atr = tr.ewm(span=14, adjust=False).mean()
df["atr"] = atr
df["natr"] = atr / close
# ── RSI 14 ──────────────────────────────────────────────────────────────
delta = close.diff()
gain = delta.clip(lower=0)
loss = (-delta).clip(lower=0)
avg_gain = gain.ewm(span=14, adjust=False).mean()
avg_loss = loss.ewm(span=14, adjust=False).mean()
rs = avg_gain / (avg_loss + 1e-10)
rsi = 100 - (100 / (1 + rs))
df["rsi_14"] = rsi
df["rsi_norm"] = (rsi - 50) / 50 # centred and scaled
# ── MACD ────────────────────────────────────────────────────────────────
ema_12 = close.ewm(span=12, adjust=False).mean()
ema_26 = close.ewm(span=26, adjust=False).mean()
macd = ema_12 - ema_26
signal = macd.ewm(span=9, adjust=False).mean()
df["macd"] = macd
df["macd_signal"] = signal
df["macd_hist"] = macd - signal
df["macd_norm"] = macd / close
df["macd_hist_norm"] = (macd - signal) / close
# ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
sma_20 = close.rolling(20).mean()
std_20 = close.rolling(20).std()
bb_upper = sma_20 + 2 * std_20
bb_lower = sma_20 - 2 * std_20
bb_width = (bb_upper - bb_lower) / (sma_20 + 1e-10)
bb_pos = (close - bb_lower) / (bb_upper - bb_lower + 1e-10)
df["bb_width"] = bb_width
df["bb_pos"] = bb_pos
# ── Momentum & Rate-of-Change ────────────────────────────────────────────
df["mom_4"] = close.pct_change(4)
df["mom_8"] = close.pct_change(8)
df["mom_16"] = close.pct_change(16)
# ── Rolling volatility (realised vol over 20 bars) ──────────────────────
log_ret = np.log(close / close.shift(1))
df["rvol_20"] = log_ret.rolling(20).std()
# ── Stochastic Oscillator (14) ───────────────────────────────────────────
low_14 = low.rolling(14).min()
high_14 = high.rolling(14).max()
stoch_k = 100 * (close - low_14) / (high_14 - low_14 + 1e-10)
stoch_d = stoch_k.rolling(3).mean()
df["stoch_k"] = stoch_k
df["stoch_d"] = stoch_d
df["stoch_diff"] = stoch_k - stoch_d
# ── Candle body / range features ────────────────────────────────────────
df["body"] = (close - open_).abs() / (high - low + 1e-10)
df["upper_wick"] = (high - close.clip(lower=open_)) / (high - low + 1e-10)
df["lower_wick"] = (close.clip(upper=open_) - low) / (high - low + 1e-10)
df["bar_dir"] = np.where(close > open_, 1.0, -1.0)
# ── Price position relative to EMAs ─────────────────────────────────────
df["close_vs_ema50_sign"] = np.where(close > ema_50, 1.0, -1.0)
df["close_vs_ema200_sign"] = np.where(close > ema_200, 1.0, -1.0)
# ── Lagged features (1-bar and 2-bar lags on key signals) ───────────────
for col in ["rsi_norm", "macd_hist_norm", "mom_4", "ema_cross", "natr", "bb_pos"]:
df[f"{col}_lag1"] = df[col].shift(1)
df[f"{col}_lag2"] = df[col].shift(2)
# ── Fill NaN from warm-up ────────────────────────────────────────────────
df = df.bfill().ffill()
return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
return {
"title": "NZD/USD EMA Cross + ATR Gradient Boosting",
"model_type": "GradientBoostingClassifier",
"model_params": {
"n_estimators": 400,
"max_depth": 4,
"learning_rate": 0.03,
"subsample": 0.8,
"min_samples_leaf": 20,
"max_features": "sqrt",
"validation_fraction": 0.1,
"n_iter_no_change": 30,
"tol": 1e-4,
},
"signal_threshold": 0.55,
"direction": "both",
"stop_loss": 0.005,
"take_profit": 0.01,
"cooldown": 0,
"max_positions": 1,
"on_opposite": "reverse",
"session_filter": None,
"min_atr": None,
"trend_filter": None,
"target_horizon": 4,
"objective": (
"Maximize risk-adjusted return (Sharpe / Calmar). "
"GradientBoostingClassifier with moderate depth (4) and low learning rate (0.03) "
"to reduce overfitting on 15-min NZD/USD. SL=0.5%, TP=1.0% gives 1:2 RR. "
"EMA 50/200 cross is the primary trend feature; ATR normalises volatility context. "
"Supplementary RSI, MACD, Bollinger, Stochastic and candle-body features capture "
"momentum and mean-reversion signals. Early stopping via n_iter_no_change guards "
"against overfit on the training partition."
),
"notes": (
"target_horizon=4 (1 hour) matches typical intraday swing on NZD/USD. "
"reverse on opposite signal keeps the model responsive during trending regimes. "
"No session filter applied — NZD/USD has reasonable liquidity around the clock. "
"min_samples_leaf=20 and subsample=0.8 add regularisation without grid search."
),
}
|
||||||||||
|
0.86
|
EUR/USD Stoch+BB+RSI Gradient Boosting Mean-Rev
Maximize risk-adjusted return (Sharpe/Calmar) on EUR/USD 15-min data. GradientBoostingClassifier chosen for strong out-of-bag regularisation…
|
E
@echo-quanta-127
|
EURUSD | 15min | 61.2%53.2% | +1.02%+3.69% | 1.061.14 | 2.59%2.59% | 21447 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 02:33:17
# Model : Gradient Boosting
# Feature Eng. : BB (20,2.0), RSI 14, Stochastic (14,3) + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/EURUSD_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
# ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
bb_period = 20
bb_std = 2.0
bb_mid = close.rolling(bb_period).mean()
bb_std_v = close.rolling(bb_period).std(ddof=0)
bb_upper = bb_mid + bb_std * bb_std_v
bb_lower = bb_mid - bb_std * bb_std_v
df["bb_mid"] = bb_mid
df["bb_upper"] = bb_upper
df["bb_lower"] = bb_lower
df["bb_width"] = (bb_upper - bb_lower) / bb_mid
df["bb_pct"] = (close - bb_lower) / (bb_upper - bb_lower)
# ── RSI 14 ───────────────────────────────────────────────────────────────
rsi_period = 14
delta = close.diff()
gain = delta.clip(lower=0)
loss = -delta.clip(upper=0)
avg_gain = gain.ewm(com=rsi_period - 1, min_periods=rsi_period).mean()
avg_loss = loss.ewm(com=rsi_period - 1, min_periods=rsi_period).mean()
rs = avg_gain / avg_loss.replace(0, np.nan)
df["rsi"] = 100 - (100 / (1 + rs))
# ── Stochastic Oscillator (K=14, D=3) ────────────────────────────────────
stoch_k_period = 14
stoch_d_period = 3
lowest_low = low.rolling(stoch_k_period).min()
highest_high = high.rolling(stoch_k_period).max()
range_hl = (highest_high - lowest_low).replace(0, np.nan)
df["stoch_k"] = 100 * (close - lowest_low) / range_hl
df["stoch_d"] = df["stoch_k"].rolling(stoch_d_period).mean()
df["stoch_kd_diff"] = df["stoch_k"] - df["stoch_d"]
# ── ATR (14) ──────────────────────────────────────────────────────────────
atr_period = 14
prev_close = close.shift(1)
tr = pd.concat([
high - low,
(high - prev_close).abs(),
(low - prev_close).abs()
], axis=1).max(axis=1)
df["atr"] = tr.ewm(com=atr_period - 1, min_periods=atr_period).mean()
df["natr"] = df["atr"] / close
# ── SMA filters ──────────────────────────────────────────────────────────
df["sma_20"] = close.rolling(20).mean()
df["sma_50"] = close.rolling(50).mean()
df["sma_200"] = close.rolling(200).mean()
df["price_vs_sma50"] = close / df["sma_50"] - 1
df["price_vs_sma200"] = close / df["sma_200"] - 1
# ── EMA cross ────────────────────────────────────────────────────────────
ema_fast = close.ewm(span=8, adjust=False).mean()
ema_slow = close.ewm(span=21, adjust=False).mean()
df["ema_cross"] = ema_fast - ema_slow
# ── MACD ─────────────────────────────────────────────────────────────────
ema12 = close.ewm(span=12, adjust=False).mean()
ema26 = close.ewm(span=26, adjust=False).mean()
macd_line = ema12 - ema26
macd_signal = macd_line.ewm(span=9, adjust=False).mean()
df["macd"] = macd_line
df["macd_sig"] = macd_signal
df["macd_hist"] = macd_line - macd_signal
# ── Momentum / Rate-of-change ────────────────────────────────────────────
df["roc_4"] = close.pct_change(4)
df["roc_8"] = close.pct_change(8)
df["roc_16"] = close.pct_change(16)
# ── Candle features ───────────────────────────────────────────────────────
df["candle_body"] = (close - open_) / close
df["candle_range"] = (high - low) / close
df["upper_shadow"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / close
df["lower_shadow"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / close
# ── Volatility regime ─────────────────────────────────────────────────────
df["vol_ratio"] = df["atr"] / df["atr"].rolling(50).mean()
# ── RSI regime bins (np.where instead of pd.cut) ─────────────────────────
df["rsi_oversold"] = np.where(df["rsi"] < 30, 1, 0)
df["rsi_overbought"]= np.where(df["rsi"] > 70, 1, 0)
df["rsi_mid"] = np.where((df["rsi"] >= 40) & (df["rsi"] <= 60), 1, 0)
# ── Stochastic regime bins ────────────────────────────────────────────────
df["stoch_oversold"] = np.where(df["stoch_k"] < 20, 1, 0)
df["stoch_overbought"] = np.where(df["stoch_k"] > 80, 1, 0)
# ── BB regime bins ────────────────────────────────────────────────────────
df["bb_squeeze"] = np.where(df["bb_width"] < df["bb_width"].rolling(50).quantile(0.20), 1, 0)
df["bb_expansion"] = np.where(df["bb_width"] > df["bb_width"].rolling(50).quantile(0.80), 1, 0)
df["price_below_bb_lower"] = np.where(close < bb_lower, 1, 0)
df["price_above_bb_upper"] = np.where(close > bb_upper, 1, 0)
# ── Volume proxy — bar range z-score ──────────────────────────────────────
range_series = high - low
range_mean = range_series.rolling(20).mean()
range_std = range_series.rolling(20).std(ddof=0)
df["range_zscore"] = (range_series - range_mean) / range_std.replace(0, np.nan)
# ── Lagged features ───────────────────────────────────────────────────────
for lag in [1, 2, 3, 4]:
df[f"rsi_lag{lag}"] = df["rsi"].shift(lag)
df[f"stoch_k_lag{lag}"] = df["stoch_k"].shift(lag)
df[f"bb_pct_lag{lag}"] = df["bb_pct"].shift(lag)
df[f"macd_hist_lag{lag}"] = df["macd_hist"].shift(lag)
# ── Interaction features ──────────────────────────────────────────────────
df["rsi_x_bb_pct"] = df["rsi"] * df["bb_pct"]
df["stoch_x_bb_pct"] = df["stoch_k"] * df["bb_pct"]
df["macd_x_ema_cross"] = df["macd_hist"] * df["ema_cross"]
df["rsi_x_stoch_kd"] = df["rsi"] * df["stoch_kd_diff"]
# ── Fill NaN from warm-up ─────────────────────────────────────────────────
df = df.bfill().ffill()
return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
return {
"title": "EUR/USD Stoch+BB+RSI Gradient Boosting Mean-Rev",
"model_type": "GradientBoostingClassifier",
"model_params": {
"n_estimators": 400,
"max_depth": 4,
"learning_rate": 0.04,
"subsample": 0.75,
"min_samples_leaf": 20,
"max_features": "sqrt",
"validation_fraction": 0.1,
"n_iter_no_change": 30,
"tol": 1e-4,
"random_state": 42,
},
"signal_threshold": 0.56,
"direction": "both",
"stop_loss": 0.005,
"take_profit": 0.010,
"cooldown": 0,
"max_positions": 1,
"on_opposite": "reverse",
"session_filter": [6, 18],
"min_atr": 0.0002,
"trend_filter": None,
"target_horizon": 4,
"objective": (
"Maximize risk-adjusted return (Sharpe/Calmar) on EUR/USD 15-min data. "
"GradientBoostingClassifier chosen for strong out-of-bag regularisation "
"via subsample=0.75 and early stopping (n_iter_no_change=30). "
"max_depth=4 limits overfitting on mean-reversion regime. "
"learning_rate=0.04 with 400 trees balances bias-variance. "
"Signal threshold 0.56 filters low-confidence signals for better precision. "
"Session filter 06-18 UTC targets London+NY overlap with highest liquidity. "
"SL=0.5%, TP=1.0% gives 1:2 R:R aligned with mean-reversion edge. "
"Target horizon=4 bars (1 hour) captures short-term mean-reversion cycles."
),
"notes": (
"Features: Stochastic(14,3), BB(20,2), RSI(14) as primary signals. "
"Supplemented by MACD, EMA cross, ATR volatility filter, candle body/shadow, "
"range z-score, lagged versions of key oscillators, and interaction terms. "
"Regime bins (oversold/overbought/squeeze/expansion) add non-linear context. "
"min_atr=0.0002 avoids trading during dead/illiquid periods."
),
}
|
||||||||||
|
0.59
|
USD/JPY Multi-MA + RSI/BB XGBoost Sharpe
Maximize Sharpe ratio on USD/JPY 1-min data using XGBoost with returns, RSI, Bollinger Bands, multiple MAs (50/100/200), MACD, ATR, and cand…
|
M
@malcolmtan
|
USD/JP | 60.7%— | +0.42%— | 1.22— | 0.53%0.53% | 84— |
|
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-08 02:08:02
# Model : XGBoost
# Feature Eng. : Auto-add features: ON
# Signal / Entry : —
# Optimization : —
# Risk Mgmt : —
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/USDJPY_1min.parquet"
START_DATE = "2026-05-04 00:00:00"
END_DATE = "2026-05-07 00:00:00"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.6993736951983298
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
# --- Returns over multiple horizons ---
for n in [1, 3, 5, 10, 20]:
df[f"ret_{n}"] = close.pct_change(n)
# --- RSI 14 ---
delta = close.diff()
gain = delta.clip(lower=0)
loss = -delta.clip(upper=0)
avg_gain = gain.ewm(com=13, min_periods=14).mean()
avg_loss = loss.ewm(com=13, min_periods=14).mean()
rs = avg_gain / (avg_loss + 1e-10)
df["rsi_14"] = 100.0 - (100.0 / (1.0 + rs))
# --- RSI derived features ---
df["rsi_14_zscore"] = (df["rsi_14"] - df["rsi_14"].rolling(50).mean()) / (df["rsi_14"].rolling(50).std() + 1e-10)
df["rsi_overbought"] = np.where(df["rsi_14"] > 70, 1, 0)
df["rsi_oversold"] = np.where(df["rsi_14"] < 30, 1, 0)
# --- Bollinger Bands 20, 2 ---
bb_mid = close.rolling(20).mean()
bb_std = close.rolling(20).std()
bb_upper = bb_mid + 2.0 * bb_std
bb_lower = bb_mid - 2.0 * bb_std
df["bb_mid"] = bb_mid
df["bb_upper"] = bb_upper
df["bb_lower"] = bb_lower
df["bb_width"] = (bb_upper - bb_lower) / (bb_mid + 1e-10)
df["bb_pct_b"] = (close - bb_lower) / (bb_upper - bb_lower + 1e-10)
df["bb_above"] = np.where(close > bb_upper, 1, 0)
df["bb_below"] = np.where(close < bb_lower, 1, 0)
# --- Moving Averages ---
for w in [50, 100, 200]:
df[f"sma_{w}"] = close.rolling(w).mean()
df[f"price_vs_sma_{w}"] = (close - df[f"sma_{w}"]) / (df[f"sma_{w}"] + 1e-10)
# --- MA crossover signals ---
df["sma50_vs_sma100"] = np.where(df["sma_50"] > df["sma_100"], 1, -1)
df["sma50_vs_sma200"] = np.where(df["sma_50"] > df["sma_200"], 1, -1)
df["sma100_vs_sma200"] = np.where(df["sma_100"] > df["sma_200"], 1, -1)
# --- ATR 14 ---
tr = pd.concat([
high - low,
(high - close.shift(1)).abs(),
(low - close.shift(1)).abs()
], axis=1).max(axis=1)
df["atr_14"] = tr.ewm(com=13, min_periods=14).mean()
df["natr_14"] = df["atr_14"] / (close + 1e-10)
# --- Momentum / rate of change ---
for n in [5, 10, 20]:
df[f"mom_{n}"] = close - close.shift(n)
df[f"roc_{n}"] = (close - close.shift(n)) / (close.shift(n) + 1e-10)
# --- Volume features (if volume exists) ---
if "volume" in df.columns:
vol = df["volume"].replace(0, np.nan)
df["vol_sma_20"] = vol.rolling(20).mean()
df["vol_ratio_20"] = vol / (df["vol_sma_20"] + 1e-10)
else:
df["vol_ratio_20"] = 1.0
# --- Price spread & body features ---
df["hl_spread"] = (high - low) / (close + 1e-10)
df["body_ratio"] = (close - open_).abs() / (high - low + 1e-10)
df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / (high - low + 1e-10)
df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / (high - low + 1e-10)
df["bull_candle"] = np.where(close > open_, 1, 0)
# --- Lagged returns for autocorrelation signal ---
for lag in [1, 2, 3, 5]:
df[f"ret1_lag{lag}"] = df["ret_1"].shift(lag)
# --- Rolling volatility ---
df["vol_10"] = df["ret_1"].rolling(10).std()
df["vol_20"] = df["ret_1"].rolling(20).std()
df["vol_50"] = df["ret_1"].rolling(50).std()
# --- Z-score of close over 20 and 50 bars ---
df["zscore_20"] = (close - close.rolling(20).mean()) / (close.rolling(20).std() + 1e-10)
df["zscore_50"] = (close - close.rolling(50).mean()) / (close.rolling(50).std() + 1e-10)
# --- Relative distance of price from BB bands ---
df["dist_upper"] = (bb_upper - close) / (close + 1e-10)
df["dist_lower"] = (close - bb_lower) / (close + 1e-10)
# --- EMA 9 and 21 for short-term momentum ---
df["ema_9"] = close.ewm(span=9, min_periods=9).mean()
df["ema_21"] = close.ewm(span=21, min_periods=21).mean()
df["ema9_vs_ema21"] = np.where(df["ema_9"] > df["ema_21"], 1, -1)
df["price_vs_ema9"] = (close - df["ema_9"]) / (df["ema_9"] + 1e-10)
df["price_vs_ema21"] = (close - df["ema_21"]) / (df["ema_21"] + 1e-10)
# --- MACD-like signal ---
ema_12 = close.ewm(span=12, min_periods=12).mean()
ema_26 = close.ewm(span=26, min_periods=26).mean()
macd_line = ema_12 - ema_26
signal_line = macd_line.ewm(span=9, min_periods=9).mean()
df["macd"] = macd_line
df["macd_signal"] = signal_line
df["macd_hist"] = macd_line - signal_line
df["macd_cross"] = np.where(macd_line > signal_line, 1, -1)
# --- Fill NaN from warm-up periods ---
df = df.bfill().ffill()
return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
return {
"title": "USD/JPY Multi-MA + RSI/BB XGBoost Sharpe",
"model_type": "XGBClassifier",
"model_params": {
"n_estimators": 400,
"max_depth": 4,
"learning_rate": 0.04,
"subsample": 0.75,
"colsample_bytree": 0.75,
"min_child_weight": 5,
"gamma": 0.1,
"reg_alpha": 0.1,
"reg_lambda": 1.5,
"objective": "binary:logistic",
"tree_method": "hist",
"random_state": 42,
},
"signal_threshold": 0.55,
"direction": "both",
"stop_loss": 0.0008,
"take_profit": 0.0016,
"cooldown": 0,
"max_positions": 1,
"on_opposite": "close_only",
"session_filter": None,
"min_atr": None,
"trend_filter": None,
"target_horizon": 5,
"objective": (
"Maximize Sharpe ratio on USD/JPY 1-min data using XGBoost with "
"returns, RSI, Bollinger Bands, multiple MAs (50/100/200), MACD, "
"ATR, and candle-body features. Stop-loss and take-profit set at "
"a 1:2 risk/reward to filter noise and improve Sharpe. n_estimators "
"and moderate depth balance bias-variance. Regularization (alpha/lambda) "
"reduces overfitting on short date range."
),
"notes": (
"Target horizon of 5 bars (5 minutes) is chosen to capture short-term "
"directional moves on 1-min data without excessive label noise. "
"colsample_bytree and subsample add stochasticity to reduce variance. "
"close_only on opposite signal avoids whipsaw from rapid reversals. "
"No session filter applied since USD/JPY has liquidity around the clock."
),
}
|
||||||||||