Community Scripts · QuantifyMe

Score▼	Strategy	Author	All 45 EURUSD 14 USDJPY 2 GBPUSD 5 AUDUSD 7 USDCAD 5 USDCHF 1 NZDUSD 6	All 45 15min 40	Win Rate▼	Return▼	PF▼	MDD▼	Trades▼
1.93	EMA(9/21) trend	M @malcolmtan	EMA(9/		50.0%—	+0.90%—	2.15—	0.39%0.39%	10—
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-25 02:27:36
# Model : XGBoost
# Feature Eng. : go long when EMA(9) crosses above EMA(21), exit when it crosses back below + Auto-add features: ON
# Signal / Entry : —
# Optimization : —
# Risk Mgmt : —
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd

# ── Inlined strategy_utils ──
"""
strategy_utils.py — Standard utility functions for generated strategies.
Claude imports these instead of writing boilerplate from scratch.
This ensures consistent behavior across all generated strategies.
"""
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Max backtest window per timeframe. A finer timeframe over a longer window
# blows up the results dict / parquet load / Modal train time (the 2026-05-12
# OOM was a 1-min × multi-year sweep) — and a 1-min strategy gains nothing from
# 2 years of 1-min bars. Enforced HERE because every training path (UI / API /
# Modal) funnels through run_strategy → load_ohlc. Env-overridable so a future
# "max plan" / dedicated-server tier can lift it.
_TF_MAX_DAYS = {
    "1min": 30,
    "5min": 90,
    "15min": 365,
    "1h": 730,
}


def _fetch_ohlc_from_internal(symbol: str, tf: str, start: str, end: str):
    """Phase 3.2: fetch parquet bytes from Server A's /internal/ohlc endpoint
    instead of reading a local file. Used inside Modal containers / Mac worker
    pool (Phase 3.4) so every train sees the same source of truth as the chart.

    The request is authenticated with an HMAC-SHA256 signature over the string
    "symbol|tf|start|end", keyed by env INTERNAL_WS_SECRET; the endpoint base
    URL comes from env QM_INTERNAL_OHLC_BASE.

    Returns: pd.DataFrame (parquet decoded), or raises on any failure so the
    caller can fall back / surface a clear error in the job.

    Raises: RuntimeError when either env var is unset or the response status
    is not 200; urllib errors propagate unchanged.
    """
    import hashlib as _hashlib, hmac as _hmac, io as _io, os as _os
    import urllib.request as _ur, urllib.parse as _urp

    base = (_os.environ.get("QM_INTERNAL_OHLC_BASE") or "").rstrip("/")
    secret = (_os.environ.get("INTERNAL_WS_SECRET") or "").strip()
    if not base:
        raise RuntimeError("QM_INTERNAL_OHLC_BASE not set")
    if not secret:
        raise RuntimeError("INTERNAL_WS_SECRET not set")

    # Fields are joined with a plain "|" (the scraped source showed "\|", a
    # table-escaping artifact that would have changed the signed bytes).
    # NOTE(review): the separator must match what the server signs — confirm.
    msg = f"{symbol}|{tf}|{start}|{end}".encode("utf-8")
    sig = _hmac.new(secret.encode("utf-8"), msg, _hashlib.sha256).hexdigest()
    qs = _urp.urlencode({
        "symbol": symbol,
        "tf": tf,
        "start": start,
        "end": end,
        "sig": sig,
    })
    url = f"{base}/internal/ohlc?{qs}"
    req = _ur.Request(url, headers={"User-Agent": "qm-worker/1.0"})
    with _ur.urlopen(req, timeout=30) as resp:
        if resp.status != 200:
            raise RuntimeError(f"/internal/ohlc returned {resp.status}")
        payload = resp.read()
    print(f"[load_ohlc:internal] {symbol} {tf} fetched {len(payload)} bytes", flush=True)
    return pd.read_parquet(_io.BytesIO(payload))


def _parse_symbol_tf_from_path(data_path: str):
    """Pull SYMBOL + TF out of a path like .../EURUSD_1min.parquet.

    Returns: (symbol, tf) when the basename is six uppercase letters, "_",
    then "<digits>min" or "<digits>h", then ".parquet"; otherwise (None, None).
    """
    import os as _os, re as _re

    base = _os.path.basename(str(data_path))
    m = _re.match(r"^([A-Z]{6})_(\d+min|\d+h)\.parquet$", base)
    if not m:
        return None, None
    return m.group(1), m.group(2)


def load_ohlc(data_path, start_date="", end_date=""):
    """Load OHLC parquet, sort index, filter dates. Always returns consistent format.

    The lower bound is clamped per timeframe (see _TF_MAX_DAYS) — a request
    for more history than the cap silently starts later. The cap for a
    timeframe can be overridden with env QM_MAX_DAYS_<TF> (digits only).

    Phase 3.2: when env QM_USE_INTERNAL_OHLC=="1", fetch over HTTP from
    Server A's /internal/ohlc endpoint instead of pd.read_parquet on a local
    file (which on Modal is a stale Volume snapshot). The endpoint applies the
    same day-cap, so the local cap-check below is a defensive no-op in that
    path. Flag defaults to "0" → unchanged behavior.

    Returns: (df, close, open_, high, low)

    Raises: RuntimeError in internal mode when the basename of data_path does
    not look like SYMBOL_TF.parquet.
    """
    import os as _os, re as _re

    _use_internal = _os.environ.get("QM_USE_INTERNAL_OHLC", "0") == "1"
    if _use_internal:
        _sym, _tf = _parse_symbol_tf_from_path(data_path)
        if not _sym or not _tf:
            raise RuntimeError(
                f"QM_USE_INTERNAL_OHLC=1 but DATA_PATH basename does not match "
                f"SYMBOL_TF.parquet: {data_path}"
            )
        df = _fetch_ohlc_from_internal(_sym, _tf, start_date or "", end_date or "")
    else:
        df = pd.read_parquet(data_path)
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()

    # Per-timeframe window cap (timeframe inferred from the parquet filename).
    _m = _re.search(r"_(\d+min|\d+h)\.parquet$", _os.path.basename(str(data_path)))
    _tf = _m.group(1) if _m else None
    _max_days = _TF_MAX_DAYS.get(_tf)
    if _max_days and _max_days > 0 and len(df):
        _env_override = _os.environ.get(f"QM_MAX_DAYS_{_tf.upper()}")
        if _env_override and _env_override.isdigit():
            _max_days = int(_env_override)
        # Best-effort: an unparsable date must not abort the load, so any
        # failure here only skips the cap and is logged.
        try:
            _eff_end = pd.Timestamp(end_date) if end_date else df.index.max()
            _eff_end = min(_eff_end, df.index.max())
            _floor = _eff_end - pd.Timedelta(days=_max_days)
            _req_start = pd.Timestamp(start_date) if start_date else df.index.min()
            if _req_start < _floor:
                print(f"[load_ohlc] {_tf} backtest window capped to {_max_days}d: "
                      f"start {_req_start.date()} -> {_floor.date()}", flush=True)
                start_date = _floor
        except Exception as _e:
            print(f"[load_ohlc] window-cap check skipped ({_e})", flush=True)

    if start_date:
        df = df[df.index >= start_date]
    if end_date:
        df = df[df.index <= end_date]
    return df, df["close"], df["open"], df["high"], df["low"]


def make_target(close, horizon=4):
    """Create target: direction N bars ahead. Default 4 bars = 1 hour on 15-min data.

    The last `horizon` rows have no future bar and come out as NaN.

    Returns: target (pd.Series of -1, 0, 1)
    """
    return np.sign(close.shift(-horizon) - close)


def split_data(df, target, feature_cols, train_split=0.7, validation_date=""):
    """Train/test split. Handles both ratio and date-based splits.
    Drops NaN from target before splitting. Encodes labels to [0,1,2].

    Feature gaps are filled with bfill then ffill over the WHOLE frame before
    the split; +/-inf and any remaining NaN become 0.0.

    Returns: dict with keys: X_train, X_test, y_train, y_test, y_train_enc,
    y_test_enc, enc, close_train, close_test, split_idx, split_dt, n_train,
    n_test (plus df and close)
    """
    # Drop NaN from target
    mask = target.notna()
    df = df[mask].copy()
    target = target[mask]
    close = df["close"]

    # Build feature matrix
    X = df[feature_cols].copy()
    X = X.bfill().ffill()
    X = X.replace([np.inf, -np.inf], np.nan).fillna(0.0)

    # Split
    if validation_date:
        split_idx = len(df[df.index <= validation_date])
    else:
        split_idx = int(len(df) * train_split)
    # Keep at least one row on each side of the split.
    split_idx = max(1, min(split_idx, len(df) - 1))

    X_train = X.iloc[:split_idx]
    X_test = X.iloc[split_idx:]
    y_train = target.iloc[:split_idx]
    y_test = target.iloc[split_idx:]
    close_train = close.iloc[:split_idx]
    close_test = close.iloc[split_idx:]
    split_dt = str(df.index[split_idx])

    # Label encoding — always fit on [-1, 0, 1]
    enc = LabelEncoder()
    enc.fit([-1, 0, 1])
    y_train_enc = enc.transform(y_train)
    y_test_enc = enc.transform(y_test)

    return {
        "df": df,
        "X_train": X_train,
        "X_test": X_test,
        "y_train": y_train,
        "y_test": y_test,
        "y_train_enc": y_train_enc,
        "y_test_enc": y_test_enc,
        "enc": enc,
        "close": close,
        "close_train": close_train,
        "close_test": close_test,
        "split_idx": split_idx,
        "split_dt": split_dt,
        "n_train": len(X_train),
        "n_test": len(X_test),
    }


def compute_overlays(close, df_index):
    """Compute BB and MA overlays on full dataset. Always consistent.

    Bollinger bands are the 20-bar rolling mean +/- 2 rolling std; moving
    averages are 50/100/200-bar rolling means. Each series is reindexed to
    df_index, gap-filled (bfill then ffill) and converted to a list of floats
    with None in place of any remaining NaN/inf.

    Returns: (bb_dict, ma_dict)
    """
    bb_mid = close.rolling(20).mean()
    bb_std = close.rolling(20).std()
    bb_upper = bb_mid + 2 * bb_std
    bb_lower = bb_mid - 2 * bb_std
    ma50 = close.rolling(50).mean()
    ma100 = close.rolling(100).mean()
    ma200 = close.rolling(200).mean()

    def _safe(s):
        s = s.reindex(df_index).bfill().ffill()
        return [float(x) if (x is not None and not np.isnan(x) and not np.isinf(x)) else None
                for x in s.values]

    bb = {"upper": _safe(bb_upper), "mid": _safe(bb_mid), "lower": _safe(bb_lower)}
    ma = {"ma50": _safe(ma50), "ma100": _safe(ma100), "ma200": _safe(ma200)}
    return bb, ma


def run_backtest(signal, close, capital=10000, cost=2e-5):
    """Run backtest with transaction costs.
    Uses price-based trade returns (same as webapp _compute_trades).
    Signal 0 = hold (keep current position), not close.

    A trade opens on the first non-zero signal and is closed (and reversed)
    only when a non-zero signal of a different value arrives; any position
    still open on the last bar is closed at the final price. `cost` is
    subtracted once from every trade return.

    Returns: dict with equity, trade_returns, long_returns, short_returns,
    bar_returns (plus trade_log)
    """
    sig_arr = signal.values
    price_arr = close.values
    idx = signal.index
    n = len(price_arr)

    # Trade returns — price-based (matches webapp _compute_trades exactly)
    trade_returns = []
    long_returns = []
    short_returns = []
    trade_log = []
    last_dir = None
    entry_price = None
    entry_bar = None
    for i in range(n):
        s = sig_arr[i]
        c = price_arr[i]
        if s != 0.0 and s != last_dir:
            # Direction change — close previous trade, open new
            if last_dir is not None and entry_price is not None and entry_price != 0:
                ret = float(last_dir * (c - entry_price) / entry_price - cost)
                trade_returns.append(ret)
                if last_dir == 1:
                    long_returns.append(ret)
                else:
                    short_returns.append(ret)
                trade_log.append({
                    "type": "Buy" if last_dir == 1 else "Sell",
                    "entry_time": str(idx[entry_bar]),
                    "exit_time": str(idx[i]),
                    "entry_price": round(entry_price, 5),
                    "exit_price": round(c, 5),
                    "pnl": round(last_dir * (c - entry_price), 5),
                    "pnl_pct": round(ret * 100, 3),
                    "exit_reason": "signal",
                })
            entry_price = c
            entry_bar = i
            last_dir = s

    # Close last open trade
    if last_dir is not None and entry_price is not None and n > 0 and entry_price != 0:
        c = price_arr[-1]
        ret = float(last_dir * (c - entry_price) / entry_price - cost)
        trade_returns.append(ret)
        if last_dir == 1:
            long_returns.append(ret)
        else:
            short_returns.append(ret)
        trade_log.append({
            "type": "Buy" if last_dir == 1 else "Sell",
            "entry_time": str(idx[entry_bar]),
            "exit_time": str(idx[-1]),
            "entry_price": round(entry_price, 5),
            "exit_price": round(c, 5),
            "pnl": round(last_dir * (c - entry_price), 5),
            "pnl_pct": round(ret * 100, 3),
            "exit_reason": "end",
        })

    # Equity curve from trade returns: equity is a step function that only
    # moves when a trade is closed by a direction change (compounded). The
    # forced close on the last bar is not reflected in this curve.
    cumret = 1.0
    equity_vals = np.full(n, float(capital))
    t_entry_price = None
    t_dir = None
    for i in range(n):
        s = sig_arr[i]
        c = price_arr[i]
        if s != 0.0 and s != t_dir:
            if t_dir is not None and t_entry_price is not None and t_entry_price != 0:
                t_ret = t_dir * (c - t_entry_price) / t_entry_price - cost
                cumret *= (1 + t_ret)
            t_entry_price = c
            t_dir = s
        equity_vals[i] = capital * cumret

    # Bar returns for Sharpe: previous bar's raw signal times the bar's price
    # change; stays all-zero when no trade was ever opened.
    bar_returns = np.zeros(n)
    for i in range(1, n):
        if price_arr[i - 1] != 0 and last_dir is not None:
            bar_returns[i] = (
                sig_arr[i - 1] * (price_arr[i] - price_arr[i - 1]) / price_arr[i - 1]
                if sig_arr[i - 1] != 0 else 0.0
            )

    return {
        "equity": pd.Series(equity_vals, index=close.index),
        "trade_returns": trade_returns,
        "long_returns": long_returns,
        "short_returns": short_returns,
        "bar_returns": bar_returns,
        "trade_log": trade_log,
    }


def compute_trade_stats(trades, capital=10000):
    """Single source of truth for trade statistics.
    Every display path reads from this — no recomputation anywhere.
    All values are rounded and JSON-safe (no inf/nan).

    `trades` is a list of per-trade fractional returns. Profit factor and
    risk:reward are reported as 9999.0 when there are wins but no losses.

    Returns: dict with n, wins, losses, wr, avg, best, worst, ret, np, mdd,
    pf, rr, expect (all zero when `trades` is empty).
    """
    if not trades:
        return {"n": 0, "wins": 0, "losses": 0, "wr": 0, "avg": 0,
                "best": 0, "worst": 0, "ret": 0, "np": 0, "mdd": 0,
                "pf": 0, "rr": 0, "expect": 0}

    w = [r for r in trades if r > 0]
    l = [r for r in trades if r < 0]

    # Compounded return over all trades, and the resulting net profit.
    cumret = 1.0
    for r in trades:
        cumret *= (1 + r)
    net_p = capital * (cumret - 1)

    # Max drawdown
    eq = np.cumprod([1.0] + [1 + r for r in trades])
    peak = np.maximum.accumulate(eq)
    mdd = float(((eq - peak) / peak).min()) if len(eq) > 1 else 0.0

    # Profit Factor
    gross_w = sum(w) if w else 0
    gross_l = abs(sum(l)) if l else 0
    pf = gross_w / gross_l if gross_l > 0 else (9999.0 if gross_w > 0 else 0)

    # Risk:Reward
    avg_w = float(np.mean(w)) if w else 0
    avg_l = abs(float(np.mean(l))) if l else 0
    rr = avg_w / avg_l if avg_l > 0 else (9999.0 if avg_w > 0 else 0)

    # Expectancy
    expect = net_p / len(trades)

    return {
        "n": len(trades),
        "wins": len(w),
        "losses": len(l),
        "wr": round(len(w) / len(trades), 4),
        "avg": round(float(np.mean(trades)), 6),
        "best": round(max(w), 6) if w else 0,
        "worst": round(min(l), 6) if l else 0,
        "ret": round(cumret - 1, 6),
        "np": round(net_p, 2),
        "mdd": round(mdd, 6),
        "pf": round(pf, 2),
        "rr": round(rr, 2),
        "expect": round(expect, 2),
    }


def compute_metrics(bt_result, close_test, capital=10000):
    """Compute all standard metrics from backtest result.
    Uses trade-level compounding (same as webapp _trade_stats) for accuracy.

    Returns: dict with total_ret, bh_ret, sharpe_strat, sharpe_bh, mdd, n_trades
    """
    equity = bt_result["equity"]
    trade_returns = bt_result["trade_returns"]

    # Total return — trade-level compounding (matches webapp)
    if trade_returns:
        cumret = 1.0
        for r in trade_returns:
            cumret *= (1 + r)
        total_ret = cumret - 1
    else:
        total_ret = 0.0

    # Buy and hold
    bh_equity = capital * (close_test / close_test.iloc[0])
    bh_ret = (bh_equity.iloc[-1] - capital) / capital if capital != 0 else 0.0

    # Sharpe ratio — trade-level (matches webapp: sqrt(252*26) annualization)
    if len(trade_returns) >= 2 and float(np.std(trade_returns)) > 0:
        sharpe_strat = float(np.mean(trade_returns) / np.std(trade_returns) * np.sqrt(252 * 26))
    else:
        sharpe_strat = 0.0

    bh_rets = bh_equity.pct_change().dropna()
    if len(bh_rets) > 1 and bh_rets.std() != 0:
        sharpe_bh = float((bh_rets.mean() / bh_rets.std()) * np.sqrt(252 * 24 * 4))
    else:
        sharpe_bh = 0.0

    # Max drawdown — trade-level (matches webapp)
    if trade_returns:
        eq = np.cumprod([1.0] + [1 + r for r in trade_returns])
        peak = np.maximum.accumulate(eq)
        mdd = float(((eq - peak) / peak).min()) if len(eq) > 1 else 0.0
    else:
        mdd = 0.0

    return {
        "total_ret": float(total_ret),
        "bh_ret": float(bh_ret),
        "sharpe_strat": float(sharpe_strat) if not np.isnan(sharpe_strat) else 0.0,
        "sharpe_bh": float(sharpe_bh) if not np.isnan(sharpe_bh) else 0.0,
        "mdd": float(mdd),
        "n_trades": len(trade_returns),
    }


# Diagnostics line/histogram series (equity / drawdown / rolling_acc / conf_hist)
# only feed the small Diagnostics charts — they're never used by the price chart
# or scroll-back. On a 1-min model trained over the (2.2-capped) window these are
# still ~30k points each; downsample to a visually-identical resolution before the
# dict leaves the trainer so it doesn't carry that into Server-A RAM / Postgres.
_RESULTS_SERIES_MAX = 5000 def _downsample_idx(n, cap=_RESULTS_SERIES_MAX): """Evenly-spaced index list spanning [0, n-1] (first+last always kept), or None when no downsampling is needed (n <= cap).""" if n <= cap: return None return np.unique(np.linspace(0, n - 1, cap).astype(int)).tolist() def _take(arr, idx): """Subset a list by an index list (idx may be None → return arr unchanged).""" if idx is None or not isinstance(arr, list): return arr return [arr[i] for i in idx] # trade_log / train_trade_log are lists of per-trade dicts (display-only — the # Trade Log tab). They scale with TRADE count, not bar count, so the bar-window # cap (Phase 2.2) doesn't bound them — a degenerate near-every-bar model can put # 10k+ trade dicts in the blob (>3 MB). Cap each (independently — a small-N model # keeps every trade) to the most-recent N, recording `_total` + `_truncated` # so the true count is still reported. Real strategies have far fewer than # _TRADE_LOG_MAX trades, so this only ever bites pathological models. _TRADE_LOG_MAX = 5000 def _cap_trade_log(tl): """Return (capped_list, original_len, was_truncated).""" if not isinstance(tl, list) or len(tl) <= _TRADE_LOG_MAX: return tl, (len(tl) if isinstance(tl, list) else 0), False return tl[-_TRADE_LOG_MAX:], len(tl), True def build_return_dict(split_result, bt_result, metrics, model, feature_cols, signal_full, p_pos_test, p_neg_test, custom_figs=None, bt_train_result=None, pre_stats=None): """Assemble the complete return dict. Handles ALL serialization. Never returns Timestamps, numpy arrays, or non-JSON types. 
Returns: JSON-safe dict with all required keys """ df = split_result["df"] close = split_result["close"] close_test = split_result["close_test"] X_test = split_result["X_test"] y_test = split_result["y_test"] equity = bt_result["equity"] bar_returns = bt_result["bar_returns"] # OHLC ohlc_dates = [str(x) for x in df.index.tolist()] def _safe_list(arr): return [float(x) if (x is not None and not np.isnan(x) and not np.isinf(x)) else None for x in arr] # Overlays bb, ma = compute_overlays(close, df.index) # Buy and hold equity capital = equity.iloc[0] if len(equity) > 0 else 10000 bh_equity = capital * (close_test / close_test.iloc[0]) # Confusion matrix from sklearn.metrics import confusion_matrix pred_test = model.predict(X_test) y_test_arr = np.asarray(y_test) cm = confusion_matrix(y_test_arr, pred_test, labels=[-1, 0, 1]) # Rolling accuracy sig_arr = signal_full.reindex(close_test.index).values correct = pd.Series((pred_test == y_test_arr).astype(float), index=X_test.index) active_test = pd.Series(sig_arr != 0, index=close_test.index) if len(sig_arr) == len(close_test) else pd.Series(True, index=close_test.index) correct_active = correct.where(active_test, other=np.nan) rolling_acc = correct_active.rolling(30, min_periods=1).mean() # Feature importance importances = model.feature_importances_ fi_pairs = sorted(zip(feature_cols, importances), key=lambda x: x[1])[-15:] # Drawdown rolling_max = equity.cummax() drawdown = (equity - rolling_max) / rolling_max.replace(0, np.nan) drawdown = drawdown.fillna(0.0) # ── Downsample the Diagnostics-only series (see _downsample_idx) ────────── _eq_dates = [str(x) for x in close_test.index.tolist()] _eq_strat = _safe_list(equity.values) _eq_bh = _safe_list(bh_equity.values) _eq_idx = _downsample_idx(len(_eq_dates)) _eq_dates, _eq_strat, _eq_bh = _take(_eq_dates, _eq_idx), _take(_eq_strat, _eq_idx), _take(_eq_bh, _eq_idx) _ra_dates = [str(x) for x in rolling_acc.index.tolist()] _ra_vals = [float(x) if (not np.isnan(x) and not 
np.isinf(x)) else None for x in rolling_acc.values] _ra_idx = _downsample_idx(len(_ra_dates)) _ra_dates, _ra_vals = _take(_ra_dates, _ra_idx), _take(_ra_vals, _ra_idx) _dd_dates = [str(x) for x in drawdown.index.tolist()] _dd_vals = _safe_list(drawdown.values) _dd_idx = _downsample_idx(len(_dd_dates)) _dd_dates, _dd_vals = _take(_dd_dates, _dd_idx), _take(_dd_vals, _dd_idx) _cp_pos = [float(x) for x in (p_pos_test.tolist() if hasattr(p_pos_test, 'tolist') else list(p_pos_test))] _cp_neg = [float(x) for x in (p_neg_test.tolist() if hasattr(p_neg_test, 'tolist') else list(p_neg_test))] _cp_pos = _take(_cp_pos, _downsample_idx(len(_cp_pos))) _cp_neg = _take(_cp_neg, _downsample_idx(len(_cp_neg))) # ── Trade logs — display-only (Trade Log tab); cap to most-recent N with a # `_total` field so the true count is still reported (see _cap_trade_log). # NB: ret_dist arrays are left FULL — a downstream path in callbacks.py # recomputes n_trades/win-rate from len(ret_dist), so a sample would skew # the displayed counts; they're small anyway and gzip handles them. 
_tl_test, _tl_test_n, _tl_test_tr = _cap_trade_log(bt_result.get("trade_log", [])) _tl_tr, _tl_tr_n, _tl_tr_tr = _cap_trade_log(bt_train_result.get("trade_log", []) if bt_train_result else []) return { "ohlc": { "dates": ohlc_dates, "open": _safe_list(df["open"].values), "high": _safe_list(df["high"].values), "low": _safe_list(df["low"].values), "close": _safe_list(df["close"].values), }, "signals": { "dates": [str(x) for x in signal_full.index.tolist()], "values": [float(x) for x in signal_full.values], }, "bb": bb, "ma": ma, "equity": { "dates": _eq_dates, "strategy": _eq_strat, "bh": _eq_bh, }, "feature_importance": { "names": [p[0] for p in fi_pairs], "values": [float(p[1]) for p in fi_pairs], }, "conf_matrix": cm.tolist(), "conf_hist": { "p_pos": _cp_pos, "p_neg": _cp_neg, }, "rolling_acc": { "dates": _ra_dates, "values": _ra_vals, }, "drawdown": { "dates": _dd_dates, "values": _dd_vals, }, "ret_dist": [float(x) for x in bt_result["trade_returns"]], "ret_dist_long": [float(x) for x in bt_result["long_returns"]], "ret_dist_short": [float(x) for x in bt_result["short_returns"]], "train_ret_dist": [float(x) for x in bt_train_result["trade_returns"]] if bt_train_result else [], "train_ret_dist_long": [float(x) for x in bt_train_result["long_returns"]] if bt_train_result else [], "train_ret_dist_short": [float(x) for x in bt_train_result["short_returns"]] if bt_train_result else [], "trade_log": _tl_test, "train_trade_log": _tl_tr, "trade_log_total": _tl_test_n, "train_trade_log_total": _tl_tr_n, "trade_log_truncated": _tl_test_tr, "train_trade_log_truncated": _tl_tr_tr, (pre_stats or {}), "metrics": metrics, "split_dt": split_result["split_dt"], "split_idx": int(split_result["split_idx"]), "n_train": int(split_result["n_train"]), "n_test": int(split_result["n_test"]), "feature_cols": list(feature_cols), "custom_figs": custom_figs or [], } # ════════════════════════════════════════════════════════════════════════════ # STRATEGY FRAMEWORK v2 — Config-driven 
architecture # Claude writes feature_engineering() + strategy_config(). Framework does rest. # ════════════════════════════════════════════════════════════════════════════ import importlib _MODEL_REGISTRY = { "XGBClassifier": ("xgboost", "XGBClassifier"), "RandomForestClassifier": ("sklearn.ensemble", "RandomForestClassifier"), "GradientBoostingClassifier": ("sklearn.ensemble", "GradientBoostingClassifier"), "LogisticRegression": ("sklearn.linear_model", "LogisticRegression"), "ExtraTreesClassifier": ("sklearn.ensemble", "ExtraTreesClassifier"), "AdaBoostClassifier": ("sklearn.ensemble", "AdaBoostClassifier"), } def _build_model_from_config(config, X_train, y_train_enc): """Build, fit, and wrap a model from strategy_config dict.""" model_type = config.get("model_type", "RandomForestClassifier") model_params = dict(config.get("model_params", {})) if model_type not in _MODEL_REGISTRY: raise ValueError(f"Unknown model_type '{model_type}'. Valid: {list(_MODEL_REGISTRY.keys())}") module_path, class_name = _MODEL_REGISTRY[model_type] mod = importlib.import_module(module_path) cls = getattr(mod, class_name) # XGBoost defaults if class_name == "XGBClassifier": model_params.setdefault("use_label_encoder", False) model_params.setdefault("eval_metric", "mlogloss") model_params.setdefault("tree_method", "hist") # Determinism > speed (2026-05-25). XGBoost hist with n_jobs=-1 is # NON-reproducible even with random_state set — the parallel histogram # gradient-sum order varies across threads, so the SAME code + data # gives a slightly different model (and backtest) every run. Forcing # single-thread makes training bit-reproducible so: (a) a user who # copies a strategy and reruns it gets identical numbers, (b) the # community "Live" score matches a redeploy, (c) "same code, different # result" support reports go away. Cost: single-threaded XGB (a few # seconds slower on large windows; hist is fast so it's minor). 
FORCED # (not setdefault) so the guarantee can't be silently broken by a # strategy passing n_jobs. Exact reproducibility holds within the # platform (pinned versions / same Modal image); a user's own machine # with different xgboost/numpy/CPU can still differ in low-order bits. model_params["n_jobs"] = 1 # Common defaults model_params.setdefault("random_state", 42) from model_wrapper import ModelWrapper clf = cls(model_params) clf.fit(X_train, y_train_enc) enc = LabelEncoder() enc.fit([-1, 0, 1]) return ModelWrapper(clf, original_classes=enc.classes_, n_features=X_train.shape[1]) def _generate_signals(model, X, threshold): """Framework-owned signal generation. Deterministic threshold logic.""" proba = model.predict_proba(X) classes = list(model.classes_) idx_pos = classes.index(1) if 1 in classes else None idx_neg = classes.index(-1) if -1 in classes else None p_pos = proba[:, idx_pos] if idx_pos is not None else np.zeros(len(X)) p_neg = proba[:, idx_neg] if idx_neg is not None else np.zeros(len(X)) signal_vals = np.zeros(len(X)) signal_vals = np.where(p_pos >= threshold, 1.0, signal_vals) signal_vals = np.where(p_neg >= threshold, -1.0, signal_vals) # Both exceed: pick stronger both = (p_pos >= threshold) & (p_neg >= threshold) signal_vals[both] = np.where(p_pos[both] >= p_neg[both], 1.0, -1.0) return pd.Series(signal_vals, index=X.index), p_pos, p_neg # ── Filter functions (all no-ops when config value is None) ────────────── def _apply_direction_filter(signal, direction): """Zero out signals that don't match allowed direction.""" if direction is None or direction == "both": return signal s = signal.copy() if direction == "long": s[s < 0] = 0.0 elif direction == "short": s[s > 0] = 0.0 return s def _apply_session_filter(signal, index, session_hours): """Zero out signals outside session hours [start, end] UTC.""" if session_hours is None: return signal s = signal.copy() start_h, end_h = session_hours[0], session_hours[1] hours = index.hour if start_h <= end_h: 
mask = (hours >= start_h) & (hours < end_h) else: # wrap around midnight, e.g. [22, 6] mask = (hours >= start_h) \| (hours < end_h) s[~mask] = 0.0 return s def _apply_atr_filter(signal, close, high, low, min_atr): """Zero out signals when NATR(14) is below threshold.""" if min_atr is None: return signal hl = high - low hc = (high - close.shift(1)).abs() lc = (low - close.shift(1)).abs() tr = pd.concat([hl, hc, lc], axis=1).max(axis=1) atr14 = tr.ewm(com=13, adjust=False).mean() natr = atr14 / close.replace(0, np.nan) s = signal.copy() s[natr < min_atr] = 0.0 return s def _apply_trend_filter(signal, close, trend_filter): """Only allow signals aligned with trend. e.g. 'sma_50': longs above SMA, shorts below.""" if trend_filter is None: return signal # Parse: "sma_50" → SMA with period 50 parts = trend_filter.lower().replace("-", "_").split("_") if len(parts) >= 2 and parts[0] in ("sma", "ema"): period = int(parts[1]) else: return signal # unknown filter, skip if parts[0] == "sma": trend_line = close.rolling(period).mean() else: trend_line = close.ewm(span=period, adjust=False).mean() s = signal.copy() # Longs only above trend, shorts only below s[(s > 0) & (close < trend_line)] = 0.0 s[(s < 0) & (close > trend_line)] = 0.0 return s # ── run_backtest_v2: framework-owned SL/TP/cooldown/position management ── def run_backtest_v2(signal, close, high, low, config, capital=10000, cost=2e-5): """Backtest with SL/TP/cooldown/direction handling built into the engine. Unlike run_backtest (v1), this function handles position exits internally. 
Returns: same dict shape as run_backtest() """ stop_loss = config.get("stop_loss") take_profit = config.get("take_profit") cooldown = config.get("cooldown", 0) on_opposite = config.get("on_opposite", "reverse") sig_arr = signal.values close_arr = close.values high_arr = high.values low_arr = low.values idx = signal.index n = len(close_arr) trade_returns = [] long_returns = [] short_returns = [] trade_log = [] equity_vals = np.full(n, float(capital)) cumret = 1.0 position = 0.0 # current direction: 1.0, -1.0, or 0.0 (flat) entry_price = None entry_bar = None # index into arrays for entry time cooldown_remaining = 0 def _log_trade(exit_bar, exit_px, ret, reason): trade_log.append({ "type": "Buy" if position == 1.0 else "Sell", "entry_time": str(idx[entry_bar]), "exit_time": str(idx[exit_bar]), "entry_price": round(entry_price, 5), "exit_price": round(exit_px, 5), "pnl": round(position * (exit_px - entry_price), 5), "pnl_pct": round(ret * 100, 3), "exit_reason": reason, }) for i in range(n): c = close_arr[i] h = high_arr[i] lo = low_arr[i] s = sig_arr[i] # 1. 
Check SL/TP if in trade if position != 0.0 and entry_price is not None: hit_sl = False hit_tp = False exit_price = None if position == 1.0: # long if stop_loss is not None and lo <= entry_price * (1 - stop_loss): hit_sl = True exit_price = entry_price * (1 - stop_loss) elif take_profit is not None and h >= entry_price * (1 + take_profit): hit_tp = True exit_price = entry_price * (1 + take_profit) else: # short if stop_loss is not None and h >= entry_price * (1 + stop_loss): hit_sl = True exit_price = entry_price * (1 + stop_loss) elif take_profit is not None and lo <= entry_price * (1 - take_profit): hit_tp = True exit_price = entry_price * (1 - take_profit) if hit_sl or hit_tp: ret = float(position * (exit_price - entry_price) / entry_price - cost) trade_returns.append(ret) if position == 1.0: long_returns.append(ret) else: short_returns.append(ret) _log_trade(i, exit_price, ret, "SL" if hit_sl else "TP") cumret = (1 + ret) position = 0.0 entry_price = None entry_bar = None cooldown_remaining = cooldown equity_vals[i] = capital cumret continue # 2. Cooldown if cooldown_remaining > 0: cooldown_remaining -= 1 equity_vals[i] = capital * cumret continue # 3. 
Signal processing if s != 0.0: if position == 0.0: # Open new trade position = s entry_price = c entry_bar = i elif s != position: # Opposite signal if on_opposite == "reverse": # Close current + open opposite ret = float(position * (c - entry_price) / entry_price - cost) trade_returns.append(ret) if position == 1.0: long_returns.append(ret) else: short_returns.append(ret) _log_trade(i, c, ret, "signal") cumret = (1 + ret) position = s entry_price = c entry_bar = i else: # close_only # Close current, go flat ret = float(position (c - entry_price) / entry_price - cost) trade_returns.append(ret) if position == 1.0: long_returns.append(ret) else: short_returns.append(ret) _log_trade(i, c, ret, "close_only") cumret = (1 + ret) position = 0.0 entry_price = None entry_bar = None cooldown_remaining = cooldown equity_vals[i] = capital cumret # Close last open trade at final close if position != 0.0 and entry_price is not None and n > 0 and entry_price != 0: c = close_arr[-1] ret = float(position * (c - entry_price) / entry_price - cost) trade_returns.append(ret) if position == 1.0: long_returns.append(ret) else: short_returns.append(ret) _log_trade(n - 1, c, ret, "end") cumret = (1 + ret) equity_vals[-1] = capital cumret # Bar returns for Sharpe (approximate) bar_returns = np.zeros(n) for i in range(1, n): if close_arr[i - 1] != 0 and sig_arr[i - 1] != 0: bar_returns[i] = sig_arr[i - 1] * (close_arr[i] - close_arr[i - 1]) / close_arr[i - 1] return { "equity": pd.Series(equity_vals, index=close.index), "trade_returns": trade_returns, "long_returns": long_returns, "short_returns": short_returns, "bar_returns": bar_returns, "trade_log": trade_log, } # ── run_strategy: the v2 orchestrator ──────────────────────────────────── def run_strategy(feature_fn, config_fn, data_path, start_date="", end_date="", validation_date="", train_split=0.7, register_model_fn=None): """Config-driven strategy execution. Claude writes feature_fn + config_fn, framework does everything else. 
Returns: results dict (same format as webapp expects) """ config = config_fn() # Auto-correct SL/TP if Claude passed percentage instead of decimal for _key in ("stop_loss", "take_profit"): _val = config.get(_key) if _val is not None and _val > 0.1: # >10% is almost certainly a percentage config[_key] = _val / 100.0 print(f"[strategy] Auto-corrected {_key}: {_val} -> {config[_key]} (was percentage, converted to decimal)") # 1. Load data df, close, open_, high, low = load_ohlc(data_path, start_date, end_date) # 2. Feature engineering (Claude's function) df = feature_fn(df, close, open_, high, low) close = df["close"] open_ = df["open"] high = df["high"] low = df["low"] # 3. Warm-up detection: drop rows where features have NaN BEFORE any fill feature_cols = [c for c in df.columns if c not in ("open", "high", "low", "close")] raw_nans = df[feature_cols].isna().any(axis=1) valid_rows = ~raw_nans if valid_rows.any(): first_valid = valid_rows.idxmax() if raw_nans.loc[:first_valid].any(): df = df.loc[first_valid:].copy() close = df["close"] open_ = df["open"] high = df["high"] low = df["low"] # 4. Target horizon = config.get("target_horizon", 4) target = make_target(close, horizon=horizon) # 5. 
Split (ffill only within each partition — no bfill leak) mask = target.notna() df = df[mask].copy() target = target[mask] close = df["close"] high = df["high"] low = df["low"] X = df[feature_cols].copy() X = X.replace([np.inf, -np.inf], np.nan) if validation_date: split_idx = len(df[df.index <= validation_date]) else: split_idx = int(len(df) * train_split) split_idx = max(1, min(split_idx, len(df) - 1)) # ffill within train and test separately (no leak) X_train = X.iloc[:split_idx].ffill().fillna(0.0) X_test = X.iloc[split_idx:].ffill().fillna(0.0) X = pd.concat([X_train, X_test]) y_train = target.iloc[:split_idx] y_test = target.iloc[split_idx:] close_train = close.iloc[:split_idx] close_test = close.iloc[split_idx:] high_test = high.iloc[split_idx:] low_test = low.iloc[split_idx:] enc = LabelEncoder() enc.fit([-1, 0, 1]) y_train_enc = enc.transform(y_train) y_test_enc = enc.transform(y_test) split_dt = str(df.index[split_idx]) sp = { "df": df, "X_train": X_train, "X_test": X_test, "y_train": y_train, "y_test": y_test, "y_train_enc": y_train_enc, "y_test_enc": y_test_enc, "enc": enc, "close": close, "close_train": close_train, "close_test": close_test, "split_idx": split_idx, "split_dt": split_dt, "n_train": len(X_train), "n_test": len(X_test), } # 6. Build model from config model = _build_model_from_config(config, X_train, y_train_enc) # 7. Generate signals threshold = config.get("signal_threshold", 0.55) signal_train, p_pos_train, p_neg_train = _generate_signals(model, X_train, threshold) signal_test, p_pos_test, p_neg_test = _generate_signals(model, X_test, threshold) # 8. 
Apply filters (order: direction → session → ATR → trend) direction = config.get("direction", "both") signal_test = _apply_direction_filter(signal_test, direction) signal_train = _apply_direction_filter(signal_train, direction) session_filter = config.get("session_filter") signal_test = _apply_session_filter(signal_test, signal_test.index, session_filter) signal_train = _apply_session_filter(signal_train, signal_train.index, session_filter) min_atr = config.get("min_atr") if min_atr is not None: signal_test = _apply_atr_filter(signal_test, close_test, high_test, low_test, min_atr) trend_filter = config.get("trend_filter") if trend_filter is not None: signal_test = _apply_trend_filter(signal_test, close_test, trend_filter) signal_full = pd.concat([signal_train, signal_test]) # 9. Backtest with SL/TP/cooldown (test + train) high_train = high.iloc[:split_idx] low_train = low.iloc[:split_idx] has_risk = (config.get("stop_loss") is not None or config.get("take_profit") is not None or config.get("cooldown", 0) > 0 or config.get("on_opposite", "reverse") != "reverse") if has_risk: bt = run_backtest_v2(signal_test, close_test, high_test, low_test, config, capital=10000) bt_train = run_backtest_v2(signal_train, close_train, high_train, low_train, config, capital=10000) else: bt = run_backtest(signal_test, close_test, capital=10000) bt_train = run_backtest(signal_train, close_train, capital=10000) # 10. Metrics metrics = compute_metrics(bt, close_test, capital=10000) # 11. Pre-compute all trade stats (single source of truth) pre_stats = { "train_stats": compute_trade_stats(bt_train.get("trade_returns", []), capital=10000), "test_stats": compute_trade_stats(bt.get("trade_returns", []), capital=10000), "long_stats": compute_trade_stats(bt.get("long_returns", []), capital=10000), "short_stats": compute_trade_stats(bt.get("short_returns", []), capital=10000), } # 12. Register model if register_model_fn is not None: register_model_fn(model) # 13. 
Build return dict return build_return_dict(sp, bt, metrics, model, feature_cols, signal_full, p_pos_test, p_neg_test, custom_figs=[], bt_train_result=bt_train, pre_stats=pre_stats) # ── End strategy_utils ── DATA_PATH = '/root/Desktop/QuantifyMe/data/ohlc/GBPUSD_15min.parquet' START_DATE = '2026-04-15' END_DATE = '2026-05-25' VALIDATION_DATE = "" TRAIN_SPLIT = 0.7 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # ── EMA crossover core signals ────────────────────────────────────────── ema9 = close.ewm(span=9, adjust=False).mean() ema21 = close.ewm(span=21, adjust=False).mean() ema50 = close.ewm(span=50, adjust=False).mean() ema200 = close.ewm(span=200, adjust=False).mean() df["ema9"] = ema9 df["ema21"] = ema21 df["ema50"] = ema50 df["ema200"] = ema200 # Raw spread and normalised spread df["ema_diff"] = ema9 - ema21 df["ema_diff_norm"] = (ema9 - ema21) / close # Cross signal: +1 when ema9 > ema21, -1 otherwise df["ema_cross_sign"] = np.where(ema9 > ema21, 1.0, -1.0) # Momentum of the spread (rate of change of spread) df["ema_diff_roc1"] = df["ema_diff"].diff(1) df["ema_diff_roc3"] = df["ema_diff"].diff(3) # Distance of price from ema50 and ema200 (normalised) df["dist_ema50"] = (close - ema50) / close df["dist_ema200"] = (close - ema200) / close # ── RSI (14) ──────────────────────────────────────────────────────────── delta = close.diff() gain = delta.clip(lower=0.0) loss = (-delta).clip(lower=0.0) avg_g = gain.ewm(com=13, adjust=False).mean() avg_l = loss.ewm(com=13, adjust=False).mean() rs = avg_g / avg_l.replace(0.0, np.nan) rsi14 = 100.0 - 100.0 / (1.0 + rs) df["rsi14"] = rsi14 # RSI normalised and centred df["rsi14_norm"] = (rsi14 - 50.0) / 50.0 # ── MACD ──────────────────────────────────────────────────────────────── ema12 = close.ewm(span=12, adjust=False).mean() ema26 = close.ewm(span=26, adjust=False).mean() macd_line = ema12 - ema26 signal_ln = macd_line.ewm(span=9, adjust=False).mean() macd_hist = macd_line - 
signal_ln df["macd_line"] = macd_line / close df["macd_signal"] = signal_ln / close df["macd_hist"] = macd_hist / close df["macd_cross"] = np.where(macd_line > signal_ln, 1.0, -1.0) # ── Bollinger Bands (20, 2) ────────────────────────────────────────────── bb_mid = close.rolling(20).mean() bb_std = close.rolling(20).std(ddof=0) bb_upper = bb_mid + 2.0 * bb_std bb_lower = bb_mid - 2.0 * bb_std bb_width = (bb_upper - bb_lower) / bb_mid.replace(0.0, np.nan) bb_pct = (close - bb_lower) / (bb_upper - bb_lower).replace(0.0, np.nan) df["bb_width"] = bb_width df["bb_pct"] = bb_pct # ── ATR (14) ───────────────────────────────────────────────────────────── tr = pd.concat([ high - low, (high - close.shift(1)).abs(), (low - close.shift(1)).abs() ], axis=1).max(axis=1) atr14 = tr.ewm(com=13, adjust=False).mean() df["atr14"] = atr14 df["natr14"] = atr14 / close # normalised ATR (volatility proxy) # ── Stochastic %K / %D (14, 3) ────────────────────────────────────────── low14 = low.rolling(14).min() high14 = high.rolling(14).max() stoch_k = 100.0 * (close - low14) / (high14 - low14).replace(0.0, np.nan) stoch_d = stoch_k.rolling(3).mean() df["stoch_k"] = stoch_k / 100.0 df["stoch_d"] = stoch_d / 100.0 df["stoch_diff"] = (stoch_k - stoch_d) / 100.0 # ── Rate of Change ─────────────────────────────────────────────────────── df["roc1"] = close.pct_change(1) df["roc4"] = close.pct_change(4) df["roc8"] = close.pct_change(8) df["roc16"] = close.pct_change(16) # ── Candle features ────────────────────────────────────────────────────── body = (close - open_).abs() candle_rng = (high - low).replace(0.0, np.nan) df["body_ratio"] = body / candle_rng df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / candle_rng df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / candle_rng df["candle_dir"] = np.where(close >= open_, 1.0, -1.0) # ── Volume-like proxy: range relative to rolling average ──────────────── df["range_ratio"] = candle_rng / 
candle_rng.rolling(20).mean() # ── Lagged EMA diff features ───────────────────────────────────────────── for lag in [1, 2, 3, 4]: df[f"ema_diff_lag{lag}"] = df["ema_diff_norm"].shift(lag) # ── Lagged RSI ─────────────────────────────────────────────────────────── for lag in [1, 2, 4]: df[f"rsi14_lag{lag}"] = df["rsi14_norm"].shift(lag) # ── Rolling volatility (std of returns) ────────────────────────────────── ret = close.pct_change() df["vol_8"] = ret.rolling(8).std() df["vol_16"] = ret.rolling(16).std() df["vol_32"] = ret.rolling(32).std() # ── Trend strength: ADX-like (simplified) ──────────────────────────────── plus_dm = (high.diff()).clip(lower=0.0) minus_dm = (-low.diff()).clip(lower=0.0) overlap = pd.concat([plus_dm, minus_dm], axis=1).min(axis=1) plus_dm = plus_dm - overlap minus_dm = minus_dm - overlap smooth_tr = tr.ewm(com=13, adjust=False).mean() plus_di = 100.0 * plus_dm.ewm(com=13, adjust=False).mean() / smooth_tr.replace(0.0, np.nan) minus_di = 100.0 * minus_dm.ewm(com=13, adjust=False).mean() / smooth_tr.replace(0.0, np.nan) di_sum = (plus_di + minus_di).replace(0.0, np.nan) adx = ((plus_di - minus_di).abs() / di_sum * 100.0).ewm(com=13, adjust=False).mean() df["adx"] = adx / 100.0 df["plus_di"] = plus_di / 100.0 df["minus_di"] = minus_di / 100.0 # ── Session hour (UTC) ─────────────────────────────────────────────────── if hasattr(df.index, "hour"): df["hour_sin"] = np.sin(2.0 * np.pi * df.index.hour / 24.0) df["hour_cos"] = np.cos(2.0 * np.pi * df.index.hour / 24.0) else: df["hour_sin"] = 0.0 df["hour_cos"] = 1.0 # ── Fill any NaN from warm-up periods ──────────────────────────────────── df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "EMA 9/21 Crossover + MACD Momentum (XGBoost)", "model_type": "XGBClassifier", "model_params": { "n_estimators": 400, "max_depth": 4, "learning_rate": 0.04, "subsample": 0.80, "colsample_bytree": 0.75, "min_child_weight": 3, "gamma": 0.10, "reg_alpha": 0.05, 
"reg_lambda": 1.50, "objective": "binary:logistic", "tree_method": "hist", "random_state": 42, }, "signal_threshold": 0.55, "direction": "both", "stop_loss": 0.0030, "take_profit": 0.0060, "cooldown": 0, "max_positions": 1, "on_opposite": "reverse", "session_filter": [6, 20], "min_atr": None, "trend_filter": "sma_50", "target_horizon": 4, "objective": ( "Maximize Sharpe ratio on EUR/USD 15-min data. " "Core signal: EMA(9) vs EMA(21) crossover enriched with MACD, RSI, " "Bollinger %B, Stochastic, ATR, ADX, candle structure and rolling " "volatility. XGBoost with moderate depth (4) and strong regularisation " "(gamma, alpha, lambda) prevents overfitting on ~6 weeks of intraday data. " "A 0.55 probability threshold filters low-confidence signals. " "A 2:1 TP:SL ratio (30 bp SL / 60 bp TP) improves the reward-risk " "balance. Session filter [6,20] UTC keeps the model away from the thin " "Asian pre-open. trend_filter sma_50 aligns entries with the prevailing " "short-term trend to reduce chop. Cooldown=0 and reverse-on-opposite " "allow continuous participation in trending EMA crossover moves." ), "notes": ( "round-trip cost 2e-5 is accounted for by the framework. " "target_horizon=4 bars (1 hour ahead) suits EMA crossover which " "generates medium-frequency signals rather than tick-level scalps. " "All features are normalised or expressed as ratios to minimise " "scale sensitivity for the logistic-objective XGBoost." ), } # ── Framework v2: auto-generated wrapper ── def train_and_backtest(): _vd = VALIDATION_DATE if 'VALIDATION_DATE' in globals() else '' _ts = TRAIN_SPLIT if 'TRAIN_SPLIT' in globals() else 0.7 return run_strategy( feature_engineering, strategy_config, DATA_PATH, START_DATE, END_DATE, _vd, _ts, register_model_fn=register_model )
1.62	USD/CAD BB Mean-Reversion + ATR XGBoost Maximise risk-adjusted return (Sharpe/Calmar) on USD/CAD 15-min using Bollinger Band mean-reversion signals augmented by ATR, RSI, MACD, and…	C @candle_owl	USDCAD	15min	59.1%57.0%	+4.84%+3.36%	1.311.14	1.34%1.34%	36279
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 02:36:58
# Model : XGBoost
# Feature Eng. : BB (20,2.0), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================

# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd

DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/USDCAD_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7


# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add Bollinger/ATR/RSI/MACD-style feature columns to ``df``.

    Args:
        df: OHLC frame; feature columns are added to it in place.
        close, open_, high, low: price series aligned with ``df.index``.

    Returns:
        The frame with all feature columns, forward-filled only. Leading
        warm-up rows keep their NaN values: back-filling them would copy
        later observations into earlier bars (look-ahead) and would hide
        the warm-up period from any caller that detects it via NaN.
    """
    # ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
    bb_period = 20
    bb_std = 2.0
    bb_mid = close.rolling(bb_period).mean()
    bb_sigma = close.rolling(bb_period).std(ddof=0)
    bb_upper = bb_mid + bb_std * bb_sigma
    bb_lower = bb_mid - bb_std * bb_sigma
    # A flat window gives a zero-width band; NaN instead of +/-inf for %B.
    band_range = (bb_upper - bb_lower).replace(0.0, np.nan)
    df["bb_mid"] = bb_mid
    df["bb_upper"] = bb_upper
    df["bb_lower"] = bb_lower
    df["bb_width"] = (bb_upper - bb_lower) / bb_mid
    df["bb_pct"] = (close - bb_lower) / band_range

    # ── ATR (14) & Normalised ATR ────────────────────────────────────────────
    atr_period = 14
    prev_close = close.shift(1)
    tr = pd.concat(
        [
            high - low,
            (high - prev_close).abs(),
            (low - prev_close).abs(),
        ],
        axis=1,
    ).max(axis=1)
    atr = tr.ewm(alpha=1.0 / atr_period, min_periods=atr_period, adjust=False).mean()
    natr = atr / close
    df["atr"] = atr
    df["natr"] = natr

    # ── Price momentum / returns ─────────────────────────────────────────────
    df["ret_1"] = close.pct_change(1)
    df["ret_4"] = close.pct_change(4)
    df["ret_8"] = close.pct_change(8)
    df["ret_16"] = close.pct_change(16)

    # ── Distance from Bollinger mid / bands ──────────────────────────────────
    df["close_minus_mid"] = (close - bb_mid) / bb_mid
    df["close_minus_upper"] = (close - bb_upper) / bb_mid
    df["close_minus_lower"] = (close - bb_lower) / bb_mid

    # ── BB squeeze flag: width below rolling median ──────────────────────────
    bb_width_med = df["bb_width"].rolling(50).median()
    df["bb_squeeze"] = np.where(df["bb_width"] < bb_width_med, 1.0, 0.0)

    # ── BB mean-reversion z-score ────────────────────────────────────────────
    df["bb_z"] = (close - bb_mid) / (bb_sigma + 1e-12)

    # ── Candle body / wick features ──────────────────────────────────────────
    body = (close - open_).abs()
    candle_rng = (high - low).replace(0, np.nan)
    body_top = pd.concat([close, open_], axis=1).max(axis=1)
    body_bottom = pd.concat([close, open_], axis=1).min(axis=1)
    df["body_ratio"] = body / candle_rng
    df["upper_wick"] = (high - body_top) / candle_rng
    df["lower_wick"] = (body_bottom - low) / candle_rng
    df["bull_candle"] = np.where(close > open_, 1.0, 0.0)

    # ── RSI (14) built from scratch ──────────────────────────────────────────
    rsi_period = 14
    delta = close.diff()
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)
    avg_g = gain.ewm(alpha=1.0 / rsi_period, min_periods=rsi_period, adjust=False).mean()
    avg_l = loss.ewm(alpha=1.0 / rsi_period, min_periods=rsi_period, adjust=False).mean()
    rs = avg_g / (avg_l + 1e-12)
    rsi = 100.0 - (100.0 / (1.0 + rs))
    df["rsi_14"] = rsi
    # RSI deviation from neutral 50
    df["rsi_dev"] = (rsi - 50.0) / 50.0

    # ── MACD (12, 26, 9) ─────────────────────────────────────────────────────
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    macd_line = ema12 - ema26
    macd_sig = macd_line.ewm(span=9, adjust=False).mean()
    df["macd"] = macd_line / close
    df["macd_hist"] = (macd_line - macd_sig) / close

    # ── Rolling volatility (realised over 20 bars) ───────────────────────────
    df["vol_20"] = df["ret_1"].rolling(20).std()

    # ── ATR z-score vs 50-bar rolling mean ───────────────────────────────────
    atr_mean = atr.rolling(50).mean()
    atr_std = atr.rolling(50).std(ddof=0)
    df["atr_z"] = (atr - atr_mean) / (atr_std + 1e-12)

    # ── Count of BB touches over last 10 bars ────────────────────────────────
    near_upper = (close >= bb_upper * 0.998).astype(float)
    near_lower = (close <= bb_lower * 1.002).astype(float)
    df["touch_upper_10"] = near_upper.rolling(10).sum()
    df["touch_lower_10"] = near_lower.rolling(10).sum()

    # ── SMA 50 (trend filter helper) ─────────────────────────────────────────
    df["sma_50"] = close.rolling(50).mean()
    df["close_vs_sma"] = (close - df["sma_50"]) / df["sma_50"]

    # ── EMA cross (9 / 21) ───────────────────────────────────────────────────
    ema9 = close.ewm(span=9, adjust=False).mean()
    ema21 = close.ewm(span=21, adjust=False).mean()
    df["ema_cross"] = (ema9 - ema21) / close

    # ── Bar-of-day / session ─────────────────────────────────────────────────
    if hasattr(df.index, "hour"):
        df["hour_sin"] = np.sin(2 * np.pi * df.index.hour / 24.0)
        df["hour_cos"] = np.cos(2 * np.pi * df.index.hour / 24.0)
    else:
        df["hour_sin"] = 0.0
        df["hour_cos"] = 1.0

    # ── Lag features on bb_pct, rsi and macd_hist ────────────────────────────
    for lag in [1, 2, 4]:
        df[f"bb_pct_lag{lag}"] = df["bb_pct"].shift(lag)
        df[f"rsi_14_lag{lag}"] = df["rsi_14"].shift(lag)
        df[f"macd_hist_lag{lag}"] = df["macd_hist"].shift(lag)

    # ── Fill interior gaps with past values only ─────────────────────────────
    # Forward fill never uses future bars; leading warm-up NaN are kept.
    df = df.ffill()
    return df


# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the model, signal and risk settings for this strategy."""
    return {
        "title": "USD/CAD BB Mean-Reversion + ATR XGBoost",
        "model_type": "XGBClassifier",
        "model_params": {
            "n_estimators": 400,
            "max_depth": 4,
            "learning_rate": 0.04,
            "subsample": 0.75,
            "colsample_bytree": 0.70,
            "min_child_weight": 3,
            "gamma": 0.15,
            "reg_alpha": 0.10,
            "reg_lambda": 1.50,
            "objective": "binary:logistic",
            "random_state": 42,
            "n_jobs": -1,
        },
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": [7, 20],
        "min_atr": 0.0002,
        "trend_filter": None,
        "target_horizon": 4,
        "objective": (
            "Maximise risk-adjusted return (Sharpe/Calmar) on USD/CAD 15-min "
            "using Bollinger Band mean-reversion signals augmented by ATR, RSI, "
            "MACD, and EMA-cross features fed into a regularised XGBoost classifier. "
            "SL=0.5% / TP=1.0% gives a 1:2 RR floor. Conservative depth (4) and "
            "strong L1/L2 regularisation prevent overfitting on a single year of data."
        ),
        "notes": (
            "BB squeeze flag and bb_z capture regime; atr_z filters noisy bars. "
            "Session filter 07-20 UTC covers London + NY overlap for tighter spreads. "
            "min_atr=0.0002 avoids dead-market whipsaws. Lag features on bb_pct and "
            "rsi_14 give the model short-term momentum context without look-ahead."
        ),
    }
1.59	Bollinger reversion	M @malcolmtan	Bollin		47.9%—	+1.53%—	1.46—	0.67%0.67%	71—
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-25 02:29:29
# Model : XGBoost
# Feature Eng. : buy when price closes below the lower Bollinger Band(20,2) and RSI(14) < 35, exit at the middle band + Auto-add features: ON
# Signal / Entry : —
# Optimization : —
# Risk Mgmt : —
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# Bollinger Band Mean-Reversion + RSI Filter (XGBoost, Sharpe)
# ============================================================

# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd

# ── Inlined strategy_utils ──
"""
strategy_utils.py — Standard utility functions for generated strategies.
Claude imports these instead of writing boilerplate from scratch.
This ensures consistent behavior across all generated strategies.
"""
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Max backtest window per timeframe. A finer timeframe over a longer window
# blows up the results dict / parquet load / Modal train time (the 2026-05-12
# OOM was a 1-min × multi-year sweep) — and a 1-min strategy gains nothing from
# 2 years of 1-min bars. Enforced HERE because every training path (UI / API /
# Modal) funnels through run_strategy → load_ohlc. Env-overridable so a future
# "max plan" / dedicated-server tier can lift it.
_TF_MAX_DAYS = {
    "1min": 30,
    "5min": 90,
    "15min": 365,
    "1h": 730,
}


def _fetch_ohlc_from_internal(symbol: str, tf: str, start: str, end: str):
    """Phase 3.2: fetch parquet bytes from Server A's /internal/ohlc endpoint
    instead of reading a local file. Used inside Modal containers / Mac worker
    pool (Phase 3.4) so every train sees the same source of truth as the chart.

    The request is signed with HMAC-SHA256 over ``symbol|tf|start|end`` using
    the ``INTERNAL_WS_SECRET`` environment variable.

    Returns: pd.DataFrame (parquet decoded), or raises on any failure so the
    caller can fall back / surface a clear error in the job.

    Raises:
        RuntimeError: if ``QM_INTERNAL_OHLC_BASE`` or ``INTERNAL_WS_SECRET`` is
            unset, or the endpoint answers with a non-200 status.
    """
    import hashlib as _hashlib, hmac as _hmac, io as _io, os as _os
    import urllib.request as _ur, urllib.parse as _urp

    base = (_os.environ.get("QM_INTERNAL_OHLC_BASE") or "").rstrip("/")
    secret = (_os.environ.get("INTERNAL_WS_SECRET") or "").strip()
    if not base:
        raise RuntimeError("QM_INTERNAL_OHLC_BASE not set")
    if not secret:
        raise RuntimeError("INTERNAL_WS_SECRET not set")

    # Fields are joined with a plain pipe; a backslash-escaped pipe would
    # change the signed message.
    msg = f"{symbol}|{tf}|{start}|{end}".encode("utf-8")
    sig = _hmac.new(secret.encode("utf-8"), msg, _hashlib.sha256).hexdigest()
    qs = _urp.urlencode({
        "symbol": symbol,
        "tf": tf,
        "start": start,
        "end": end,
        "sig": sig,
    })
    url = f"{base}/internal/ohlc?{qs}"
    req = _ur.Request(url, headers={"User-Agent": "qm-worker/1.0"})
    with _ur.urlopen(req, timeout=30) as resp:
        if resp.status != 200:
            raise RuntimeError(f"/internal/ohlc returned {resp.status}")
        payload = resp.read()
    print(f"[load_ohlc:internal] {symbol} {tf} fetched {len(payload)} bytes", flush=True)
    return pd.read_parquet(_io.BytesIO(payload))


def _parse_symbol_tf_from_path(data_path: str):
    """Pull SYMBOL + TF out of a path like .../EURUSD_1min.parquet.

    Returns ``(symbol, tf)``, or ``(None, None)`` when the basename does not
    match ``<6 uppercase letters>_<N>min.parquet`` / ``..._<N>h.parquet``.
    """
    import os as _os, re as _re

    base = _os.path.basename(str(data_path))
    # Unescaped `|` is regex alternation between the "min" and "h" suffixes.
    m = _re.match(r"^([A-Z]{6})_(\d+min|\d+h)\.parquet$", base)
    if not m:
        return None, None
    return m.group(1), m.group(2)


def load_ohlc(data_path, start_date="", end_date=""):
    """Load OHLC parquet, sort index, filter dates. Always returns consistent format.

    The lower bound is clamped per timeframe (see _TF_MAX_DAYS) — a request for
    more history than the cap silently starts later.

    Phase 3.2: when env QM_USE_INTERNAL_OHLC=="1", fetch over HTTP from
    Server A's /internal/ohlc endpoint instead of pd.read_parquet on a local
    file (which on Modal is a stale Volume snapshot). The endpoint applies the
    same day-cap, so the local cap-check below is a defensive no-op in that
    path. Flag defaults to "0" → unchanged behavior.

    Returns: (df, close, open_, high, low)
    """
    import os as _os, re as _re

    _use_internal = _os.environ.get("QM_USE_INTERNAL_OHLC", "0") == "1"
    if _use_internal:
        _sym, _tf = _parse_symbol_tf_from_path(data_path)
        if not _sym or not _tf:
            raise RuntimeError(
                f"QM_USE_INTERNAL_OHLC=1 but DATA_PATH basename does not match "
                f"SYMBOL_TF.parquet: {data_path}"
            )
        df = _fetch_ohlc_from_internal(_sym, _tf, start_date or "", end_date or "")
    else:
        df = pd.read_parquet(data_path)
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()

    # Per-timeframe window cap (timeframe inferred from the parquet filename).
    _m = _re.search(r"_(\d+min|\d+h)\.parquet$", _os.path.basename(str(data_path)))
    _tf = _m.group(1) if _m else None
    _max_days = _TF_MAX_DAYS.get(_tf)
    if _max_days and _max_days > 0 and len(df):
        _env_override = _os.environ.get(f"QM_MAX_DAYS_{_tf.upper()}")
        if _env_override and _env_override.isdigit():
            _max_days = int(_env_override)
        # Best effort: an unparsable date must not abort the load, so the cap
        # is skipped (and logged) rather than raised.
        try:
            _eff_end = pd.Timestamp(end_date) if end_date else df.index.max()
            _eff_end = min(_eff_end, df.index.max())
            _floor = _eff_end - pd.Timedelta(days=_max_days)
            _req_start = pd.Timestamp(start_date) if start_date else df.index.min()
            if _req_start < _floor:
                print(f"[load_ohlc] {_tf} backtest window capped to {_max_days}d: "
                      f"start {_req_start.date()} -> {_floor.date()}", flush=True)
                start_date = _floor
        except Exception as _e:
            print(f"[load_ohlc] window-cap check skipped ({_e})", flush=True)

    if start_date:
        df = df[df.index >= start_date]
    if end_date:
        df = df[df.index <= end_date]
    return df, df["close"], df["open"], df["high"], df["low"]


def make_target(close, horizon=4):
    """Create target: direction N bars ahead. Default 4 bars = 1 hour on 15-min data.

    Returns: target (pd.Series of -1, 0, 1; NaN for the last ``horizon`` bars)
    """
    return np.sign(close.shift(-horizon) - close)


def split_data(df, target, feature_cols, train_split=0.7, validation_date=""):
    """Train/test split. Handles both ratio and date-based splits.
    Drops NaN from target before splitting. Encodes labels to [0,1,2].

    Gaps in the feature matrix are forward-filled inside each partition and
    any remaining NaN set to 0.0, so no value from a later bar (or from the
    test partition) is ever copied backwards into an earlier row.

    Returns: dict with keys:
        X_train, X_test, y_train, y_test, y_train_enc, y_test_enc, enc,
        close_train, close_test, split_idx, split_dt, n_train, n_test
    """
    # Drop NaN from target
    mask = target.notna()
    df = df[mask].copy()
    target = target[mask]
    close = df["close"]

    # Build feature matrix (inf treated as missing)
    X = df[feature_cols].copy()
    X = X.replace([np.inf, -np.inf], np.nan)

    # Split
    if validation_date:
        split_idx = len(df[df.index <= validation_date])
    else:
        split_idx = int(len(df) * train_split)
    split_idx = max(1, min(split_idx, len(df) - 1))

    # Forward fill within each partition separately (no back-fill leak)
    X_train = X.iloc[:split_idx].ffill().fillna(0.0)
    X_test = X.iloc[split_idx:].ffill().fillna(0.0)
    y_train = target.iloc[:split_idx]
    y_test = target.iloc[split_idx:]
    close_train = close.iloc[:split_idx]
    close_test = close.iloc[split_idx:]
    split_dt = str(df.index[split_idx])

    # Label encoding — always fit on [-1, 0, 1]
    enc = LabelEncoder()
    enc.fit([-1, 0, 1])
    y_train_enc = enc.transform(y_train)
    y_test_enc = enc.transform(y_test)

    return {
        "df": df,
        "X_train": X_train,
        "X_test": X_test,
        "y_train": y_train,
        "y_test": y_test,
        "y_train_enc": y_train_enc,
        "y_test_enc": y_test_enc,
        "enc": enc,
        "close": close,
        "close_train": close_train,
        "close_test": close_test,
        "split_idx": split_idx,
        "split_dt": split_dt,
        "n_train": len(X_train),
        "n_test": len(X_test),
    }


def compute_overlays(close, df_index):
    """Compute BB and MA overlays on full dataset. Always consistent.

    Returns: (bb_dict, ma_dict) — lists aligned to ``df_index`` with ``None``
    wherever a value is NaN/inf after filling.
    """
    bb_mid = close.rolling(20).mean()
    bb_std = close.rolling(20).std()
    bb_upper = bb_mid + 2 * bb_std
    bb_lower = bb_mid - 2 * bb_std
    ma50 = close.rolling(50).mean()
    ma100 = close.rolling(100).mean()
    ma200 = close.rolling(200).mean()

    def _safe(s):
        s = s.reindex(df_index).bfill().ffill()
        return [float(x) if (x is not None and not np.isnan(x) and not np.isinf(x)) else None
                for x in s.values]

    bb = {"upper": _safe(bb_upper), "mid": _safe(bb_mid), "lower": _safe(bb_lower)}
    ma = {"ma50": _safe(ma50), "ma100": _safe(ma100), "ma200": _safe(ma200)}
    return bb, ma


def run_backtest(signal, close, capital=10000, cost=2e-5):
    """Run backtest with transaction costs.
    Uses price-based trade returns (same as webapp _compute_trades).
    Signal 0 = hold (keep current position), not close.

    The equity curve compounds realised trades only: it steps on the bar where
    a trade is closed by a direction change. The trade still open on the last
    bar is closed into the trade lists ("exit_reason": "end") but does not
    move the equity curve.

    Returns: dict with equity, trade_returns, long_returns, short_returns,
    bar_returns, trade_log
    """
    sig_arr = signal.values
    price_arr = close.values
    idx = signal.index
    n = len(price_arr)

    trade_returns = []
    long_returns = []
    short_returns = []
    trade_log = []

    def _close_trade(direction, entry_px, entry_i, exit_i, reason):
        """Record one finished trade and return its net fractional return."""
        exit_px = price_arr[exit_i]
        ret = float(direction * (exit_px - entry_px) / entry_px - cost)
        trade_returns.append(ret)
        if direction == 1:
            long_returns.append(ret)
        else:
            short_returns.append(ret)
        trade_log.append({
            "type": "Buy" if direction == 1 else "Sell",
            "entry_time": str(idx[entry_i]),
            "exit_time": str(idx[exit_i]),
            "entry_price": round(entry_px, 5),
            "exit_price": round(exit_px, 5),
            "pnl": round(direction * (exit_px - entry_px), 5),
            "pnl_pct": round(ret * 100, 3),
            "exit_reason": reason,
        })
        return ret

    last_dir = None
    entry_price = None
    entry_bar = None
    cumret = 1.0
    equity_vals = np.full(n, float(capital))
    for i in range(n):
        s = sig_arr[i]
        if s != 0.0 and s != last_dir:
            # Direction change — close previous trade, open new
            if last_dir is not None and entry_price is not None and entry_price != 0:
                cumret *= 1 + _close_trade(last_dir, entry_price, entry_bar, i, "signal")
            entry_price = price_arr[i]
            entry_bar = i
            last_dir = s
        equity_vals[i] = capital * cumret

    # Close last open trade at the final bar
    if last_dir is not None and entry_price is not None and n > 0 and entry_price != 0:
        _close_trade(last_dir, entry_price, entry_bar, n - 1, "end")

    # Bar returns for Sharpe (stay zero when no position was ever opened)
    bar_returns = np.zeros(n)
    if last_dir is not None:
        for i in range(1, n):
            prev_px = price_arr[i - 1]
            prev_sig = sig_arr[i - 1]
            if prev_px != 0 and prev_sig != 0:
                bar_returns[i] = prev_sig * (price_arr[i] - prev_px) / prev_px

    return {
        "equity": pd.Series(equity_vals, index=close.index),
        "trade_returns": trade_returns,
        "long_returns": long_returns,
        "short_returns": short_returns,
        "bar_returns": bar_returns,
        "trade_log": trade_log,
    }


def compute_trade_stats(trades, capital=10000):
    """Single source of truth for trade statistics.
    Every display path reads from this — no recomputation anywhere.
    All values are rounded; profit factor and risk:reward are capped at
    9999.0 when there are no losing trades.

    Args:
        trades: sequence of per-trade fractional returns.
        capital: starting capital used for net profit and expectancy.

    Returns: dict with n, wins, losses, wr, avg, best, worst, ret, np, mdd,
    pf, rr, expect
    """
    if trades is None or len(trades) == 0:
        return {"n": 0, "wins": 0, "losses": 0, "wr": 0, "avg": 0,
                "best": 0, "worst": 0, "ret": 0, "np": 0, "mdd": 0,
                "pf": 0, "rr": 0, "expect": 0}

    w = [r for r in trades if r > 0]
    l = [r for r in trades if r < 0]

    # Compounded return across all trades
    cumret = 1.0
    for r in trades:
        cumret *= (1 + r)
    net_p = capital * (cumret - 1)

    # Max drawdown
    eq = np.cumprod([1.0] + [1 + r for r in trades])
    peak = np.maximum.accumulate(eq)
    mdd = float(((eq - peak) / peak).min()) if len(eq) > 1 else 0.0

    # Profit Factor
    gross_w = sum(w) if w else 0
    gross_l = abs(sum(l)) if l else 0
    pf = gross_w / gross_l if gross_l > 0 else (9999.0 if gross_w > 0 else 0)

    # Risk:Reward
    avg_w = float(np.mean(w)) if w else 0
    avg_l = abs(float(np.mean(l))) if l else 0
    rr = avg_w / avg_l if avg_l > 0 else (9999.0 if avg_w > 0 else 0)

    # Expectancy
    expect = net_p / len(trades)

    return {
        "n": len(trades),
        "wins": len(w),
        "losses": len(l),
        "wr": round(len(w) / len(trades), 4),
        "avg": round(float(np.mean(trades)), 6),
        "best": round(max(w), 6) if w else 0,
        "worst": round(min(l), 6) if l else 0,
        "ret": round(cumret - 1, 6),
        "np": round(net_p, 2),
        "mdd": round(mdd, 6),
        "pf": round(pf, 2),
        "rr": round(rr, 2),
        "expect": round(expect, 2),
    }


def compute_metrics(bt_result, close_test, capital=10000):
    """Compute all standard metrics from backtest result.
    Uses trade-level compounding (same as webapp _trade_stats) for accuracy.

    Returns: dict with total_ret, bh_ret, sharpe_strat, sharpe_bh, mdd, n_trades
    """
    trade_returns = bt_result["trade_returns"]

    # Total return — trade-level compounding (matches webapp)
    if trade_returns:
        cumret = 1.0
        for r in trade_returns:
            cumret *= (1 + r)
        total_ret = cumret - 1
    else:
        total_ret = 0.0

    # Buy and hold
    bh_equity = capital * (close_test / close_test.iloc[0])
    bh_ret = (bh_equity.iloc[-1] - capital) / capital if capital != 0 else 0.0

    # Sharpe ratio — trade-level (matches webapp: sqrt(252*26) annualization)
    if len(trade_returns) >= 2 and float(np.std(trade_returns)) > 0:
        sharpe_strat = float(np.mean(trade_returns) / np.std(trade_returns) * np.sqrt(252 * 26))
    else:
        sharpe_strat = 0.0

    bh_rets = bh_equity.pct_change().dropna()
    if len(bh_rets) > 1 and bh_rets.std() != 0:
        sharpe_bh = float((bh_rets.mean() / bh_rets.std()) * np.sqrt(252 * 24 * 4))
    else:
        sharpe_bh = 0.0

    # Max drawdown — trade-level (matches webapp)
    if trade_returns:
        eq = np.cumprod([1.0] + [1 + r for r in trade_returns])
        peak = np.maximum.accumulate(eq)
        mdd = float(((eq - peak) / peak).min()) if len(eq) > 1 else 0.0
    else:
        mdd = 0.0

    return {
        "total_ret": float(total_ret),
        "bh_ret": float(bh_ret),
        "sharpe_strat": float(sharpe_strat) if not np.isnan(sharpe_strat) else 0.0,
        "sharpe_bh": float(sharpe_bh) if not np.isnan(sharpe_bh) else 0.0,
        "mdd": float(mdd),
        "n_trades": len(trade_returns),
    }


# Diagnostics line/histogram series (equity / drawdown / rolling_acc / conf_hist)
# only feed the small Diagnostics charts — they're never used by the price chart
# or scroll-back. On a 1-min model trained over the (2.2-capped) window these are
# still ~30k points each; downsample to a visually-identical resolution before the
# dict leaves the trainer so it doesn't carry that into Server-A RAM / Postgres.
_RESULTS_SERIES_MAX = 5000


def _downsample_idx(n, cap=_RESULTS_SERIES_MAX):
    """Return evenly spaced indices spanning [0, n-1], or None if n <= cap.

    The first and last positions are always kept.  ``None`` means "no
    downsampling needed" and is understood by ``_take``.
    """
    if n <= cap:
        return None
    return np.unique(np.linspace(0, n - 1, cap).astype(int)).tolist()


def _take(arr, idx):
    """Subset a list by an index list.

    ``arr`` is returned unchanged when ``idx`` is None or ``arr`` is not a
    list.
    """
    if idx is None or not isinstance(arr, list):
        return arr
    return [arr[i] for i in idx]


# trade_log / train_trade_log are lists of per-trade dicts (display-only — the
# Trade Log tab). They scale with TRADE count, not bar count, so the bar-window
# cap (Phase 2.2) doesn't bound them — a degenerate near-every-bar model can put
# 10k+ trade dicts in the blob (>3 MB). Cap each (independently — a small-N model
# keeps every trade) to the most-recent N, recording `_total` + `_truncated`
# so the true count is still reported. Real strategies have far fewer than
# _TRADE_LOG_MAX trades, so this only ever bites pathological models.
_TRADE_LOG_MAX = 5000


def _cap_trade_log(tl):
    """Return ``(capped_list, original_len, was_truncated)``.

    Lists longer than ``_TRADE_LOG_MAX`` keep only their last
    ``_TRADE_LOG_MAX`` entries.  A non-list input is passed through with a
    reported length of 0.
    """
    if not isinstance(tl, list) or len(tl) <= _TRADE_LOG_MAX:
        return tl, (len(tl) if isinstance(tl, list) else 0), False
    return tl[-_TRADE_LOG_MAX:], len(tl), True


def build_return_dict(split_result, bt_result, metrics, model, feature_cols,
                      signal_full, p_pos_test, p_neg_test, custom_figs=None,
                      bt_train_result=None, pre_stats=None):
    """Assemble the complete return dict. Handles ALL serialization.

    Never returns Timestamps, numpy arrays, or non-JSON types.

    Args:
        split_result: dict read for the keys "df", "close", "close_test",
            "X_test", "y_test", "split_dt", "split_idx", "n_train", "n_test".
        bt_result: test backtest dict; read for "equity", "bar_returns",
            "trade_returns", "long_returns", "short_returns" and (optionally)
            "trade_log".
        metrics: passed through unchanged under the "metrics" key.
        model: fitted model exposing ``predict`` and ``feature_importances_``.
        feature_cols: feature column names, paired with the importances.
        signal_full: signal Series (train + test) used for the "signals"
            block and for the rolling-accuracy activity mask.
        p_pos_test, p_neg_test: per-bar class probabilities for the test set.
        custom_figs: optional list passed through (defaults to []).
        bt_train_result: optional train backtest dict (same keys as
            ``bt_result``); when missing the train_* entries are empty.
        pre_stats: optional dict whose items are merged into the top level of
            the result.

    Returns: JSON-safe dict with all required keys
    """
    df = split_result["df"]
    close = split_result["close"]
    close_test = split_result["close_test"]
    X_test = split_result["X_test"]
    y_test = split_result["y_test"]
    equity = bt_result["equity"]
    bar_returns = bt_result["bar_returns"]  # not used below; lookup kept so a missing key still raises

    # OHLC
    ohlc_dates = [str(x) for x in df.index.tolist()]

    def _safe_list(arr):
        # NaN / inf / None become None so the list stays JSON-serialisable.
        return [
            float(x) if (x is not None and not np.isnan(x) and not np.isinf(x)) else None
            for x in arr
        ]

    # Overlays
    bb, ma = compute_overlays(close, df.index)

    # Buy and hold equity
    capital = equity.iloc[0] if len(equity) > 0 else 10000
    bh_equity = capital * (close_test / close_test.iloc[0])

    # Confusion matrix
    from sklearn.metrics import confusion_matrix
    pred_test = model.predict(X_test)
    y_test_arr = np.asarray(y_test)
    cm = confusion_matrix(y_test_arr, pred_test, labels=[-1, 0, 1])

    # Rolling accuracy — only bars with a non-zero signal count; if the
    # reindexed signal does not line up with close_test every bar counts.
    sig_arr = signal_full.reindex(close_test.index).values
    correct = pd.Series((pred_test == y_test_arr).astype(float), index=X_test.index)
    if len(sig_arr) == len(close_test):
        active_test = pd.Series(sig_arr != 0, index=close_test.index)
    else:
        active_test = pd.Series(True, index=close_test.index)
    correct_active = correct.where(active_test, other=np.nan)
    rolling_acc = correct_active.rolling(30, min_periods=1).mean()

    # Feature importance (15 largest, ascending)
    importances = model.feature_importances_
    fi_pairs = sorted(zip(feature_cols, importances), key=lambda x: x[1])[-15:]

    # Drawdown
    rolling_max = equity.cummax()
    drawdown = (equity - rolling_max) / rolling_max.replace(0, np.nan)
    drawdown = drawdown.fillna(0.0)

    # ── Downsample the Diagnostics-only series (see _downsample_idx) ──────────
    _eq_dates = [str(x) for x in close_test.index.tolist()]
    _eq_strat = _safe_list(equity.values)
    _eq_bh = _safe_list(bh_equity.values)
    _eq_idx = _downsample_idx(len(_eq_dates))
    _eq_dates = _take(_eq_dates, _eq_idx)
    _eq_strat = _take(_eq_strat, _eq_idx)
    _eq_bh = _take(_eq_bh, _eq_idx)

    _ra_dates = [str(x) for x in rolling_acc.index.tolist()]
    _ra_vals = [
        float(x) if (not np.isnan(x) and not np.isinf(x)) else None
        for x in rolling_acc.values
    ]
    _ra_idx = _downsample_idx(len(_ra_dates))
    _ra_dates, _ra_vals = _take(_ra_dates, _ra_idx), _take(_ra_vals, _ra_idx)

    _dd_dates = [str(x) for x in drawdown.index.tolist()]
    _dd_vals = _safe_list(drawdown.values)
    _dd_idx = _downsample_idx(len(_dd_dates))
    _dd_dates, _dd_vals = _take(_dd_dates, _dd_idx), _take(_dd_vals, _dd_idx)

    _cp_pos = [float(x) for x in (p_pos_test.tolist() if hasattr(p_pos_test, 'tolist') else list(p_pos_test))]
    _cp_neg = [float(x) for x in (p_neg_test.tolist() if hasattr(p_neg_test, 'tolist') else list(p_neg_test))]
    _cp_pos = _take(_cp_pos, _downsample_idx(len(_cp_pos)))
    _cp_neg = _take(_cp_neg, _downsample_idx(len(_cp_neg)))

    # ── Trade logs — display-only (Trade Log tab); cap to most-recent N with a
    # `_total` field so the true count is still reported (see _cap_trade_log).
    # NB: ret_dist arrays are left FULL — a downstream path in callbacks.py
    # recomputes n_trades/win-rate from len(ret_dist), so a sample would skew
    # the displayed counts; they're small anyway and gzip handles them.
    _tl_test, _tl_test_n, _tl_test_tr = _cap_trade_log(bt_result.get("trade_log", []))
    _tl_tr, _tl_tr_n, _tl_tr_tr = _cap_trade_log(
        bt_train_result.get("trade_log", []) if bt_train_result else []
    )

    return {
        "ohlc": {
            "dates": ohlc_dates,
            "open": _safe_list(df["open"].values),
            "high": _safe_list(df["high"].values),
            "low": _safe_list(df["low"].values),
            "close": _safe_list(df["close"].values),
        },
        "signals": {
            "dates": [str(x) for x in signal_full.index.tolist()],
            "values": [float(x) for x in signal_full.values],
        },
        "bb": bb,
        "ma": ma,
        "equity": {
            "dates": _eq_dates,
            "strategy": _eq_strat,
            "bh": _eq_bh,
        },
        "feature_importance": {
            "names": [p[0] for p in fi_pairs],
            "values": [float(p[1]) for p in fi_pairs],
        },
        "conf_matrix": cm.tolist(),
        "conf_hist": {
            "p_pos": _cp_pos,
            "p_neg": _cp_neg,
        },
        "rolling_acc": {
            "dates": _ra_dates,
            "values": _ra_vals,
        },
        "drawdown": {
            "dates": _dd_dates,
            "values": _dd_vals,
        },
        "ret_dist": [float(x) for x in bt_result["trade_returns"]],
        "ret_dist_long": [float(x) for x in bt_result["long_returns"]],
        "ret_dist_short": [float(x) for x in bt_result["short_returns"]],
        "train_ret_dist": [float(x) for x in bt_train_result["trade_returns"]] if bt_train_result else [],
        "train_ret_dist_long": [float(x) for x in bt_train_result["long_returns"]] if bt_train_result else [],
        "train_ret_dist_short": [float(x) for x in bt_train_result["short_returns"]] if bt_train_result else [],
        "trade_log": _tl_test,
        "train_trade_log": _tl_tr,
        "trade_log_total": _tl_test_n,
        "train_trade_log_total": _tl_tr_n,
        "trade_log_truncated": _tl_test_tr,
        "train_trade_log_truncated": _tl_tr_tr,
        # Merge the pre-computed trade stats into the top level; keys listed
        # after this line win on a name clash.
        **(pre_stats or {}),
        "metrics": metrics,
        "split_dt": split_result["split_dt"],
        "split_idx": int(split_result["split_idx"]),
        "n_train": int(split_result["n_train"]),
        "n_test": int(split_result["n_test"]),
        "feature_cols": list(feature_cols),
        "custom_figs": custom_figs or [],
    }


# ════════════════════════════════════════════════════════════════════════════
# STRATEGY FRAMEWORK v2 — Config-driven
# architecture
# Claude writes feature_engineering() + strategy_config(). Framework does rest.
# ════════════════════════════════════════════════════════════════════════════
import importlib

_MODEL_REGISTRY = {
    "XGBClassifier": ("xgboost", "XGBClassifier"),
    "RandomForestClassifier": ("sklearn.ensemble", "RandomForestClassifier"),
    "GradientBoostingClassifier": ("sklearn.ensemble", "GradientBoostingClassifier"),
    "LogisticRegression": ("sklearn.linear_model", "LogisticRegression"),
    "ExtraTreesClassifier": ("sklearn.ensemble", "ExtraTreesClassifier"),
    "AdaBoostClassifier": ("sklearn.ensemble", "AdaBoostClassifier"),
}


def _build_model_from_config(config, X_train, y_train_enc):
    """Build, fit, and wrap a model from strategy_config dict.

    Args:
        config: strategy config; "model_type" selects an entry of
            ``_MODEL_REGISTRY`` and "model_params" is forwarded to the
            estimator constructor as keyword arguments.
        X_train: training feature matrix.
        y_train_enc: encoded training labels.

    Returns:
        A ``ModelWrapper`` around the fitted estimator.

    Raises:
        ValueError: if "model_type" is not in ``_MODEL_REGISTRY``.
    """
    model_type = config.get("model_type", "RandomForestClassifier")
    model_params = dict(config.get("model_params", {}))  # copy: never mutate the caller's dict

    if model_type not in _MODEL_REGISTRY:
        raise ValueError(f"Unknown model_type '{model_type}'. Valid: {list(_MODEL_REGISTRY.keys())}")

    module_path, class_name = _MODEL_REGISTRY[model_type]
    mod = importlib.import_module(module_path)
    cls = getattr(mod, class_name)

    # XGBoost defaults
    if class_name == "XGBClassifier":
        model_params.setdefault("use_label_encoder", False)
        model_params.setdefault("eval_metric", "mlogloss")
        model_params.setdefault("tree_method", "hist")
        # Determinism > speed (2026-05-25). XGBoost hist with n_jobs=-1 is
        # NON-reproducible even with random_state set — the parallel histogram
        # gradient-sum order varies across threads, so the SAME code + data
        # gives a slightly different model (and backtest) every run. Forcing
        # single-thread makes training bit-reproducible so: (a) a user who
        # copies a strategy and reruns it gets identical numbers, (b) the
        # community "Live" score matches a redeploy, (c) "same code, different
        # result" support reports go away. Cost: single-threaded XGB (a few
        # seconds slower on large windows; hist is fast so it's minor). FORCED
        # (not setdefault) so the guarantee can't be silently broken by a
        # strategy passing n_jobs. Exact reproducibility holds within the
        # platform (pinned versions / same Modal image); a user's own machine
        # with different xgboost/numpy/CPU can still differ in low-order bits.
        model_params["n_jobs"] = 1

    # Common defaults
    model_params.setdefault("random_state", 42)

    from model_wrapper import ModelWrapper
    clf = cls(**model_params)  # params must be unpacked as keyword arguments
    clf.fit(X_train, y_train_enc)

    enc = LabelEncoder()
    enc.fit([-1, 0, 1])
    return ModelWrapper(clf, original_classes=enc.classes_, n_features=X_train.shape[1])


def _generate_signals(model, X, threshold):
    """Framework-owned signal generation. Deterministic threshold logic.

    A bar is +1 when P(class 1) >= threshold, -1 when P(class -1) >=
    threshold, and when both clear the threshold the larger probability wins
    (ties go long).  A class missing from ``model.classes_`` is treated as
    probability 0.

    Returns:
        ``(signal, p_pos, p_neg)`` — a Series indexed like ``X`` plus the two
        probability arrays.
    """
    proba = model.predict_proba(X)
    classes = list(model.classes_)
    idx_pos = classes.index(1) if 1 in classes else None
    idx_neg = classes.index(-1) if -1 in classes else None
    p_pos = proba[:, idx_pos] if idx_pos is not None else np.zeros(len(X))
    p_neg = proba[:, idx_neg] if idx_neg is not None else np.zeros(len(X))

    signal_vals = np.zeros(len(X))
    signal_vals = np.where(p_pos >= threshold, 1.0, signal_vals)
    signal_vals = np.where(p_neg >= threshold, -1.0, signal_vals)
    # Both exceed: pick stronger
    both = (p_pos >= threshold) & (p_neg >= threshold)
    signal_vals[both] = np.where(p_pos[both] >= p_neg[both], 1.0, -1.0)
    return pd.Series(signal_vals, index=X.index), p_pos, p_neg


# ── Filter functions (all no-ops when config value is None) ──────────────

def _apply_direction_filter(signal, direction):
    """Zero out signals that don't match allowed direction.

    ``None`` and "both" return ``signal`` itself; "long" / "short" return a
    filtered copy.
    """
    if direction is None or direction == "both":
        return signal
    s = signal.copy()
    if direction == "long":
        s[s < 0] = 0.0
    elif direction == "short":
        s[s > 0] = 0.0
    return s


def _apply_session_filter(signal, index, session_hours):
    """Zero out signals outside the session window ``[start, end)`` (hours).

    ``index`` must expose ``.hour``.  When start > end the window wraps around
    midnight.  ``None`` returns ``signal`` itself.
    """
    if session_hours is None:
        return signal
    s = signal.copy()
    start_h, end_h = session_hours[0], session_hours[1]
    hours = index.hour
    if start_h <= end_h:
        mask = (hours >= start_h) & (hours < end_h)
    else:
        # wrap around midnight, e.g. [22, 6]
        mask = (hours >= start_h) | (hours < end_h)
    s[~mask] = 0.0
    return s


def _apply_atr_filter(signal, close, high, low, min_atr):
    """Zero out signals when NATR(14) is below threshold.

    NATR here is an EWM (com=13) of the true range divided by close.
    ``None`` returns ``signal`` itself.
    """
    if min_atr is None:
        return signal
    hl = high - low
    hc = (high - close.shift(1)).abs()
    lc = (low - close.shift(1)).abs()
    tr = pd.concat([hl, hc, lc], axis=1).max(axis=1)
    atr14 = tr.ewm(com=13, adjust=False).mean()
    natr = atr14 / close.replace(0, np.nan)
    s = signal.copy()
    s[natr < min_atr] = 0.0
    return s


def _apply_trend_filter(signal, close, trend_filter):
    """Only allow signals aligned with trend.

    e.g. 'sma_50': longs above SMA, shorts below.  Accepts "sma_<n>" or
    "ema_<n>" (case-insensitive, "-" allowed as separator); any other value is
    ignored and ``signal`` is returned unchanged.
    """
    if trend_filter is None:
        return signal
    # Parse: "sma_50" → SMA with period 50
    parts = trend_filter.lower().replace("-", "_").split("_")
    if len(parts) >= 2 and parts[0] in ("sma", "ema"):
        period = int(parts[1])
    else:
        return signal  # unknown filter, skip
    if parts[0] == "sma":
        trend_line = close.rolling(period).mean()
    else:
        trend_line = close.ewm(span=period, adjust=False).mean()
    s = signal.copy()
    # Longs only above trend, shorts only below
    s[(s > 0) & (close < trend_line)] = 0.0
    s[(s < 0) & (close > trend_line)] = 0.0
    return s


# ── run_backtest_v2: framework-owned SL/TP/cooldown/position management ──

def run_backtest_v2(signal, close, high, low, config, capital=10000, cost=2e-5):
    """Backtest with SL/TP/cooldown/direction handling built into the engine.

    Unlike run_backtest (v1), this function handles position exits internally.

    Per bar, in order: (1) an open position is checked against stop-loss then
    take-profit using the bar's low/high; (2) a pending cooldown consumes the
    bar; (3) a non-zero signal opens a position at the close, or — if it
    opposes the open position — reverses or closes it depending on
    ``config["on_opposite"]``.  A position still open at the end is closed at
    the final close.

    Args:
        signal, close, high, low: aligned Series.
        config: read for "stop_loss", "take_profit" (fractions of entry
            price), "cooldown" (bars) and "on_opposite" ("reverse" or any
            other value for close-only).
        capital: starting equity.
        cost: subtracted from every trade return.

    Returns: same dict shape as run_backtest()
    """
    stop_loss = config.get("stop_loss")
    take_profit = config.get("take_profit")
    cooldown = config.get("cooldown", 0)
    on_opposite = config.get("on_opposite", "reverse")

    sig_arr = signal.values
    close_arr = close.values
    high_arr = high.values
    low_arr = low.values
    idx = signal.index
    n = len(close_arr)

    trade_returns = []
    long_returns = []
    short_returns = []
    trade_log = []
    equity_vals = np.full(n, float(capital))
    cumret = 1.0
    position = 0.0  # current direction: 1.0, -1.0, or 0.0 (flat)
    entry_price = None
    entry_bar = None  # index into arrays for entry time
    cooldown_remaining = 0

    def _log_trade(exit_bar, exit_px, ret, reason):
        # Reads position / entry_price / entry_bar from the enclosing scope,
        # so it must be called BEFORE those are reset.
        trade_log.append({
            "type": "Buy" if position == 1.0 else "Sell",
            "entry_time": str(idx[entry_bar]),
            "exit_time": str(idx[exit_bar]),
            "entry_price": round(entry_price, 5),
            "exit_price": round(exit_px, 5),
            "pnl": round(position * (exit_px - entry_price), 5),
            "pnl_pct": round(ret * 100, 3),
            "exit_reason": reason,
        })

    for i in range(n):
        c = close_arr[i]
        h = high_arr[i]
        lo = low_arr[i]
        s = sig_arr[i]

        # 1. Check SL/TP if in trade (SL is tested first, so it wins when a
        #    bar touches both levels)
        if position != 0.0 and entry_price is not None:
            hit_sl = False
            hit_tp = False
            exit_price = None
            if position == 1.0:  # long
                if stop_loss is not None and lo <= entry_price * (1 - stop_loss):
                    hit_sl = True
                    exit_price = entry_price * (1 - stop_loss)
                elif take_profit is not None and h >= entry_price * (1 + take_profit):
                    hit_tp = True
                    exit_price = entry_price * (1 + take_profit)
            else:  # short
                if stop_loss is not None and h >= entry_price * (1 + stop_loss):
                    hit_sl = True
                    exit_price = entry_price * (1 + stop_loss)
                elif take_profit is not None and lo <= entry_price * (1 - take_profit):
                    hit_tp = True
                    exit_price = entry_price * (1 - take_profit)

            if hit_sl or hit_tp:
                ret = float(position * (exit_price - entry_price) / entry_price - cost)
                trade_returns.append(ret)
                if position == 1.0:
                    long_returns.append(ret)
                else:
                    short_returns.append(ret)
                _log_trade(i, exit_price, ret, "SL" if hit_sl else "TP")
                cumret *= (1 + ret)
                position = 0.0
                entry_price = None
                entry_bar = None
                cooldown_remaining = cooldown
                equity_vals[i] = capital * cumret
                continue

        # 2. Cooldown
        if cooldown_remaining > 0:
            cooldown_remaining -= 1
            equity_vals[i] = capital * cumret
            continue

        # 3. Signal processing
        if s != 0.0:
            if position == 0.0:
                # Open new trade
                position = s
                entry_price = c
                entry_bar = i
            elif s != position:
                # Opposite signal
                if on_opposite == "reverse":
                    # Close current + open opposite
                    ret = float(position * (c - entry_price) / entry_price - cost)
                    trade_returns.append(ret)
                    if position == 1.0:
                        long_returns.append(ret)
                    else:
                        short_returns.append(ret)
                    _log_trade(i, c, ret, "signal")
                    cumret *= (1 + ret)
                    position = s
                    entry_price = c
                    entry_bar = i
                else:  # close_only
                    # Close current, go flat
                    ret = float(position * (c - entry_price) / entry_price - cost)
                    trade_returns.append(ret)
                    if position == 1.0:
                        long_returns.append(ret)
                    else:
                        short_returns.append(ret)
                    _log_trade(i, c, ret, "close_only")
                    cumret *= (1 + ret)
                    position = 0.0
                    entry_price = None
                    entry_bar = None
                    cooldown_remaining = cooldown

        equity_vals[i] = capital * cumret

    # Close last open trade at final close
    if position != 0.0 and entry_price is not None and n > 0 and entry_price != 0:
        c = close_arr[-1]
        ret = float(position * (c - entry_price) / entry_price - cost)
        trade_returns.append(ret)
        if position == 1.0:
            long_returns.append(ret)
        else:
            short_returns.append(ret)
        _log_trade(n - 1, c, ret, "end")
        cumret *= (1 + ret)
        equity_vals[-1] = capital * cumret

    # Bar returns for Sharpe (approximate)
    bar_returns = np.zeros(n)
    for i in range(1, n):
        if close_arr[i - 1] != 0 and sig_arr[i - 1] != 0:
            bar_returns[i] = sig_arr[i - 1] * (close_arr[i] - close_arr[i - 1]) / close_arr[i - 1]

    return {
        "equity": pd.Series(equity_vals, index=close.index),
        "trade_returns": trade_returns,
        "long_returns": long_returns,
        "short_returns": short_returns,
        "bar_returns": bar_returns,
        "trade_log": trade_log,
    }


# ── run_strategy: the v2 orchestrator ────────────────────────────────────

def run_strategy(feature_fn, config_fn, data_path, start_date="", end_date="",
                 validation_date="", train_split=0.7, register_model_fn=None):
    """Config-driven strategy execution.

    Claude writes feature_fn + config_fn, framework does everything else.

    Args:
        feature_fn: ``feature_fn(df, close, open_, high, low) -> df``.
        config_fn: zero-argument callable returning the strategy config dict.
        data_path, start_date, end_date: forwarded to ``load_ohlc``.
        validation_date: when non-empty, rows with index <= this value form
            the train set; otherwise ``train_split`` is used.
        train_split: train fraction used when ``validation_date`` is empty.
        register_model_fn: optional callback invoked with the fitted model.

    Returns: results dict (same format as webapp expects)
    """
    config = config_fn()

    # Auto-correct SL/TP if Claude passed percentage instead of decimal
    for _key in ("stop_loss", "take_profit"):
        _val = config.get(_key)
        if _val is not None and _val > 0.1:  # >10% is almost certainly a percentage
            config[_key] = _val / 100.0
            print(f"[strategy] Auto-corrected {_key}: {_val} -> {config[_key]} (was percentage, converted to decimal)")

    # 1. Load data
    df, close, open_, high, low = load_ohlc(data_path, start_date, end_date)

    # 2. Feature engineering (Claude's function)
    df = feature_fn(df, close, open_, high, low)
    close = df["close"]
    open_ = df["open"]
    high = df["high"]
    low = df["low"]

    # 3. Warm-up detection: drop rows where features have NaN BEFORE any fill
    feature_cols = [c for c in df.columns if c not in ("open", "high", "low", "close")]
    raw_nans = df[feature_cols].isna().any(axis=1)
    valid_rows = ~raw_nans
    if valid_rows.any():
        first_valid = valid_rows.idxmax()
        if raw_nans.loc[:first_valid].any():
            df = df.loc[first_valid:].copy()
            close = df["close"]
            open_ = df["open"]
            high = df["high"]
            low = df["low"]

    # 4. Target
    horizon = config.get("target_horizon", 4)
    target = make_target(close, horizon=horizon)

    # 5. Split (ffill only within each partition — no bfill leak)
    mask = target.notna()
    df = df[mask].copy()
    target = target[mask]
    close = df["close"]
    high = df["high"]
    low = df["low"]

    X = df[feature_cols].copy()
    X = X.replace([np.inf, -np.inf], np.nan)

    if validation_date:
        split_idx = len(df[df.index <= validation_date])
    else:
        split_idx = int(len(df) * train_split)
    split_idx = max(1, min(split_idx, len(df) - 1))  # keep both partitions non-empty

    # ffill within train and test separately (no leak)
    X_train = X.iloc[:split_idx].ffill().fillna(0.0)
    X_test = X.iloc[split_idx:].ffill().fillna(0.0)
    X = pd.concat([X_train, X_test])

    y_train = target.iloc[:split_idx]
    y_test = target.iloc[split_idx:]
    close_train = close.iloc[:split_idx]
    close_test = close.iloc[split_idx:]
    high_test = high.iloc[split_idx:]
    low_test = low.iloc[split_idx:]

    enc = LabelEncoder()
    enc.fit([-1, 0, 1])
    y_train_enc = enc.transform(y_train)
    y_test_enc = enc.transform(y_test)

    split_dt = str(df.index[split_idx])

    sp = {
        "df": df,
        "X_train": X_train,
        "X_test": X_test,
        "y_train": y_train,
        "y_test": y_test,
        "y_train_enc": y_train_enc,
        "y_test_enc": y_test_enc,
        "enc": enc,
        "close": close,
        "close_train": close_train,
        "close_test": close_test,
        "split_idx": split_idx,
        "split_dt": split_dt,
        "n_train": len(X_train),
        "n_test": len(X_test),
    }

    # 6. Build model from config
    model = _build_model_from_config(config, X_train, y_train_enc)

    # 7. Generate signals
    threshold = config.get("signal_threshold", 0.55)
    signal_train, p_pos_train, p_neg_train = _generate_signals(model, X_train, threshold)
    signal_test, p_pos_test, p_neg_test = _generate_signals(model, X_test, threshold)

    # 8. Apply filters (order: direction → session → ATR → trend)
    direction = config.get("direction", "both")
    signal_test = _apply_direction_filter(signal_test, direction)
    signal_train = _apply_direction_filter(signal_train, direction)

    session_filter = config.get("session_filter")
    signal_test = _apply_session_filter(signal_test, signal_test.index, session_filter)
    signal_train = _apply_session_filter(signal_train, signal_train.index, session_filter)

    # NOTE(review): the ATR and trend filters below are applied to the test
    # signals only, unlike direction/session above — so train-side stats are
    # computed on less-filtered signals. Confirm this asymmetry is intended.
    min_atr = config.get("min_atr")
    if min_atr is not None:
        signal_test = _apply_atr_filter(signal_test, close_test, high_test, low_test, min_atr)

    trend_filter = config.get("trend_filter")
    if trend_filter is not None:
        signal_test = _apply_trend_filter(signal_test, close_test, trend_filter)

    signal_full = pd.concat([signal_train, signal_test])

    # 9. Backtest with SL/TP/cooldown (test + train)
    high_train = high.iloc[:split_idx]
    low_train = low.iloc[:split_idx]

    has_risk = (config.get("stop_loss") is not None
                or config.get("take_profit") is not None
                or config.get("cooldown", 0) > 0
                or config.get("on_opposite", "reverse") != "reverse")

    if has_risk:
        bt = run_backtest_v2(signal_test, close_test, high_test, low_test, config, capital=10000)
        bt_train = run_backtest_v2(signal_train, close_train, high_train, low_train, config, capital=10000)
    else:
        bt = run_backtest(signal_test, close_test, capital=10000)
        bt_train = run_backtest(signal_train, close_train, capital=10000)

    # 10. Metrics
    metrics = compute_metrics(bt, close_test, capital=10000)

    # 11. Pre-compute all trade stats (single source of truth)
    pre_stats = {
        "train_stats": compute_trade_stats(bt_train.get("trade_returns", []), capital=10000),
        "test_stats": compute_trade_stats(bt.get("trade_returns", []), capital=10000),
        "long_stats": compute_trade_stats(bt.get("long_returns", []), capital=10000),
        "short_stats": compute_trade_stats(bt.get("short_returns", []), capital=10000),
    }

    # 12. Register model
    if register_model_fn is not None:
        register_model_fn(model)

    # 13. Build return dict
    return build_return_dict(sp, bt, metrics, model, feature_cols, signal_full,
                             p_pos_test, p_neg_test, custom_figs=[],
                             bt_train_result=bt_train, pre_stats=pre_stats)


# ── End strategy_utils ──

DATA_PATH = '/root/Desktop/QuantifyMe/data/ohlc/AUDUSD_15min.parquet'
START_DATE = '2026-04-15'
END_DATE = '2026-05-25'
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7


# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add Bollinger/RSI mean-reversion features to ``df`` and return it.

    New columns are written onto the passed frame; the returned frame is the
    NaN-filled result.
    """
    # ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
    bb_period = 20
    bb_std = 2.0
    bb_mid = close.rolling(bb_period).mean()
    bb_sigma = close.rolling(bb_period).std(ddof=0)
    bb_upper = bb_mid + bb_std * bb_sigma
    bb_lower = bb_mid - bb_std * bb_sigma

    df["bb_mid"] = bb_mid
    df["bb_upper"] = bb_upper
    df["bb_lower"] = bb_lower

    # %B — position of close within the band (0 = lower, 1 = upper)
    bb_range = bb_upper - bb_lower
    df["bb_pct_b"] = np.where(bb_range > 0, (close - bb_lower) / bb_range, 0.5)

    # Bandwidth — normalised band width (regime filter)
    df["bb_bandwidth"] = np.where(bb_mid > 0, bb_range / bb_mid, 0.0)

    # Distance from each band (signed, normalised by sigma)
    df["dist_lower"] = np.where(bb_sigma > 0, (close - bb_lower) / bb_sigma, 0.0)
    df["dist_upper"] = np.where(bb_sigma > 0, (bb_upper - close) / bb_sigma, 0.0)
    df["dist_mid"] = np.where(bb_sigma > 0, (close - bb_mid) / bb_sigma, 0.0)

    # Below lower band flag
    df["below_lower"] = np.where(close < bb_lower, 1, 0)
    # Above upper band flag
    df["above_upper"] = np.where(close > bb_upper, 1, 0)

    # ── RSI (14) ─────────────────────────────────────────────────────────────
    rsi_period = 14
    delta = close.diff()
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)
    avg_gain = gain.ewm(com=rsi_period - 1, min_periods=rsi_period).mean()
    avg_loss = loss.ewm(com=rsi_period - 1, min_periods=rsi_period).mean()
    # Where avg_loss is not > 0 (including NaN warm-up rows) rs falls back to 100.
    rs = np.where(avg_loss > 0, avg_gain / avg_loss, 100.0)
    rsi = 100.0 - 100.0 / (1.0 + rs)
    df["rsi"] = rsi

    # RSI-derived flags and distances
    df["rsi_oversold"] = np.where(rsi < 35, 1, 0)
    df["rsi_overbought"] = np.where(rsi > 65, 1, 0)
    df["rsi_dist_35"] = rsi - 35.0  # negative when oversold
    df["rsi_dist_65"] = rsi - 65.0  # positive when overbought
    df["rsi_norm"] = (rsi - 50.0) / 50.0  # centred, ±1 range

    # ── Core entry condition features ────────────────────────────────────────
    # Buy setup: close < lower BB AND RSI < 35
    df["long_setup"] = np.where((close < bb_lower) & (rsi < 35), 1, 0)
    # Sell setup: close > upper BB AND RSI > 65
    df["short_setup"] = np.where((close > bb_upper) & (rsi > 65), 1, 0)

    # ── ATR (14) — volatility context ────────────────────────────────────────
    atr_period = 14
    hl = high - low
    hc = (high - close.shift(1)).abs()
    lc = (low - close.shift(1)).abs()
    tr = pd.concat([hl, hc, lc], axis=1).max(axis=1)
    atr = tr.ewm(com=atr_period - 1, min_periods=atr_period).mean()
    df["atr"] = atr
    df["natr"] = np.where(close > 0, atr / close, 0.0)

    # ── Momentum / Rate-of-Change ─────────────────────────────────────────────
    for n in [1, 3, 5, 10]:
        df[f"roc_{n}"] = np.where(
            close.shift(n) > 0,
            (close - close.shift(n)) / close.shift(n),
            0.0
        )

    # ── EMA trend context (fast / slow) ──────────────────────────────────────
    ema_fast = close.ewm(span=9, min_periods=9).mean()
    ema_slow = close.ewm(span=21, min_periods=21).mean()
    df["ema_fast"] = ema_fast
    df["ema_slow"] = ema_slow
    df["ema_diff"] = np.where(ema_slow > 0, (ema_fast - ema_slow) / ema_slow, 0.0)
    df["ema_bull"] = np.where(ema_fast > ema_slow, 1, 0)

    # SMA-50 trend filter helper (used by framework trend_filter)
    df["sma_50"] = close.rolling(50).mean()

    # ── Candle body & wick features ───────────────────────────────────────────
    body = (close - open_).abs()
    candle_rng = (high - low).replace(0, np.nan)
    df["body_ratio"] = (body / candle_rng).fillna(0.0)
    df["upper_wick"] = np.where(candle_rng.notna(), (high - close.clip(lower=open_)) / candle_rng.fillna(1), 0.0)
    df["lower_wick"] = np.where(candle_rng.notna(), (close.clip(upper=open_) - low) / candle_rng.fillna(1), 0.0)
    df["bull_candle"] = np.where(close > open_, 1, 0)

    # ── Volume-like proxy — true range z-score ────────────────────────────────
    tr_mean = tr.rolling(20).mean()
    tr_std = tr.rolling(20).std(ddof=0).replace(0, np.nan)
    df["tr_zscore"] = ((tr - tr_mean) / tr_std).fillna(0.0)

    # ── Lagged RSI and %B (1, 2, 3 bars back) ────────────────────────────────
    for lag in [1, 2, 3]:
        df[f"rsi_lag{lag}"] = df["rsi"].shift(lag)
        df[f"bb_pct_b_lag{lag}"] = df["bb_pct_b"].shift(lag)

    # ── RSI slope ────────────────────────────────────────────────────────────
    df["rsi_slope3"] = df["rsi"] - df["rsi"].shift(3)

    # ── Mean-reversion proximity: how far price is from middle band ───────────
    df["pct_to_mid"] = np.where(close > 0, (bb_mid - close) / close, 0.0)

    # ── Fill any NaNs from warm-up ────────────────────────────────────────────
    # NOTE(review): bfill() copies later values into the warm-up rows and
    # leaves no NaNs for run_strategy's warm-up detection to drop — confirm
    # this look-ahead is acceptable.
    df = df.bfill().ffill()

    return df


# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the config dict consumed by ``run_strategy``."""
    return {
        "title": "BB Mean-Reversion + RSI Oversold/Overbought (XGBoost)",
        "model_type": "XGBClassifier",
        "model_params": {
            "n_estimators": 500,
            "max_depth": 4,
            "learning_rate": 0.03,
            "subsample": 0.75,
            "colsample_bytree": 0.70,
            "min_child_weight": 5,
            "gamma": 0.1,
            "reg_alpha": 0.05,
            "reg_lambda": 1.5,
            "objective": "binary:logistic",
            "random_state": 42,
            "n_jobs": -1,  # overwritten with 1 by _build_model_from_config
        },
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.0010,
        "take_profit": 0.0020,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": [7, 17],
        "min_atr": 0.00005,
        "trend_filter": None,
        "target_horizon": 4,
        "objective": (
            "Maximize Sharpe ratio by exploiting Bollinger Band mean-reversion "
            "with RSI confirmation. Entry conditions (close < lower BB, RSI < 35 "
            "for longs; close > upper BB, RSI > 65 for shorts) are encoded as "
            "features together with momentum, ATR volatility, candle structure, "
            "and lagged indicators. XGBoost with strong regularisation "
            "(reg_lambda=1.5, gamma=0.1, min_child_weight=5) and a low learning "
            "rate avoids overfitting on the 6-week window. Session filter "
            "[7,17] UTC targets liquid London/NY overlap, reducing noise. "
            "TP:SL ratio of 2:1 supports positive expected value even at "
            "moderate win rates, pushing Sharpe higher."
        ),
        "notes": (
            "Features: %B position, RSI (raw + flags + slope + lags), "
            "EMA cross, ATR/NATR, ROC(1/3/5/10), candle body/wick ratios, "
            "TR z-score, distance-to-midband, long/short setup flags. "
            "Round-trip cost ~2e-5 is implicitly absorbed by the 10-pip TP target. "
            "Cooldown=0 allows immediate re-entry after mean-reversion completes."
        ),
    }


# ── Framework v2: auto-generated wrapper ──
def train_and_backtest():
    """Run the strategy defined in this script through ``run_strategy``."""
    _vd = VALIDATION_DATE if 'VALIDATION_DATE' in globals() else ''
    _ts = TRAIN_SPLIT if 'TRAIN_SPLIT' in globals() else 0.7
    return run_strategy(
        feature_engineering, strategy_config,
        DATA_PATH, START_DATE, END_DATE, _vd, _ts,
        register_model_fn=register_model
    )
1.57	GBP/USD BB Squeeze Breakout (GradientBoosting) Maximize risk-adjusted return (Sharpe / Calmar). GradientBoostingClassifier chosen for its strong performance on tabular financial data with…	E @elastic-moose-350	GBPUSD	15min	53.4%58.6%	+1.03%+10.77%	1.041.29	5.20%5.20%	34858
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 01:53:28
# Model : Gradient Boosting
# Feature Eng. : BB (20,2.0), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# Bollinger Bands Squeeze Breakout — GBP/USD 15-min
# ============================================================

# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd

DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/GBPUSD_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7


# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add Bollinger-squeeze breakout features to ``df`` and return it.

    New columns are written onto the passed frame; the returned frame is the
    NaN-filled result.  ``df.index`` must expose ``.hour`` and ``.dayofweek``.
    """
    # ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
    bb_period = 20
    bb_std = 2.0
    bb_mid = close.rolling(bb_period).mean()
    bb_sigma = close.rolling(bb_period).std(ddof=0)
    bb_upper = bb_mid + bb_std * bb_sigma
    bb_lower = bb_mid - bb_std * bb_sigma
    bb_width = (bb_upper - bb_lower) / bb_mid
    # NOTE(review): unlike the other ratios in this function there is no
    # epsilon here, so a zero-width band yields NaN/inf — confirm acceptable.
    bb_pct = (close - bb_lower) / (bb_upper - bb_lower)

    df["bb_mid"] = bb_mid
    df["bb_upper"] = bb_upper
    df["bb_lower"] = bb_lower
    df["bb_width"] = bb_width
    df["bb_pct"] = bb_pct

    # ── ATR (14) & NATR ─────────────────────────────────────────────────────
    atr_period = 14
    prev_close = close.shift(1)
    tr = pd.concat([
        high - low,
        (high - prev_close).abs(),
        (low - prev_close).abs()
    ], axis=1).max(axis=1)
    atr = tr.ewm(span=atr_period, min_periods=atr_period, adjust=False).mean()
    natr = atr / close

    df["atr"] = atr
    df["natr"] = natr

    # ── Squeeze detection ────────────────────────────────────────────────────
    # Squeeze = BB width is in the bottom quartile over a 50-bar lookback
    bb_width_min = bb_width.rolling(50).min()
    bb_width_max = bb_width.rolling(50).max()
    bb_width_norm = (bb_width - bb_width_min) / (bb_width_max - bb_width_min + 1e-12)
    df["bb_width_norm"] = bb_width_norm
    df["squeeze"] = np.where(bb_width_norm < 0.25, 1.0, 0.0)

    # Squeeze released: was in squeeze 1 bar ago, now width is expanding
    bb_width_chg = bb_width.diff()
    df["squeeze_release"] = np.where(
        (df["squeeze"].shift(1) == 1.0) & (bb_width_chg > 0), 1.0, 0.0
    )

    # ── BB width momentum ────────────────────────────────────────────────────
    df["bb_width_chg"] = bb_width_chg
    df["bb_width_chg_2"] = bb_width.diff(2)
    df["bb_width_chg_5"] = bb_width.diff(5)

    # ── Price position relative to bands ─────────────────────────────────────
    df["close_vs_mid"] = close - bb_mid
    df["close_vs_upper"] = close - bb_upper
    df["close_vs_lower"] = close - bb_lower

    # ── Momentum & returns ───────────────────────────────────────────────────
    df["ret_1"] = close.pct_change(1)
    df["ret_3"] = close.pct_change(3)
    df["ret_5"] = close.pct_change(5)
    df["ret_10"] = close.pct_change(10)
    df["ret_20"] = close.pct_change(20)

    # ── RSI (14) ─────────────────────────────────────────────────────────────
    rsi_period = 14
    delta = close.diff()
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)
    avg_gain = gain.ewm(span=rsi_period, min_periods=rsi_period, adjust=False).mean()
    avg_loss = loss.ewm(span=rsi_period, min_periods=rsi_period, adjust=False).mean()
    rs = avg_gain / (avg_loss + 1e-12)
    rsi = 100.0 - 100.0 / (1.0 + rs)
    df["rsi"] = rsi

    # RSI divergence proxy: price makes new low/high but RSI does not
    df["rsi_5_min"] = rsi.rolling(5).min()
    df["close_5_min"] = close.rolling(5).min()
    df["rsi_5_max"] = rsi.rolling(5).max()
    df["close_5_max"] = close.rolling(5).max()

    # ── MACD ─────────────────────────────────────────────────────────────────
    ema_fast = close.ewm(span=12, adjust=False).mean()
    ema_slow = close.ewm(span=26, adjust=False).mean()
    macd_line = ema_fast - ema_slow
    macd_signal = macd_line.ewm(span=9, adjust=False).mean()
    macd_hist = macd_line - macd_signal
    df["macd_line"] = macd_line
    df["macd_signal"] = macd_signal
    df["macd_hist"] = macd_hist
    df["macd_hist_chg"] = macd_hist.diff()

    # ── Volume-like proxy: bar range ─────────────────────────────────────────
    bar_range = high - low
    df["bar_range"] = bar_range
    df["bar_range_norm"] = bar_range / (atr + 1e-12)

    # ── Candle body direction & size ─────────────────────────────────────────
    body = close - open_
    df["body"] = body
    df["body_norm"] = body / (atr + 1e-12)
    df["body_dir"] = np.where(body > 0, 1.0, np.where(body < 0, -1.0, 0.0))

    # ── Upper / lower wick ───────────────────────────────────────────────────
    df["upper_wick"] = high - pd.concat([close, open_], axis=1).max(axis=1)
    df["lower_wick"] = pd.concat([close, open_], axis=1).min(axis=1) - low

    # ── SMA trend context ─────────────────────────────────────────────────────
    sma_50 = close.rolling(50).mean()
    sma_200 = close.rolling(200).mean()
    df["sma_50"] = sma_50
    df["sma_200"] = sma_200
    df["close_vs_sma50"] = (close - sma_50) / (sma_50 + 1e-12)
    df["sma50_vs_sma200"] = (sma_50 - sma_200) / (sma_200 + 1e-12)

    # ── Volatility regime ────────────────────────────────────────────────────
    natr_ma = natr.rolling(50).mean()
    df["natr_ratio"] = natr / (natr_ma + 1e-12)  # >1 = above-avg vol

    # ── Mean-reversion distance ───────────────────────────────────────────────
    df["z_score_20"] = (close - bb_mid) / (bb_sigma + 1e-12)

    # ── Rolling realized vol ─────────────────────────────────────────────────
    df["rvol_10"] = df["ret_1"].rolling(10).std()
    df["rvol_20"] = df["ret_1"].rolling(20).std()

    # ── ATR-normalised returns ────────────────────────────────────────────────
    df["ret_1_natr"] = df["ret_1"] / (natr + 1e-12)

    # ── Lagged features ───────────────────────────────────────────────────────
    for lag in [1, 2, 3, 5]:
        df[f"bb_pct_lag{lag}"] = bb_pct.shift(lag)
        df[f"bb_width_lag{lag}"] = bb_width.shift(lag)
        df[f"rsi_lag{lag}"] = rsi.shift(lag)
        df[f"macd_hist_lag{lag}"] = macd_hist.shift(lag)

    # ── Hour-of-day (cyclical) ────────────────────────────────────────────────
    hour = pd.Series(df.index.hour, index=df.index).astype(float)
    df["hour_sin"] = np.sin(2 * np.pi * hour / 24.0)
    df["hour_cos"] = np.cos(2 * np.pi * hour / 24.0)

    # ── Day-of-week (cyclical) ────────────────────────────────────────────────
    # NOTE(review): period 5 makes dayofweek 5/6 alias with 0/1 — confirm the
    # data never contains weekend bars, otherwise a period of 7 is needed.
    dow = pd.Series(df.index.dayofweek, index=df.index).astype(float)
    df["dow_sin"] = np.sin(2 * np.pi * dow / 5.0)
    df["dow_cos"] = np.cos(2 * np.pi * dow / 5.0)

    # ── Fill NaN from warm-up ────────────────────────────────────────────────
    # NOTE(review): bfill() copies later values into the warm-up rows
    # (look-ahead) — confirm this is acceptable for the consumer.
    df = df.bfill().ffill()

    return df


# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the strategy configuration dict for this script."""
    return {
        "title": "GBP/USD BB Squeeze Breakout (GradientBoosting)",
        "model_type": "GradientBoostingClassifier",
        "model_params": {
            "n_estimators": 400,
            "max_depth": 4,
            "learning_rate": 0.04,
            "subsample": 0.75,
            "min_samples_leaf": 20,
            "min_samples_split": 40,
            "max_features": "sqrt",
            "n_iter_no_change": 30,
            "validation_fraction": 0.1,
            "tol": 1e-4,
            "random_state": 42,
        },
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": [6, 20],
        "min_atr": 0.0002,
        "trend_filter": None,
        "target_horizon": 4,
        "objective": (
            "Maximize risk-adjusted return (Sharpe / Calmar). "
            "GradientBoostingClassifier chosen for its strong performance on "
            "tabular financial data with noisy labels. Shallow trees (max_depth=4) "
            "with shrinkage (lr=0.04) and subsample=0.75 reduce overfitting. "
            "Early stopping (n_iter_no_change=30) prevents over-training. "
            "SL=0.5%, TP=1.0% gives a 1:2 risk/reward ratio. "
            "Session filter 06-20 UTC captures London + New York overlap for GBP/USD."
        ),
        "notes": (
            "Core signal: BB squeeze (narrow band width) followed by expansion "
            "breakout, confirmed by MACD histogram direction and RSI. "
            "ATR filter ensures minimum volatility for entries. "
            "Lagged BB features capture the squeeze build-up dynamic. "
            "Z-score and normalized returns give the model mean-reversion context. "
            "Cyclical time features allow the model to learn intraday seasonality."
        ),
    }
0.71	USD/CAD SMA Trend + Momentum XGBoost Scalper Maximise risk-adjusted return on USD/CAD 15-min bars. XGBoost with deep feature set (multi-period SMA distances and crossovers, RSI, MACD, B…	D @delta-atlas-858	USDCAD	15min	45.6%60.0%	+3.05%+1.86%	1.461.26	1.99%1.99%	5710
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG                                         ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated      : 2026-05-06 02:13:41
# Model          : XGBoost
# Feature Eng.   : SMA (20,50,200) + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization   : Maximize risk-adjusted return
# Risk Mgmt      : Stop loss 0.5%, Take profit 1.0%
# Risk Filter    : —
# ══════════════════════════════════════════════════════════════

# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd

DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/USDCAD_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7


# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add SMA-trend, momentum, volatility and candle features to ``df``.

    Columns are written onto the passed ``df`` in place; the returned frame
    is a back-filled then forward-filled copy of it.

    Args:
        df: Frame that receives the feature columns. An optional ``volume``
            column is used when present and not all zero.
        close, open_, high, low: Price series. Presumably the OHLC columns
            of ``df`` sharing its index — verify against the caller.

    Returns:
        ``df`` with all feature columns added and warm-up NaNs filled.
    """
    sma_periods = [20, 50, 200]

    # ── SMA features (required) ──────────────────────────────────────────
    for p in sma_periods:
        sma = close.rolling(p).mean()
        df[f"sma_{p}"] = sma
        df[f"dm_sma_{p}"] = (close - sma) / sma

    # ── SMA slope (momentum of the moving average itself) ────────────────
    for p in sma_periods:
        df[f"sma_{p}_slope"] = df[f"sma_{p}"].diff(5) / df[f"sma_{p}"].shift(5)

    # ── SMA crossover signals ────────────────────────────────────────────
    df["sma_20_50_cross"] = df["sma_20"] - df["sma_50"]
    df["sma_50_200_cross"] = df["sma_50"] - df["sma_200"]
    df["sma_20_200_cross"] = df["sma_20"] - df["sma_200"]

    # ── Price momentum / rate of change ──────────────────────────────────
    for p in [4, 8, 16, 32]:
        df[f"roc_{p}"] = close.pct_change(p)

    # ── RSI (manual, no external libs) ───────────────────────────────────
    def calc_rsi(series, period=14):
        """Return the RSI of ``series`` using alpha = 1/period smoothing."""
        delta = series.diff()
        gain = delta.clip(lower=0)
        loss = (-delta).clip(lower=0)
        avg_gain = gain.ewm(alpha=1.0 / period, min_periods=period, adjust=False).mean()
        avg_loss = loss.ewm(alpha=1.0 / period, min_periods=period, adjust=False).mean()
        # 100 * g / (g + l) is algebraically 100 - 100 / (1 + g / l) but stays
        # defined (= 100) when there are no losses; dividing by a NaN-masked
        # avg_loss left the RSI undefined exactly when it should saturate.
        total = (avg_gain + avg_loss).replace(0, np.nan)
        return 100.0 * avg_gain / total

    for p in [9, 14, 21]:
        df[f"rsi_{p}"] = calc_rsi(close, p)
        df[f"rsi_{p}_norm"] = (df[f"rsi_{p}"] - 50) / 50  # centre around 0

    # ── MACD (manual) ────────────────────────────────────────────────────
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    macd_line = ema12 - ema26
    macd_signal = macd_line.ewm(span=9, adjust=False).mean()
    df["macd"] = macd_line
    df["macd_signal"] = macd_signal
    df["macd_hist"] = macd_line - macd_signal
    df["macd_hist_chg"] = df["macd_hist"].diff()

    # ── Bollinger Bands ──────────────────────────────────────────────────
    for p in [20, 50]:
        mid = close.rolling(p).mean()
        std = close.rolling(p).std()
        df[f"bb_upper_{p}"] = mid + 2 * std
        df[f"bb_lower_{p}"] = mid - 2 * std
        # A zero-width band is masked so the ratios become NaN, not inf.
        denom = (df[f"bb_upper_{p}"] - df[f"bb_lower_{p}"]).replace(0, np.nan)
        df[f"bb_pct_{p}"] = (close - df[f"bb_lower_{p}"]) / denom
        df[f"bb_width_{p}"] = denom / mid

    # ── ATR (manual) ─────────────────────────────────────────────────────
    def calc_atr(h, l, c, period=14):
        """Return the alpha = 1/period smoothed true range."""
        prev_c = c.shift(1)
        tr = pd.concat(
            [h - l, (h - prev_c).abs(), (l - prev_c).abs()],
            axis=1,
        ).max(axis=1)
        return tr.ewm(alpha=1.0 / period, min_periods=period, adjust=False).mean()

    for p in [7, 14]:
        atr = calc_atr(high, low, close, p)
        df[f"atr_{p}"] = atr
        df[f"natr_{p}"] = atr / close  # normalised ATR

    # ── Candle body / wick features ──────────────────────────────────────
    body = (close - open_).abs()
    candle_rng = (high - low).replace(0, np.nan)
    df["body_ratio"] = body / candle_rng
    df["upper_wick"] = (high - np.maximum(close, open_)) / candle_rng
    df["lower_wick"] = (np.minimum(close, open_) - low) / candle_rng
    df["candle_dir"] = np.sign(close - open_)

    # ── Rolling volatility ───────────────────────────────────────────────
    log_ret = np.log(close / close.shift(1))
    for p in [8, 16, 32]:
        df[f"vol_{p}"] = log_ret.rolling(p).std()

    # ── Volume (if available) — graceful fallback ────────────────────────
    if "volume" in df.columns and df["volume"].sum() > 0:
        vol_ma = df["volume"].rolling(20).mean()
        df["vol_ratio"] = df["volume"] / vol_ma.replace(0, np.nan)
    else:
        df["vol_ratio"] = 1.0

    # ── Lagged returns ───────────────────────────────────────────────────
    for lag in [1, 2, 3, 4, 8]:
        df[f"ret_lag_{lag}"] = log_ret.shift(lag)

    # ── Higher-timeframe SMA context (4-bar = 1h proxy) ──────────────────
    close_1h = close.rolling(4).mean()
    for p in [20, 50]:
        sma_1h = close_1h.rolling(p).mean()
        df[f"1h_dm_sma_{p}"] = (close_1h - sma_1h) / sma_1h

    # ── Fill NaN from indicator warm-up ──────────────────────────────────
    # bfill copies the first valid value backwards into the warm-up rows.
    df = df.bfill().ffill()
    return df


# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the static model, signal and risk settings for this strategy."""
    return {
        "title": "USD/CAD SMA Trend + Momentum XGBoost Scalper",
        "model_type": "XGBClassifier",
        "model_params": {
            "n_estimators": 600,
            "max_depth": 4,
            "learning_rate": 0.03,
            "subsample": 0.75,
            "colsample_bytree": 0.70,
            "min_child_weight": 5,
            "gamma": 0.2,
            "reg_alpha": 0.1,
            "reg_lambda": 1.5,
            "objective": "binary:logistic",
            "tree_method": "hist",
            "random_state": 42,
        },
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": [7, 20],
        "min_atr": 0.0002,
        "trend_filter": "sma_50",
        "target_horizon": 4,
        "objective": (
            "Maximise risk-adjusted return on USD/CAD 15-min bars. "
            "XGBoost with deep feature set (multi-period SMA distances and crossovers, "
            "RSI, MACD, Bollinger Bands, ATR, candle structure, lagged returns). "
            "Regularised tree ensemble (gamma, L1/L2, min_child_weight) prevents "
            "overfitting on the ~1-year window. 2:1 TP:SL ratio locks in positive "
            "expectancy; session filter restricts trading to liquid London/NY overlap."
        ),
        "notes": (
            "SMA-trio (20/50/200) distances are the primary trend-context features. "
            "MACD histogram momentum + RSI multi-period confirm entry timing. "
            "ATR normalisation makes volatility features scale-invariant. "
            "sma_50 trend filter ensures long trades only above 50-SMA and shorts below, "
            "aligning ML signals with dominant trend and improving Sharpe ratio."
        ),
    }
0.59	USD/JPY Multi-MA + RSI/BB XGBoost Sharpe Maximize Sharpe ratio on USD/JPY 1-min data using XGBoost with returns, RSI, Bollinger Bands, multiple MAs (50/100/200), MACD, ATR, and cand…	M @malcolmtan	USD/JP		60.7%—	+0.42%—	1.22—	0.53%0.53%	84—
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG                                         ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated      : 2026-05-08 02:08:02
# Model          : XGBoost
# Feature Eng.   : Auto-add features: ON
# Signal / Entry : —
# Optimization   : —
# Risk Mgmt      : —
# Risk Filter    : —
# ══════════════════════════════════════════════════════════════

# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd

DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/USDJPY_1min.parquet"
START_DATE = "2026-05-04 00:00:00"
END_DATE = "2026-05-07 00:00:00"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.6993736951983298


# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add return, RSI, Bollinger, MA, ATR, candle and MACD features to ``df``.

    Columns are written onto the passed ``df`` in place; the returned frame
    is a back-filled then forward-filled copy of it.

    Args:
        df: Frame that receives the feature columns. An optional ``volume``
            column is used when present and not all zero.
        close, open_, high, low: Price series. Presumably the OHLC columns
            of ``df`` sharing its index — verify against the caller.

    Returns:
        ``df`` with all feature columns added and warm-up NaNs filled.
    """
    eps = 1e-10  # keeps every ratio below finite when a denominator is zero

    # --- Returns over multiple horizons ---
    for n in [1, 3, 5, 10, 20]:
        df[f"ret_{n}"] = close.pct_change(n)

    # --- RSI 14 ---
    delta = close.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.ewm(com=13, min_periods=14).mean()
    avg_loss = loss.ewm(com=13, min_periods=14).mean()
    rs = avg_gain / (avg_loss + eps)
    df["rsi_14"] = 100.0 - (100.0 / (1.0 + rs))

    # --- RSI derived features ---
    rsi = df["rsi_14"]
    df["rsi_14_zscore"] = (rsi - rsi.rolling(50).mean()) / (rsi.rolling(50).std() + eps)
    df["rsi_overbought"] = np.where(rsi > 70, 1, 0)
    df["rsi_oversold"] = np.where(rsi < 30, 1, 0)

    # --- Bollinger Bands 20, 2 ---
    bb_mid = close.rolling(20).mean()
    bb_std = close.rolling(20).std()
    bb_upper = bb_mid + 2.0 * bb_std
    bb_lower = bb_mid - 2.0 * bb_std
    df["bb_mid"] = bb_mid
    df["bb_upper"] = bb_upper
    df["bb_lower"] = bb_lower
    df["bb_width"] = (bb_upper - bb_lower) / (bb_mid + eps)
    df["bb_pct_b"] = (close - bb_lower) / (bb_upper - bb_lower + eps)
    df["bb_above"] = np.where(close > bb_upper, 1, 0)
    df["bb_below"] = np.where(close < bb_lower, 1, 0)

    # --- Moving Averages ---
    for w in [50, 100, 200]:
        df[f"sma_{w}"] = close.rolling(w).mean()
        df[f"price_vs_sma_{w}"] = (close - df[f"sma_{w}"]) / (df[f"sma_{w}"] + eps)

    # --- MA crossover signals ---
    # NOTE(review): comparisons against a NaN SMA are False, so these flags
    # read -1 until both averages have warmed up.
    df["sma50_vs_sma100"] = np.where(df["sma_50"] > df["sma_100"], 1, -1)
    df["sma50_vs_sma200"] = np.where(df["sma_50"] > df["sma_200"], 1, -1)
    df["sma100_vs_sma200"] = np.where(df["sma_100"] > df["sma_200"], 1, -1)

    # --- ATR 14 ---
    prev_close = close.shift(1)
    tr = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
    ).max(axis=1)
    df["atr_14"] = tr.ewm(com=13, min_periods=14).mean()
    df["natr_14"] = df["atr_14"] / (close + eps)

    # --- Momentum / rate of change ---
    for n in [5, 10, 20]:
        df[f"mom_{n}"] = close - close.shift(n)
        df[f"roc_{n}"] = (close - close.shift(n)) / (close.shift(n) + eps)

    # --- Volume features (if usable volume exists) ---
    # An all-zero volume column is treated like a missing one: masking the
    # zeros to NaN would otherwise leave vol_ratio_20 entirely NaN, which the
    # final fill cannot repair.
    if "volume" in df.columns and df["volume"].sum() > 0:
        vol = df["volume"].replace(0, np.nan)
        df["vol_sma_20"] = vol.rolling(20).mean()
        df["vol_ratio_20"] = vol / (df["vol_sma_20"] + eps)
    else:
        df["vol_ratio_20"] = 1.0

    # --- Price spread & body features ---
    candle_rng = high - low + eps
    body_top = pd.concat([close, open_], axis=1).max(axis=1)
    body_bottom = pd.concat([close, open_], axis=1).min(axis=1)
    df["hl_spread"] = (high - low) / (close + eps)
    df["body_ratio"] = (close - open_).abs() / candle_rng
    df["upper_wick"] = (high - body_top) / candle_rng
    df["lower_wick"] = (body_bottom - low) / candle_rng
    df["bull_candle"] = np.where(close > open_, 1, 0)

    # --- Lagged returns for autocorrelation signal ---
    for lag in [1, 2, 3, 5]:
        df[f"ret1_lag{lag}"] = df["ret_1"].shift(lag)

    # --- Rolling volatility ---
    df["vol_10"] = df["ret_1"].rolling(10).std()
    df["vol_20"] = df["ret_1"].rolling(20).std()
    df["vol_50"] = df["ret_1"].rolling(50).std()

    # --- Z-score of close over 20 and 50 bars ---
    df["zscore_20"] = (close - close.rolling(20).mean()) / (close.rolling(20).std() + eps)
    df["zscore_50"] = (close - close.rolling(50).mean()) / (close.rolling(50).std() + eps)

    # --- Relative distance of price from BB bands ---
    df["dist_upper"] = (bb_upper - close) / (close + eps)
    df["dist_lower"] = (close - bb_lower) / (close + eps)

    # --- EMA 9 and 21 for short-term momentum ---
    df["ema_9"] = close.ewm(span=9, min_periods=9).mean()
    df["ema_21"] = close.ewm(span=21, min_periods=21).mean()
    df["ema9_vs_ema21"] = np.where(df["ema_9"] > df["ema_21"], 1, -1)
    df["price_vs_ema9"] = (close - df["ema_9"]) / (df["ema_9"] + eps)
    df["price_vs_ema21"] = (close - df["ema_21"]) / (df["ema_21"] + eps)

    # --- MACD-like signal ---
    ema_12 = close.ewm(span=12, min_periods=12).mean()
    ema_26 = close.ewm(span=26, min_periods=26).mean()
    macd_line = ema_12 - ema_26
    signal_line = macd_line.ewm(span=9, min_periods=9).mean()
    df["macd"] = macd_line
    df["macd_signal"] = signal_line
    df["macd_hist"] = macd_line - signal_line
    df["macd_cross"] = np.where(macd_line > signal_line, 1, -1)

    # --- Fill NaN from warm-up periods ---
    # bfill copies the first valid value backwards into the warm-up rows.
    df = df.bfill().ffill()
    return df


# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the static model, signal and risk settings for this strategy."""
    return {
        "title": "USD/JPY Multi-MA + RSI/BB XGBoost Sharpe",
        "model_type": "XGBClassifier",
        "model_params": {
            "n_estimators": 400,
            "max_depth": 4,
            "learning_rate": 0.04,
            "subsample": 0.75,
            "colsample_bytree": 0.75,
            "min_child_weight": 5,
            "gamma": 0.1,
            "reg_alpha": 0.1,
            "reg_lambda": 1.5,
            "objective": "binary:logistic",
            "tree_method": "hist",
            "random_state": 42,
        },
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.0008,
        "take_profit": 0.0016,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "close_only",
        "session_filter": None,
        "min_atr": None,
        "trend_filter": None,
        "target_horizon": 5,
        "objective": (
            "Maximize Sharpe ratio on USD/JPY 1-min data using XGBoost with "
            "returns, RSI, Bollinger Bands, multiple MAs (50/100/200), MACD, "
            "ATR, and candle-body features. Stop-loss and take-profit set at "
            "a 1:2 risk/reward to filter noise and improve Sharpe. n_estimators "
            "and moderate depth balance bias-variance. Regularization (alpha/lambda) "
            "reduces overfitting on short date range."
        ),
        "notes": (
            "Target horizon of 5 bars (5 minutes) is chosen to capture short-term "
            "directional moves on 1-min data without excessive label noise. "
            "colsample_bytree and subsample add stochasticity to reduce variance. "
            "close_only on opposite signal avoids whipsaw from rapid reversals. "
            "No session filter applied since USD/JPY has liquidity around the clock."
        ),
    }
0.59	USD/CAD BB + ATR Gradient Boosting Mean-Rev Maximize risk-adjusted return (Sharpe/Calmar) on USD/CAD 15-min data. GradientBoostingClassifier chosen for strong generalisation on noisy F…	S @silver-bull-130	USDCAD	15min	62.6%61.2%	+2.56%+1.57%	1.151.07	1.75%1.75%	35685
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG                                         ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated      : 2026-05-06 01:50:17
# Model          : Gradient Boosting
# Feature Eng.   : BB (20,2.0), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization   : Maximize risk-adjusted return
# Risk Mgmt      : Stop loss 0.5%, Take profit 1.0%
# Risk Filter    : —
# ══════════════════════════════════════════════════════════════

# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd

DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/USDCAD_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7


# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add Bollinger, ATR, RSI, MACD, SMA, candle and stochastic features.

    Columns are written onto the passed ``df`` in place; the returned frame
    is a back-filled then forward-filled copy of it.

    Args:
        df: Frame that receives the feature columns. An optional ``volume``
            column adds ``vol_ratio_20`` when present and not all zero.
        close, open_, high, low: Price series. Presumably the OHLC columns
            of ``df`` sharing its index — verify against the caller.

    Returns:
        ``df`` with all feature columns added and warm-up NaNs filled.
    """
    # ── Bollinger Bands (20, 2) ──────────────────────────────────────────
    bb_period = 20
    bb_std = 2.0
    bb_mid = close.rolling(bb_period).mean()
    bb_std_s = close.rolling(bb_period).std(ddof=0)
    bb_upper = bb_mid + bb_std * bb_std_s
    bb_lower = bb_mid - bb_std * bb_std_s
    df["bb_mid"] = bb_mid
    df["bb_upper"] = bb_upper
    df["bb_lower"] = bb_lower
    df["bb_width"] = (bb_upper - bb_lower) / bb_mid
    df["bb_pct"] = (close - bb_lower) / (bb_upper - bb_lower)

    # ── ATR (14) & Normalised ATR ────────────────────────────────────────
    atr_period = 14
    prev_close = close.shift(1)
    tr = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
    ).max(axis=1)
    atr = tr.ewm(span=atr_period, adjust=False).mean()
    natr = atr / close
    df["atr"] = atr
    df["natr"] = natr

    # ── RSI (14) ─────────────────────────────────────────────────────────
    rsi_period = 14
    delta = close.diff()
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)
    avg_gain = gain.ewm(span=rsi_period, adjust=False).mean()
    avg_loss = loss.ewm(span=rsi_period, adjust=False).mean()
    # 100 * g / (g + l) is algebraically 100 - 100 / (1 + g / l) but stays
    # defined (= 100) when there are no losses; dividing by a NaN-masked
    # avg_loss left the RSI undefined exactly when it should saturate.
    rsi_total = (avg_gain + avg_loss).replace(0, np.nan)
    df["rsi"] = 100.0 * avg_gain / rsi_total

    # ── MACD (12, 26, 9) ─────────────────────────────────────────────────
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    macd_line = ema12 - ema26
    macd_signal = macd_line.ewm(span=9, adjust=False).mean()
    df["macd"] = macd_line
    df["macd_sig"] = macd_signal
    df["macd_hist"] = macd_line - macd_signal

    # ── SMA filters (20, 50, 200) ────────────────────────────────────────
    df["sma_20"] = close.rolling(20).mean()
    df["sma_50"] = close.rolling(50).mean()
    df["sma_200"] = close.rolling(200).mean()

    # Price relative to moving averages
    df["close_vs_sma20"] = (close - df["sma_20"]) / df["sma_20"]
    df["close_vs_sma50"] = (close - df["sma_50"]) / df["sma_50"]
    df["close_vs_sma200"] = (close - df["sma_200"]) / df["sma_200"]

    # ── Price momentum / returns ─────────────────────────────────────────
    df["ret_1"] = close.pct_change(1)
    df["ret_4"] = close.pct_change(4)
    df["ret_8"] = close.pct_change(8)
    df["ret_16"] = close.pct_change(16)
    df["ret_32"] = close.pct_change(32)

    # ── Candle body & wick features ──────────────────────────────────────
    body = (close - open_).abs()
    candle_range = (high - low).replace(0, np.nan)
    body_top = pd.concat([close, open_], axis=1).max(axis=1)
    body_bottom = pd.concat([close, open_], axis=1).min(axis=1)
    df["body_ratio"] = body / candle_range
    df["upper_wick"] = (high - body_top) / candle_range
    df["lower_wick"] = (body_bottom - low) / candle_range
    df["close_dir"] = np.sign(close - open_)

    # ── Volatility regime ────────────────────────────────────────────────
    df["vol_ratio"] = natr / natr.rolling(50).mean()  # ATR vs its own average
    width_q25 = df["bb_width"].rolling(50).quantile(0.25)
    df["bb_squeeze"] = np.where(df["bb_width"] < width_q25, 1.0, 0.0)

    # ── Stochastic %K / %D (14, 3) ───────────────────────────────────────
    low14 = low.rolling(14).min()
    high14 = high.rolling(14).max()
    stoch_k = 100 * (close - low14) / (high14 - low14).replace(0, np.nan)
    stoch_d = stoch_k.rolling(3).mean()
    df["stoch_k"] = stoch_k
    df["stoch_d"] = stoch_d

    # ── Rate-of-change ───────────────────────────────────────────────────
    df["roc_10"] = (close - close.shift(10)) / close.shift(10)

    # ── Rolling z-score of close (20-bar) ────────────────────────────────
    roll_mean = close.rolling(20).mean()
    roll_std = close.rolling(20).std(ddof=0).replace(0, np.nan)
    df["zscore_20"] = (close - roll_mean) / roll_std

    # ── Volume-related (if volume column exists) ─────────────────────────
    if "volume" in df.columns and df["volume"].sum() > 0:
        vol_ma = df["volume"].rolling(20).mean().replace(0, np.nan)
        df["vol_ratio_20"] = df["volume"] / vol_ma

    # ── Fill NaNs from warm-up ───────────────────────────────────────────
    # bfill copies the first valid value backwards into the warm-up rows.
    df = df.bfill().ffill()
    return df


# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the static model, signal and risk settings for this strategy."""
    return {
        "title": "USD/CAD BB + ATR Gradient Boosting Mean-Rev",
        "model_type": "GradientBoostingClassifier",
        "model_params": {
            "n_estimators": 400,
            "max_depth": 4,
            "learning_rate": 0.04,
            "subsample": 0.8,
            "min_samples_leaf": 20,
            "max_features": "sqrt",
            "validation_fraction": 0.1,
            "n_iter_no_change": 30,
            "tol": 1e-4,
            "random_state": 42,
        },
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": [7, 20],
        "min_atr": 0.0002,
        "trend_filter": None,
        "target_horizon": 4,
        "objective": (
            "Maximize risk-adjusted return (Sharpe/Calmar) on USD/CAD 15-min data. "
            "GradientBoostingClassifier chosen for strong generalisation on noisy FX "
            "price data; moderate depth (4) and learning rate (0.04) with early stopping "
            "prevent overfitting. Features: Bollinger Bands (mean-reversion signal via "
            "bb_pct and bb_width), ATR/NATR (volatility filter), RSI, MACD, Stochastic, "
            "z-score, momentum returns, and candle-body ratios. 2:1 R:R (SL 0.5%, TP 1.0%) "
            "with session filter (07-20 UTC) to avoid illiquid overnight hours."
        ),
        "notes": (
            "session_filter [7,20] captures London + NY overlap on USD/CAD. "
            "min_atr 0.0002 avoids flat/choppy markets. on_opposite=reverse ensures "
            "the model flips direction quickly when sentiment changes. "
            "target_horizon=4 bars (1 hour) aligns with typical intraday FX moves."
        ),
    }
0.58	EUR/USD XGBoost Multi-Feature Sharpe Maximiser Maximise Sharpe ratio on 15-min EUR/USD. XGBoost with moderate depth (4) and heavy regularisation (reg_lambda=1.5, min_child_weight=5, gamma…	A @alpha-viper-151	EUR/US		54.4%—	+0.53%—	1.27—	0.63%0.63%	68—
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG                                         ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated      : 2026-05-24 01:27:47
# Model          : XGBoost
# Feature Eng.   : deploy a 15min EURUSD model + Auto-add features: ON
# Signal / Entry : —
# Optimization   : —
# Risk Mgmt      : —
# Risk Filter    : —
# ══════════════════════════════════════════════════════════════

# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd

DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/EURUSD_15min.parquet"
START_DATE = "2026-04-14"
END_DATE = "2026-05-12"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7


# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add return, candle, RSI, EMA, MACD, band, ATR and session features.

    Columns are written onto the passed ``df`` in place; the returned frame
    is a back-filled then forward-filled copy of it.

    Args:
        df: Frame that receives the feature columns. An optional ``volume``
            column is used when present and not all zero.
        close, open_, high, low: Price series. Presumably the OHLC columns
            of ``df`` sharing its index — verify against the caller.
            ``close.index`` must expose ``.hour`` and ``.dayofweek``.

    Returns:
        ``df`` with all feature columns added and warm-up NaNs filled.
    """
    # ── Price returns ────────────────────────────────────────────────────
    for n in [1, 2, 4, 8, 16, 32]:
        df[f"ret_{n}"] = close.pct_change(n)

    # ── Candle body / wick features ──────────────────────────────────────
    hl = (high - low).replace(0, np.nan)
    body = (close - open_).abs()
    df["body_ratio"] = body / hl
    df["upper_wick"] = (high - np.maximum(close, open_)) / hl
    df["lower_wick"] = (np.minimum(close, open_) - low) / hl
    df["candle_dir"] = np.sign(close - open_)

    # ── RSI (14) ─────────────────────────────────────────────────────────
    def _rsi(src, n=14):
        """Return the RSI of ``src`` using com = n - 1 smoothing."""
        delta = src.diff()
        gain = delta.clip(lower=0)
        loss = (-delta).clip(lower=0)
        avg_g = gain.ewm(com=n - 1, min_periods=n).mean()
        avg_l = loss.ewm(com=n - 1, min_periods=n).mean()
        # 100 * g / (g + l) is algebraically 100 - 100 / (1 + g / l) but stays
        # defined (= 100) when there are no losses; dividing by a NaN-masked
        # avg_l left the RSI undefined exactly when it should saturate.
        total = (avg_g + avg_l).replace(0, np.nan)
        return 100.0 * avg_g / total

    df["rsi_14"] = _rsi(close, 14)
    df["rsi_7"] = _rsi(close, 7)
    df["rsi_21"] = _rsi(close, 21)
    # RSI normalised distance from 50
    df["rsi_14_dev"] = (df["rsi_14"] - 50) / 50

    # ── EMA crossovers ───────────────────────────────────────────────────
    ema8 = close.ewm(span=8, adjust=False).mean()
    ema21 = close.ewm(span=21, adjust=False).mean()
    ema50 = close.ewm(span=50, adjust=False).mean()
    ema200 = close.ewm(span=200, adjust=False).mean()
    df["ema8"] = ema8
    df["ema21"] = ema21
    df["ema50"] = ema50
    df["ema8_21_xo"] = (ema8 - ema21) / close
    df["ema21_50_xo"] = (ema21 - ema50) / close
    df["ema50_200_xo"] = (ema50 - ema200) / close
    # Price distance from EMAs (normalised)
    df["dist_ema8"] = (close - ema8) / close
    df["dist_ema21"] = (close - ema21) / close
    df["dist_ema50"] = (close - ema50) / close
    df["dist_ema200"] = (close - ema200) / close

    # ── MACD ─────────────────────────────────────────────────────────────
    macd_line = close.ewm(span=12, adjust=False).mean() - close.ewm(span=26, adjust=False).mean()
    macd_signal = macd_line.ewm(span=9, adjust=False).mean()
    df["macd_line"] = macd_line / close
    df["macd_signal"] = macd_signal / close
    df["macd_hist"] = (macd_line - macd_signal) / close
    df["macd_hist_chg"] = df["macd_hist"].diff()

    # ── Bollinger Bands (20, 2) ──────────────────────────────────────────
    bb_mid = close.rolling(20).mean()
    bb_std = close.rolling(20).std(ddof=0)
    bb_upper = bb_mid + 2 * bb_std
    bb_lower = bb_mid - 2 * bb_std
    bb_width = (bb_upper - bb_lower) / bb_mid.replace(0, np.nan)
    df["bb_pct"] = (close - bb_lower) / (bb_upper - bb_lower).replace(0, np.nan)
    df["bb_width"] = bb_width
    df["bb_width_chg"] = bb_width.diff()

    # ── ATR (14) ─────────────────────────────────────────────────────────
    prev_close = close.shift(1)
    tr = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
    ).max(axis=1)
    atr14 = tr.ewm(com=13, min_periods=14).mean()
    natr = atr14 / close
    df["atr14"] = atr14
    df["natr14"] = natr
    df["natr14_chg"] = natr.diff()

    # ── Momentum / Rate of Change ────────────────────────────────────────
    df["mom_4"] = close - close.shift(4)
    df["mom_8"] = close - close.shift(8)
    df["roc_10"] = (close / close.shift(10).replace(0, np.nan)) - 1
    df["roc_20"] = (close / close.shift(20).replace(0, np.nan)) - 1

    # ── Stochastic Oscillator (14,3) ─────────────────────────────────────
    low14 = low.rolling(14).min()
    high14 = high.rolling(14).max()
    stoch_k = (close - low14) / (high14 - low14).replace(0, np.nan) * 100
    stoch_d = stoch_k.rolling(3).mean()
    df["stoch_k"] = stoch_k
    df["stoch_d"] = stoch_d
    df["stoch_kd"] = stoch_k - stoch_d

    # ── Volume (if usable, else dummy) ───────────────────────────────────
    # An all-zero volume column is treated like a missing one: masking the
    # zeros to NaN would otherwise leave vol_ratio, vol_chg and the macd_vol
    # interaction entirely NaN, which the final fill cannot repair.
    if "volume" in df.columns and df["volume"].sum() > 0:
        vol = df["volume"].replace(0, np.nan)
        vol_ma20 = vol.rolling(20).mean()
        df["vol_ratio"] = vol / vol_ma20
        df["vol_chg"] = vol.pct_change()
    else:
        df["vol_ratio"] = 1.0
        df["vol_chg"] = 0.0

    # ── Rolling volatility ───────────────────────────────────────────────
    df["realvol_8"] = df["ret_1"].rolling(8).std()
    df["realvol_20"] = df["ret_1"].rolling(20).std()
    df["vol_ratio_short_long"] = df["realvol_8"] / df["realvol_20"].replace(0, np.nan)

    # ── Highs/Lows distance (support/resistance proxy) ───────────────────
    df["high_20_dist"] = (high.rolling(20).max() - close) / close
    df["low_20_dist"] = (close - low.rolling(20).min()) / close
    df["high_5_dist"] = (high.rolling(5).max() - close) / close
    df["low_5_dist"] = (close - low.rolling(5).min()) / close

    # ── Lagged features ──────────────────────────────────────────────────
    for col in ["rsi_14_dev", "macd_hist", "bb_pct", "natr14", "stoch_kd"]:
        df[f"{col}_lag1"] = df[col].shift(1)
        df[f"{col}_lag2"] = df[col].shift(2)
        df[f"{col}_lag4"] = df[col].shift(4)

    # ── Session dummies (hour-of-day in UTC) ─────────────────────────────
    # NOTE(review): hours are read straight from the index; the UTC label
    # assumes the data is stored in UTC — confirm.
    hour = close.index.hour
    df["session_london"] = np.where((hour >= 7) & (hour < 16), 1, 0)
    df["session_ny"] = np.where((hour >= 13) & (hour < 21), 1, 0)
    df["session_overlap"] = np.where((hour >= 13) & (hour < 16), 1, 0)
    df["session_asia"] = np.where((hour >= 0) & (hour < 7), 1, 0)

    # Day-of-week
    dow = close.index.dayofweek
    df["dow_mon"] = np.where(dow == 0, 1, 0)
    df["dow_fri"] = np.where(dow == 4, 1, 0)

    # ── Interaction features ─────────────────────────────────────────────
    df["rsi_macd"] = df["rsi_14_dev"] * df["macd_hist"]
    df["rsi_bbpct"] = df["rsi_14_dev"] * df["bb_pct"]
    df["macd_vol"] = df["macd_hist"] * df["vol_ratio"]
    df["natr_bbwid"] = df["natr14"] * df["bb_width"]

    # ── SMA50 for trend filter ───────────────────────────────────────────
    df["sma_50"] = close.rolling(50).mean()

    # ── Fill NaN from indicator warm-up ──────────────────────────────────
    # bfill copies the first valid value backwards into the warm-up rows.
    df = df.bfill().ffill()
    return df


# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the static model, signal and risk settings for this strategy."""
    return {
        "title": "EUR/USD XGBoost Multi-Feature Sharpe Maximiser",
        "model_type": "XGBClassifier",
        "model_params": {
            "n_estimators": 400,
            "max_depth": 4,
            "learning_rate": 0.03,
            "subsample": 0.75,
            "colsample_bytree": 0.70,
            "colsample_bylevel": 0.80,
            "min_child_weight": 5,
            "gamma": 0.1,
            "reg_alpha": 0.05,
            "reg_lambda": 1.5,
            "scale_pos_weight": 1.0,
            "objective": "binary:logistic",
            "tree_method": "hist",
            "random_state": 42,
            "n_jobs": -1,
        },
        "signal_threshold": 0.55,
        "direction": "both",
        "max_positions": 1,
        "on_opposite": "reverse",
        "cooldown": 0,
        # Fractions of price, as in the sibling strategies where 0.005 is
        # logged as "0.5%": 0.0008 = 0.08%, 0.0016 = 0.16%.
        "stop_loss": 0.0008,
        "take_profit": 0.0016,
        "session_filter": [7, 21],
        "min_atr": 0.00015,
        "trend_filter": None,
        "target_horizon": 4,
        "objective": (
            "Maximise Sharpe ratio on 15-min EUR/USD. "
            "XGBoost with moderate depth (4) and heavy regularisation "
            "(reg_lambda=1.5, min_child_weight=5, gamma=0.1) controls overfitting "
            "on the short date window. Low learning_rate (0.03) with 400 trees "
            "for stable convergence. SL=0.08% / TP=0.16% gives 1:2 RR to "
            "preserve Sharpe. Session filter [7,21] removes illiquid Asia opens. "
            "min_atr filters flat, low-volatility bars that degrade signal quality."
        ),
        "notes": (
            "Feature set covers trend (EMA crosses, MACD), mean-reversion (RSI, BB%B), "
            "volatility (ATR, realvol), momentum (ROC, Stochastic), microstructure "
            "(candle body/wick ratios), session dummies, and lagged versions of key "
            "signals to give the model temporal context without lookahead. "
            "Interaction terms (rsi_macd, rsi_bbpct) capture combined regime signals. "
            "target_horizon=4 bars (1 hour) balances enough price movement to overcome "
            "2e-5 round-trip cost while avoiding excessive label noise."
        ),
    }
0.00	RSI mean-reversion	M @malcolmtan	RSI me		65.2%—	+0.01%—	1.01—	0.82%0.82%	23—
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-25 02:26:36
# Model : XGBoost
# Feature Eng. : buy when RSI(14) crosses up from below 30, sell when it crosses down from above 70 + Auto-add features: ON
# Signal / Entry : —
# Optimization : —
# Risk Mgmt : —
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# ── Inlined strategy_utils ──
"""
strategy_utils.py — Standard utility functions for generated strategies.
Claude imports these instead of writing boilerplate from scratch.
This ensures consistent behavior across all generated strategies.
"""
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Max backtest window per timeframe. A finer timeframe over a longer window
# blows up the results dict / parquet load / Modal train time (the 2026-05-12
# OOM was a 1-min × multi-year sweep) — and a 1-min strategy gains nothing from
# 2 years of 1-min bars. Enforced HERE because every training path (UI / API /
# Modal) funnels through run_strategy → load_ohlc. Env-overridable so a future
# "max plan" / dedicated-server tier can lift it.
_TF_MAX_DAYS = {
    "1min": 30,
    "5min": 90,
    "15min": 365,
    "1h": 730,
}


def _fetch_ohlc_from_internal(symbol: str, tf: str, start: str, end: str):
    """Phase 3.2: fetch parquet bytes from Server A's /internal/ohlc endpoint
    instead of reading a local file. Used inside Modal containers / Mac worker
    pool (Phase 3.4) so every train sees the same source of truth as the chart.

    The request is signed with HMAC-SHA256 over "symbol|tf|start|end" using the
    INTERNAL_WS_SECRET environment variable.

    Returns: pd.DataFrame (parquet decoded), or raises on any failure so the
    caller can fall back / surface a clear error in the job.

    Raises: RuntimeError when QM_INTERNAL_OHLC_BASE or INTERNAL_WS_SECRET is
    unset, or when the endpoint answers with a non-200 status.
    """
    import hashlib as _hashlib, hmac as _hmac, io as _io, os as _os
    import urllib.request as _ur, urllib.parse as _urp

    base = (_os.environ.get("QM_INTERNAL_OHLC_BASE") or "").rstrip("/")
    secret = (_os.environ.get("INTERNAL_WS_SECRET") or "").strip()
    if not base:
        raise RuntimeError("QM_INTERNAL_OHLC_BASE not set")
    if not secret:
        raise RuntimeError("INTERNAL_WS_SECRET not set")

    # Pipe-separated message; the signature travels as the "sig" query param.
    msg = f"{symbol}|{tf}|{start}|{end}".encode("utf-8")
    sig = _hmac.new(secret.encode("utf-8"), msg, _hashlib.sha256).hexdigest()
    qs = _urp.urlencode({
        "symbol": symbol,
        "tf": tf,
        "start": start,
        "end": end,
        "sig": sig,
    })
    url = f"{base}/internal/ohlc?{qs}"
    req = _ur.Request(url, headers={"User-Agent": "qm-worker/1.0"})
    with _ur.urlopen(req, timeout=30) as resp:
        if resp.status != 200:
            raise RuntimeError(f"/internal/ohlc returned {resp.status}")
        payload = resp.read()
    print(f"[load_ohlc:internal] {symbol} {tf} fetched {len(payload)} bytes", flush=True)
    return pd.read_parquet(_io.BytesIO(payload))


def _parse_symbol_tf_from_path(data_path: str):
    """Pull SYMBOL + TF out of a path like .../EURUSD_1min.parquet.

    Returns: (symbol, tf), or (None, None) when the basename does not match
    the six-uppercase-letter SYMBOL_TF.parquet pattern.
    """
    import os as _os, re as _re

    base = _os.path.basename(str(data_path))
    m = _re.match(r"^([A-Z]{6})_(\d+min|\d+h)\.parquet$", base)
    if not m:
        return None, None
    return m.group(1), m.group(2)


def load_ohlc(data_path, start_date="", end_date=""):
    """Load OHLC parquet, sort index, filter dates. Always returns consistent format.

    The lower bound is clamped per timeframe (see _TF_MAX_DAYS) — a request for
    more history than the cap silently starts later.

    Phase 3.2: when env QM_USE_INTERNAL_OHLC=="1", fetch over HTTP from Server A's
    /internal/ohlc endpoint instead of pd.read_parquet on a local file (which on
    Modal is a stale Volume snapshot). The endpoint applies the same day-cap, so
    the local cap-check below is a defensive no-op in that path. Flag defaults to
    "0" → unchanged behavior.

    Returns: (df, close, open_, high, low)
    """
    import os as _os, re as _re

    _use_internal = _os.environ.get("QM_USE_INTERNAL_OHLC", "0") == "1"
    if _use_internal:
        _sym, _tf = _parse_symbol_tf_from_path(data_path)
        if not _sym or not _tf:
            raise RuntimeError(
                f"QM_USE_INTERNAL_OHLC=1 but DATA_PATH basename does not match "
                f"SYMBOL_TF.parquet: {data_path}"
            )
        df = _fetch_ohlc_from_internal(_sym, _tf, start_date or "", end_date or "")
    else:
        df = pd.read_parquet(data_path)
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()

    # Per-timeframe window cap (timeframe inferred from the parquet filename).
    _m = _re.search(r"_(\d+min|\d+h)\.parquet$", _os.path.basename(str(data_path)))
    _tf = _m.group(1) if _m else None
    _max_days = _TF_MAX_DAYS.get(_tf)
    if _max_days and _max_days > 0 and len(df):
        _env_override = _os.environ.get(f"QM_MAX_DAYS_{_tf.upper()}")
        if _env_override and _env_override.isdigit():
            _max_days = int(_env_override)
        try:
            _eff_end = pd.Timestamp(end_date) if end_date else df.index.max()
            _eff_end = min(_eff_end, df.index.max())
            _floor = _eff_end - pd.Timedelta(days=_max_days)
            _req_start = pd.Timestamp(start_date) if start_date else df.index.min()
            if _req_start < _floor:
                print(f"[load_ohlc] {_tf} backtest window capped to {_max_days}d: "
                      f"start {_req_start.date()} -> {_floor.date()}", flush=True)
                start_date = _floor
        except Exception as _e:
            # Deliberate best-effort: a failed cap check must not abort the load.
            print(f"[load_ohlc] window-cap check skipped ({_e})", flush=True)

    if start_date:
        df = df[df.index >= start_date]
    if end_date:
        df = df[df.index <= end_date]
    return df, df["close"], df["open"], df["high"], df["low"]


def make_target(close, horizon=4):
    """Create target: direction N bars ahead. Default 4 bars = 1 hour on 15-min data.

    The last `horizon` rows are NaN because there is no future bar to compare.

    Returns: target (pd.Series of -1, 0, 1)
    """
    return np.sign(close.shift(-horizon) - close)


def split_data(df, target, feature_cols, train_split=0.7, validation_date=""):
    """Train/test split. Handles both ratio and date-based splits.
    Drops NaN from target before splitting. Encodes labels to [0,1,2].

    Returns: dict with keys: X_train, X_test, y_train, y_test,
    y_train_enc, y_test_enc, enc, close_train, close_test,
    split_idx, split_dt, n_train, n_test (plus df and close)
    """
    # Drop NaN from target
    mask = target.notna()
    df = df[mask].copy()
    target = target[mask]
    close = df["close"]

    # Build feature matrix
    X = df[feature_cols].copy()
    X = X.bfill().ffill()
    X = X.replace([np.inf, -np.inf], np.nan).fillna(0.0)

    # Split
    if validation_date:
        split_idx = len(df[df.index <= validation_date])
    else:
        split_idx = int(len(df) * train_split)
    # Keep at least one row on each side of the split.
    split_idx = max(1, min(split_idx, len(df) - 1))

    X_train = X.iloc[:split_idx]
    X_test = X.iloc[split_idx:]
    y_train = target.iloc[:split_idx]
    y_test = target.iloc[split_idx:]
    close_train = close.iloc[:split_idx]
    close_test = close.iloc[split_idx:]
    split_dt = str(df.index[split_idx])

    # Label encoding — always fit on [-1, 0, 1]
    enc = LabelEncoder()
    enc.fit([-1, 0, 1])
    y_train_enc = enc.transform(y_train)
    y_test_enc = enc.transform(y_test)

    return {
        "df": df,
        "X_train": X_train,
        "X_test": X_test,
        "y_train": y_train,
        "y_test": y_test,
        "y_train_enc": y_train_enc,
        "y_test_enc": y_test_enc,
        "enc": enc,
        "close": close,
        "close_train": close_train,
        "close_test": close_test,
        "split_idx": split_idx,
        "split_dt": split_dt,
        "n_train": len(X_train),
        "n_test": len(X_test),
    }


def compute_overlays(close, df_index):
    """Compute BB and MA overlays on full dataset. Always consistent.

    Returns: (bb_dict, ma_dict)
    """
    bb_mid = close.rolling(20).mean()
    bb_std = close.rolling(20).std()
    bb_upper = bb_mid + 2 * bb_std
    bb_lower = bb_mid - 2 * bb_std
    ma50 = close.rolling(50).mean()
    ma100 = close.rolling(100).mean()
    ma200 = close.rolling(200).mean()

    def _safe(s):
        # Align to the chart index, fill warm-up gaps, emit JSON-safe floats.
        s = s.reindex(df_index).bfill().ffill()
        return [float(x) if (x is not None and not np.isnan(x) and not np.isinf(x)) else None
                for x in s.values]

    bb = {"upper": _safe(bb_upper), "mid": _safe(bb_mid), "lower": _safe(bb_lower)}
    ma = {"ma50": _safe(ma50), "ma100": _safe(ma100), "ma200": _safe(ma200)}
    return bb, ma


def run_backtest(signal, close, capital=10000, cost=2e-5):
    """Run backtest with transaction costs.
    Uses price-based trade returns (same as webapp _compute_trades).
    Signal 0 = hold (keep current position), not close.

    Returns: dict with equity, trade_returns, long_returns, short_returns,
    bar_returns, trade_log
    """
    sig_arr = signal.values
    price_arr = close.values
    idx = signal.index
    n = len(price_arr)

    # Trade returns — price-based (matches webapp _compute_trades exactly)
    trade_returns = []
    long_returns = []
    short_returns = []
    trade_log = []
    last_dir = None
    entry_price = None
    entry_bar = None
    for i in range(n):
        s = sig_arr[i]
        c = price_arr[i]
        if s != 0.0 and s != last_dir:
            # Direction change — close previous trade, open new
            if last_dir is not None and entry_price is not None and entry_price != 0:
                ret = float(last_dir * (c - entry_price) / entry_price - cost)
                trade_returns.append(ret)
                if last_dir == 1:
                    long_returns.append(ret)
                else:
                    short_returns.append(ret)
                trade_log.append({
                    "type": "Buy" if last_dir == 1 else "Sell",
                    "entry_time": str(idx[entry_bar]),
                    "exit_time": str(idx[i]),
                    "entry_price": round(entry_price, 5),
                    "exit_price": round(c, 5),
                    "pnl": round(last_dir * (c - entry_price), 5),
                    "pnl_pct": round(ret * 100, 3),
                    "exit_reason": "signal",
                })
            entry_price = c
            entry_bar = i
            last_dir = s

    # Close last open trade
    if last_dir is not None and entry_price is not None and n > 0 and entry_price != 0:
        c = price_arr[-1]
        ret = float(last_dir * (c - entry_price) / entry_price - cost)
        trade_returns.append(ret)
        if last_dir == 1:
            long_returns.append(ret)
        else:
            short_returns.append(ret)
        trade_log.append({
            "type": "Buy" if last_dir == 1 else "Sell",
            "entry_time": str(idx[entry_bar]),
            "exit_time": str(idx[-1]),
            "entry_price": round(entry_price, 5),
            "exit_price": round(c, 5),
            "pnl": round(last_dir * (c - entry_price), 5),
            "pnl_pct": round(ret * 100, 3),
            "exit_reason": "end",
        })

    # Equity curve from trade returns: equity steps only when a trade closes,
    # and each closed trade COMPOUNDS onto the running multiplier.
    cumret = 1.0
    equity_vals = np.full(n, float(capital))
    t_entry_price = None
    t_dir = None
    for i in range(n):
        s = sig_arr[i]
        c = price_arr[i]
        if s != 0.0 and s != t_dir:
            if t_dir is not None and t_entry_price is not None and t_entry_price != 0:
                t_ret = t_dir * (c - t_entry_price) / t_entry_price - cost
                cumret *= (1 + t_ret)
            t_entry_price = c
            t_dir = s
        equity_vals[i] = capital * cumret

    # Bar returns for Sharpe
    bar_returns = np.zeros(n)
    for i in range(1, n):
        if price_arr[i - 1] != 0 and last_dir is not None:
            bar_returns[i] = (
                sig_arr[i - 1] * (price_arr[i] - price_arr[i - 1]) / price_arr[i - 1]
                if sig_arr[i - 1] != 0 else 0.0
            )

    return {
        "equity": pd.Series(equity_vals, index=close.index),
        "trade_returns": trade_returns,
        "long_returns": long_returns,
        "short_returns": short_returns,
        "bar_returns": bar_returns,
        "trade_log": trade_log,
    }


def compute_trade_stats(trades, capital=10000):
    """Single source of truth for trade statistics.
    Every display path reads from this — no recomputation anywhere.
    All values are rounded and JSON-safe (no inf/nan).

    `trades` is a sequence of per-trade fractional returns. Returns a dict with
    keys n, wins, losses, wr, avg, best, worst, ret, np, mdd, pf, rr, expect.
    """
    if not trades:
        return {"n": 0, "wins": 0, "losses": 0, "wr": 0, "avg": 0, "best": 0, "worst": 0,
                "ret": 0, "np": 0, "mdd": 0, "pf": 0, "rr": 0, "expect": 0}

    wins = [r for r in trades if r > 0]
    losses = [r for r in trades if r < 0]

    # Compound the trades into one total-return multiplier.
    cumret = 1.0
    for r in trades:
        cumret *= (1 + r)
    net_p = capital * (cumret - 1)

    # Max drawdown
    eq = np.cumprod([1.0] + [1 + r for r in trades])
    peak = np.maximum.accumulate(eq)
    mdd = float(((eq - peak) / peak).min()) if len(eq) > 1 else 0.0

    # Profit Factor (9999.0 stands in for "infinite" to stay JSON-safe)
    gross_w = sum(wins) if wins else 0
    gross_l = abs(sum(losses)) if losses else 0
    pf = gross_w / gross_l if gross_l > 0 else (9999.0 if gross_w > 0 else 0)

    # Risk:Reward
    avg_w = float(np.mean(wins)) if wins else 0
    avg_l = abs(float(np.mean(losses))) if losses else 0
    rr = avg_w / avg_l if avg_l > 0 else (9999.0 if avg_w > 0 else 0)

    # Expectancy
    expect = net_p / len(trades)

    return {
        "n": len(trades),
        "wins": len(wins),
        "losses": len(losses),
        "wr": round(len(wins) / len(trades), 4),
        "avg": round(float(np.mean(trades)), 6),
        "best": round(max(wins), 6) if wins else 0,
        "worst": round(min(losses), 6) if losses else 0,
        "ret": round(cumret - 1, 6),
        "np": round(net_p, 2),
        "mdd": round(mdd, 6),
        "pf": round(pf, 2),
        "rr": round(rr, 2),
        "expect": round(expect, 2),
    }


def compute_metrics(bt_result, close_test, capital=10000):
    """Compute all standard metrics from backtest result.
    Uses trade-level compounding (same as webapp _trade_stats) for accuracy.

    Returns: dict with total_ret, bh_ret, sharpe_strat, sharpe_bh, mdd, n_trades
    """
    equity = bt_result["equity"]  # read only so a missing key fails fast
    trade_returns = bt_result["trade_returns"]

    # Total return — trade-level compounding (matches webapp)
    if trade_returns:
        cumret = 1.0
        for r in trade_returns:
            cumret *= (1 + r)
        total_ret = cumret - 1
    else:
        total_ret = 0.0

    # Buy and hold
    bh_equity = capital * (close_test / close_test.iloc[0])
    bh_ret = (bh_equity.iloc[-1] - capital) / capital if capital != 0 else 0.0

    # Sharpe ratio — trade-level (matches webapp: sqrt(252*26) annualization)
    if len(trade_returns) >= 2 and float(np.std(trade_returns)) > 0:
        sharpe_strat = float(np.mean(trade_returns) / np.std(trade_returns) * np.sqrt(252 * 26))
    else:
        sharpe_strat = 0.0

    bh_rets = bh_equity.pct_change().dropna()
    if len(bh_rets) > 1 and bh_rets.std() != 0:
        sharpe_bh = float((bh_rets.mean() / bh_rets.std()) * np.sqrt(252 * 24 * 4))
    else:
        sharpe_bh = 0.0

    # Max drawdown — trade-level (matches webapp)
    if trade_returns:
        eq = np.cumprod([1.0] + [1 + r for r in trade_returns])
        peak = np.maximum.accumulate(eq)
        mdd = float(((eq - peak) / peak).min()) if len(eq) > 1 else 0.0
    else:
        mdd = 0.0

    return {
        "total_ret": float(total_ret),
        "bh_ret": float(bh_ret),
        "sharpe_strat": float(sharpe_strat) if not np.isnan(sharpe_strat) else 0.0,
        "sharpe_bh": float(sharpe_bh) if not np.isnan(sharpe_bh) else 0.0,
        "mdd": float(mdd),
        "n_trades": len(trade_returns),
    }


# Diagnostics line/histogram series (equity / drawdown / rolling_acc / conf_hist)
# only feed the small Diagnostics charts — they're never used by the price chart
# or scroll-back. On a 1-min model trained over the (2.2-capped) window these are
# still ~30k points each; downsample to a visually-identical resolution before the
# dict leaves the trainer so it doesn't carry that into Server-A RAM / Postgres.
_RESULTS_SERIES_MAX = 5000 def _downsample_idx(n, cap=_RESULTS_SERIES_MAX): """Evenly-spaced index list spanning [0, n-1] (first+last always kept), or None when no downsampling is needed (n <= cap).""" if n <= cap: return None return np.unique(np.linspace(0, n - 1, cap).astype(int)).tolist() def _take(arr, idx): """Subset a list by an index list (idx may be None → return arr unchanged).""" if idx is None or not isinstance(arr, list): return arr return [arr[i] for i in idx] # trade_log / train_trade_log are lists of per-trade dicts (display-only — the # Trade Log tab). They scale with TRADE count, not bar count, so the bar-window # cap (Phase 2.2) doesn't bound them — a degenerate near-every-bar model can put # 10k+ trade dicts in the blob (>3 MB). Cap each (independently — a small-N model # keeps every trade) to the most-recent N, recording `_total` + `_truncated` # so the true count is still reported. Real strategies have far fewer than # _TRADE_LOG_MAX trades, so this only ever bites pathological models. _TRADE_LOG_MAX = 5000 def _cap_trade_log(tl): """Return (capped_list, original_len, was_truncated).""" if not isinstance(tl, list) or len(tl) <= _TRADE_LOG_MAX: return tl, (len(tl) if isinstance(tl, list) else 0), False return tl[-_TRADE_LOG_MAX:], len(tl), True def build_return_dict(split_result, bt_result, metrics, model, feature_cols, signal_full, p_pos_test, p_neg_test, custom_figs=None, bt_train_result=None, pre_stats=None): """Assemble the complete return dict. Handles ALL serialization. Never returns Timestamps, numpy arrays, or non-JSON types. 
Returns: JSON-safe dict with all required keys """ df = split_result["df"] close = split_result["close"] close_test = split_result["close_test"] X_test = split_result["X_test"] y_test = split_result["y_test"] equity = bt_result["equity"] bar_returns = bt_result["bar_returns"] # OHLC ohlc_dates = [str(x) for x in df.index.tolist()] def _safe_list(arr): return [float(x) if (x is not None and not np.isnan(x) and not np.isinf(x)) else None for x in arr] # Overlays bb, ma = compute_overlays(close, df.index) # Buy and hold equity capital = equity.iloc[0] if len(equity) > 0 else 10000 bh_equity = capital * (close_test / close_test.iloc[0]) # Confusion matrix from sklearn.metrics import confusion_matrix pred_test = model.predict(X_test) y_test_arr = np.asarray(y_test) cm = confusion_matrix(y_test_arr, pred_test, labels=[-1, 0, 1]) # Rolling accuracy sig_arr = signal_full.reindex(close_test.index).values correct = pd.Series((pred_test == y_test_arr).astype(float), index=X_test.index) active_test = pd.Series(sig_arr != 0, index=close_test.index) if len(sig_arr) == len(close_test) else pd.Series(True, index=close_test.index) correct_active = correct.where(active_test, other=np.nan) rolling_acc = correct_active.rolling(30, min_periods=1).mean() # Feature importance importances = model.feature_importances_ fi_pairs = sorted(zip(feature_cols, importances), key=lambda x: x[1])[-15:] # Drawdown rolling_max = equity.cummax() drawdown = (equity - rolling_max) / rolling_max.replace(0, np.nan) drawdown = drawdown.fillna(0.0) # ── Downsample the Diagnostics-only series (see _downsample_idx) ────────── _eq_dates = [str(x) for x in close_test.index.tolist()] _eq_strat = _safe_list(equity.values) _eq_bh = _safe_list(bh_equity.values) _eq_idx = _downsample_idx(len(_eq_dates)) _eq_dates, _eq_strat, _eq_bh = _take(_eq_dates, _eq_idx), _take(_eq_strat, _eq_idx), _take(_eq_bh, _eq_idx) _ra_dates = [str(x) for x in rolling_acc.index.tolist()] _ra_vals = [float(x) if (not np.isnan(x) and not 
np.isinf(x)) else None for x in rolling_acc.values] _ra_idx = _downsample_idx(len(_ra_dates)) _ra_dates, _ra_vals = _take(_ra_dates, _ra_idx), _take(_ra_vals, _ra_idx) _dd_dates = [str(x) for x in drawdown.index.tolist()] _dd_vals = _safe_list(drawdown.values) _dd_idx = _downsample_idx(len(_dd_dates)) _dd_dates, _dd_vals = _take(_dd_dates, _dd_idx), _take(_dd_vals, _dd_idx) _cp_pos = [float(x) for x in (p_pos_test.tolist() if hasattr(p_pos_test, 'tolist') else list(p_pos_test))] _cp_neg = [float(x) for x in (p_neg_test.tolist() if hasattr(p_neg_test, 'tolist') else list(p_neg_test))] _cp_pos = _take(_cp_pos, _downsample_idx(len(_cp_pos))) _cp_neg = _take(_cp_neg, _downsample_idx(len(_cp_neg))) # ── Trade logs — display-only (Trade Log tab); cap to most-recent N with a # `_total` field so the true count is still reported (see _cap_trade_log). # NB: ret_dist arrays are left FULL — a downstream path in callbacks.py # recomputes n_trades/win-rate from len(ret_dist), so a sample would skew # the displayed counts; they're small anyway and gzip handles them. 
_tl_test, _tl_test_n, _tl_test_tr = _cap_trade_log(bt_result.get("trade_log", [])) _tl_tr, _tl_tr_n, _tl_tr_tr = _cap_trade_log(bt_train_result.get("trade_log", []) if bt_train_result else []) return { "ohlc": { "dates": ohlc_dates, "open": _safe_list(df["open"].values), "high": _safe_list(df["high"].values), "low": _safe_list(df["low"].values), "close": _safe_list(df["close"].values), }, "signals": { "dates": [str(x) for x in signal_full.index.tolist()], "values": [float(x) for x in signal_full.values], }, "bb": bb, "ma": ma, "equity": { "dates": _eq_dates, "strategy": _eq_strat, "bh": _eq_bh, }, "feature_importance": { "names": [p[0] for p in fi_pairs], "values": [float(p[1]) for p in fi_pairs], }, "conf_matrix": cm.tolist(), "conf_hist": { "p_pos": _cp_pos, "p_neg": _cp_neg, }, "rolling_acc": { "dates": _ra_dates, "values": _ra_vals, }, "drawdown": { "dates": _dd_dates, "values": _dd_vals, }, "ret_dist": [float(x) for x in bt_result["trade_returns"]], "ret_dist_long": [float(x) for x in bt_result["long_returns"]], "ret_dist_short": [float(x) for x in bt_result["short_returns"]], "train_ret_dist": [float(x) for x in bt_train_result["trade_returns"]] if bt_train_result else [], "train_ret_dist_long": [float(x) for x in bt_train_result["long_returns"]] if bt_train_result else [], "train_ret_dist_short": [float(x) for x in bt_train_result["short_returns"]] if bt_train_result else [], "trade_log": _tl_test, "train_trade_log": _tl_tr, "trade_log_total": _tl_test_n, "train_trade_log_total": _tl_tr_n, "trade_log_truncated": _tl_test_tr, "train_trade_log_truncated": _tl_tr_tr, (pre_stats or {}), "metrics": metrics, "split_dt": split_result["split_dt"], "split_idx": int(split_result["split_idx"]), "n_train": int(split_result["n_train"]), "n_test": int(split_result["n_test"]), "feature_cols": list(feature_cols), "custom_figs": custom_figs or [], } # ════════════════════════════════════════════════════════════════════════════ # STRATEGY FRAMEWORK v2 — Config-driven 
architecture # Claude writes feature_engineering() + strategy_config(). Framework does rest. # ════════════════════════════════════════════════════════════════════════════ import importlib _MODEL_REGISTRY = { "XGBClassifier": ("xgboost", "XGBClassifier"), "RandomForestClassifier": ("sklearn.ensemble", "RandomForestClassifier"), "GradientBoostingClassifier": ("sklearn.ensemble", "GradientBoostingClassifier"), "LogisticRegression": ("sklearn.linear_model", "LogisticRegression"), "ExtraTreesClassifier": ("sklearn.ensemble", "ExtraTreesClassifier"), "AdaBoostClassifier": ("sklearn.ensemble", "AdaBoostClassifier"), } def _build_model_from_config(config, X_train, y_train_enc): """Build, fit, and wrap a model from strategy_config dict.""" model_type = config.get("model_type", "RandomForestClassifier") model_params = dict(config.get("model_params", {})) if model_type not in _MODEL_REGISTRY: raise ValueError(f"Unknown model_type '{model_type}'. Valid: {list(_MODEL_REGISTRY.keys())}") module_path, class_name = _MODEL_REGISTRY[model_type] mod = importlib.import_module(module_path) cls = getattr(mod, class_name) # XGBoost defaults if class_name == "XGBClassifier": model_params.setdefault("use_label_encoder", False) model_params.setdefault("eval_metric", "mlogloss") model_params.setdefault("tree_method", "hist") # Determinism > speed (2026-05-25). XGBoost hist with n_jobs=-1 is # NON-reproducible even with random_state set — the parallel histogram # gradient-sum order varies across threads, so the SAME code + data # gives a slightly different model (and backtest) every run. Forcing # single-thread makes training bit-reproducible so: (a) a user who # copies a strategy and reruns it gets identical numbers, (b) the # community "Live" score matches a redeploy, (c) "same code, different # result" support reports go away. Cost: single-threaded XGB (a few # seconds slower on large windows; hist is fast so it's minor). 
FORCED # (not setdefault) so the guarantee can't be silently broken by a # strategy passing n_jobs. Exact reproducibility holds within the # platform (pinned versions / same Modal image); a user's own machine # with different xgboost/numpy/CPU can still differ in low-order bits. model_params["n_jobs"] = 1 # Common defaults model_params.setdefault("random_state", 42) from model_wrapper import ModelWrapper clf = cls(model_params) clf.fit(X_train, y_train_enc) enc = LabelEncoder() enc.fit([-1, 0, 1]) return ModelWrapper(clf, original_classes=enc.classes_, n_features=X_train.shape[1]) def _generate_signals(model, X, threshold): """Framework-owned signal generation. Deterministic threshold logic.""" proba = model.predict_proba(X) classes = list(model.classes_) idx_pos = classes.index(1) if 1 in classes else None idx_neg = classes.index(-1) if -1 in classes else None p_pos = proba[:, idx_pos] if idx_pos is not None else np.zeros(len(X)) p_neg = proba[:, idx_neg] if idx_neg is not None else np.zeros(len(X)) signal_vals = np.zeros(len(X)) signal_vals = np.where(p_pos >= threshold, 1.0, signal_vals) signal_vals = np.where(p_neg >= threshold, -1.0, signal_vals) # Both exceed: pick stronger both = (p_pos >= threshold) & (p_neg >= threshold) signal_vals[both] = np.where(p_pos[both] >= p_neg[both], 1.0, -1.0) return pd.Series(signal_vals, index=X.index), p_pos, p_neg # ── Filter functions (all no-ops when config value is None) ────────────── def _apply_direction_filter(signal, direction): """Zero out signals that don't match allowed direction.""" if direction is None or direction == "both": return signal s = signal.copy() if direction == "long": s[s < 0] = 0.0 elif direction == "short": s[s > 0] = 0.0 return s def _apply_session_filter(signal, index, session_hours): """Zero out signals outside session hours [start, end] UTC.""" if session_hours is None: return signal s = signal.copy() start_h, end_h = session_hours[0], session_hours[1] hours = index.hour if start_h <= end_h: 
mask = (hours >= start_h) & (hours < end_h) else: # wrap around midnight, e.g. [22, 6] mask = (hours >= start_h) \| (hours < end_h) s[~mask] = 0.0 return s def _apply_atr_filter(signal, close, high, low, min_atr): """Zero out signals when NATR(14) is below threshold.""" if min_atr is None: return signal hl = high - low hc = (high - close.shift(1)).abs() lc = (low - close.shift(1)).abs() tr = pd.concat([hl, hc, lc], axis=1).max(axis=1) atr14 = tr.ewm(com=13, adjust=False).mean() natr = atr14 / close.replace(0, np.nan) s = signal.copy() s[natr < min_atr] = 0.0 return s def _apply_trend_filter(signal, close, trend_filter): """Only allow signals aligned with trend. e.g. 'sma_50': longs above SMA, shorts below.""" if trend_filter is None: return signal # Parse: "sma_50" → SMA with period 50 parts = trend_filter.lower().replace("-", "_").split("_") if len(parts) >= 2 and parts[0] in ("sma", "ema"): period = int(parts[1]) else: return signal # unknown filter, skip if parts[0] == "sma": trend_line = close.rolling(period).mean() else: trend_line = close.ewm(span=period, adjust=False).mean() s = signal.copy() # Longs only above trend, shorts only below s[(s > 0) & (close < trend_line)] = 0.0 s[(s < 0) & (close > trend_line)] = 0.0 return s # ── run_backtest_v2: framework-owned SL/TP/cooldown/position management ── def run_backtest_v2(signal, close, high, low, config, capital=10000, cost=2e-5): """Backtest with SL/TP/cooldown/direction handling built into the engine. Unlike run_backtest (v1), this function handles position exits internally. 
Returns: same dict shape as run_backtest() """ stop_loss = config.get("stop_loss") take_profit = config.get("take_profit") cooldown = config.get("cooldown", 0) on_opposite = config.get("on_opposite", "reverse") sig_arr = signal.values close_arr = close.values high_arr = high.values low_arr = low.values idx = signal.index n = len(close_arr) trade_returns = [] long_returns = [] short_returns = [] trade_log = [] equity_vals = np.full(n, float(capital)) cumret = 1.0 position = 0.0 # current direction: 1.0, -1.0, or 0.0 (flat) entry_price = None entry_bar = None # index into arrays for entry time cooldown_remaining = 0 def _log_trade(exit_bar, exit_px, ret, reason): trade_log.append({ "type": "Buy" if position == 1.0 else "Sell", "entry_time": str(idx[entry_bar]), "exit_time": str(idx[exit_bar]), "entry_price": round(entry_price, 5), "exit_price": round(exit_px, 5), "pnl": round(position * (exit_px - entry_price), 5), "pnl_pct": round(ret * 100, 3), "exit_reason": reason, }) for i in range(n): c = close_arr[i] h = high_arr[i] lo = low_arr[i] s = sig_arr[i] # 1. 
Check SL/TP if in trade if position != 0.0 and entry_price is not None: hit_sl = False hit_tp = False exit_price = None if position == 1.0: # long if stop_loss is not None and lo <= entry_price * (1 - stop_loss): hit_sl = True exit_price = entry_price * (1 - stop_loss) elif take_profit is not None and h >= entry_price * (1 + take_profit): hit_tp = True exit_price = entry_price * (1 + take_profit) else: # short if stop_loss is not None and h >= entry_price * (1 + stop_loss): hit_sl = True exit_price = entry_price * (1 + stop_loss) elif take_profit is not None and lo <= entry_price * (1 - take_profit): hit_tp = True exit_price = entry_price * (1 - take_profit) if hit_sl or hit_tp: ret = float(position * (exit_price - entry_price) / entry_price - cost) trade_returns.append(ret) if position == 1.0: long_returns.append(ret) else: short_returns.append(ret) _log_trade(i, exit_price, ret, "SL" if hit_sl else "TP") cumret = (1 + ret) position = 0.0 entry_price = None entry_bar = None cooldown_remaining = cooldown equity_vals[i] = capital cumret continue # 2. Cooldown if cooldown_remaining > 0: cooldown_remaining -= 1 equity_vals[i] = capital * cumret continue # 3. 
Signal processing if s != 0.0: if position == 0.0: # Open new trade position = s entry_price = c entry_bar = i elif s != position: # Opposite signal if on_opposite == "reverse": # Close current + open opposite ret = float(position * (c - entry_price) / entry_price - cost) trade_returns.append(ret) if position == 1.0: long_returns.append(ret) else: short_returns.append(ret) _log_trade(i, c, ret, "signal") cumret = (1 + ret) position = s entry_price = c entry_bar = i else: # close_only # Close current, go flat ret = float(position (c - entry_price) / entry_price - cost) trade_returns.append(ret) if position == 1.0: long_returns.append(ret) else: short_returns.append(ret) _log_trade(i, c, ret, "close_only") cumret = (1 + ret) position = 0.0 entry_price = None entry_bar = None cooldown_remaining = cooldown equity_vals[i] = capital cumret # Close last open trade at final close if position != 0.0 and entry_price is not None and n > 0 and entry_price != 0: c = close_arr[-1] ret = float(position * (c - entry_price) / entry_price - cost) trade_returns.append(ret) if position == 1.0: long_returns.append(ret) else: short_returns.append(ret) _log_trade(n - 1, c, ret, "end") cumret = (1 + ret) equity_vals[-1] = capital cumret # Bar returns for Sharpe (approximate) bar_returns = np.zeros(n) for i in range(1, n): if close_arr[i - 1] != 0 and sig_arr[i - 1] != 0: bar_returns[i] = sig_arr[i - 1] * (close_arr[i] - close_arr[i - 1]) / close_arr[i - 1] return { "equity": pd.Series(equity_vals, index=close.index), "trade_returns": trade_returns, "long_returns": long_returns, "short_returns": short_returns, "bar_returns": bar_returns, "trade_log": trade_log, } # ── run_strategy: the v2 orchestrator ──────────────────────────────────── def run_strategy(feature_fn, config_fn, data_path, start_date="", end_date="", validation_date="", train_split=0.7, register_model_fn=None): """Config-driven strategy execution. Claude writes feature_fn + config_fn, framework does everything else. 
Returns: results dict (same format as webapp expects) """ config = config_fn() # Auto-correct SL/TP if Claude passed percentage instead of decimal for _key in ("stop_loss", "take_profit"): _val = config.get(_key) if _val is not None and _val > 0.1: # >10% is almost certainly a percentage config[_key] = _val / 100.0 print(f"[strategy] Auto-corrected {_key}: {_val} -> {config[_key]} (was percentage, converted to decimal)") # 1. Load data df, close, open_, high, low = load_ohlc(data_path, start_date, end_date) # 2. Feature engineering (Claude's function) df = feature_fn(df, close, open_, high, low) close = df["close"] open_ = df["open"] high = df["high"] low = df["low"] # 3. Warm-up detection: drop rows where features have NaN BEFORE any fill feature_cols = [c for c in df.columns if c not in ("open", "high", "low", "close")] raw_nans = df[feature_cols].isna().any(axis=1) valid_rows = ~raw_nans if valid_rows.any(): first_valid = valid_rows.idxmax() if raw_nans.loc[:first_valid].any(): df = df.loc[first_valid:].copy() close = df["close"] open_ = df["open"] high = df["high"] low = df["low"] # 4. Target horizon = config.get("target_horizon", 4) target = make_target(close, horizon=horizon) # 5. 
Split (ffill only within each partition — no bfill leak) mask = target.notna() df = df[mask].copy() target = target[mask] close = df["close"] high = df["high"] low = df["low"] X = df[feature_cols].copy() X = X.replace([np.inf, -np.inf], np.nan) if validation_date: split_idx = len(df[df.index <= validation_date]) else: split_idx = int(len(df) * train_split) split_idx = max(1, min(split_idx, len(df) - 1)) # ffill within train and test separately (no leak) X_train = X.iloc[:split_idx].ffill().fillna(0.0) X_test = X.iloc[split_idx:].ffill().fillna(0.0) X = pd.concat([X_train, X_test]) y_train = target.iloc[:split_idx] y_test = target.iloc[split_idx:] close_train = close.iloc[:split_idx] close_test = close.iloc[split_idx:] high_test = high.iloc[split_idx:] low_test = low.iloc[split_idx:] enc = LabelEncoder() enc.fit([-1, 0, 1]) y_train_enc = enc.transform(y_train) y_test_enc = enc.transform(y_test) split_dt = str(df.index[split_idx]) sp = { "df": df, "X_train": X_train, "X_test": X_test, "y_train": y_train, "y_test": y_test, "y_train_enc": y_train_enc, "y_test_enc": y_test_enc, "enc": enc, "close": close, "close_train": close_train, "close_test": close_test, "split_idx": split_idx, "split_dt": split_dt, "n_train": len(X_train), "n_test": len(X_test), } # 6. Build model from config model = _build_model_from_config(config, X_train, y_train_enc) # 7. Generate signals threshold = config.get("signal_threshold", 0.55) signal_train, p_pos_train, p_neg_train = _generate_signals(model, X_train, threshold) signal_test, p_pos_test, p_neg_test = _generate_signals(model, X_test, threshold) # 8. 
Apply filters (order: direction → session → ATR → trend) direction = config.get("direction", "both") signal_test = _apply_direction_filter(signal_test, direction) signal_train = _apply_direction_filter(signal_train, direction) session_filter = config.get("session_filter") signal_test = _apply_session_filter(signal_test, signal_test.index, session_filter) signal_train = _apply_session_filter(signal_train, signal_train.index, session_filter) min_atr = config.get("min_atr") if min_atr is not None: signal_test = _apply_atr_filter(signal_test, close_test, high_test, low_test, min_atr) trend_filter = config.get("trend_filter") if trend_filter is not None: signal_test = _apply_trend_filter(signal_test, close_test, trend_filter) signal_full = pd.concat([signal_train, signal_test]) # 9. Backtest with SL/TP/cooldown (test + train) high_train = high.iloc[:split_idx] low_train = low.iloc[:split_idx] has_risk = (config.get("stop_loss") is not None or config.get("take_profit") is not None or config.get("cooldown", 0) > 0 or config.get("on_opposite", "reverse") != "reverse") if has_risk: bt = run_backtest_v2(signal_test, close_test, high_test, low_test, config, capital=10000) bt_train = run_backtest_v2(signal_train, close_train, high_train, low_train, config, capital=10000) else: bt = run_backtest(signal_test, close_test, capital=10000) bt_train = run_backtest(signal_train, close_train, capital=10000) # 10. Metrics metrics = compute_metrics(bt, close_test, capital=10000) # 11. Pre-compute all trade stats (single source of truth) pre_stats = { "train_stats": compute_trade_stats(bt_train.get("trade_returns", []), capital=10000), "test_stats": compute_trade_stats(bt.get("trade_returns", []), capital=10000), "long_stats": compute_trade_stats(bt.get("long_returns", []), capital=10000), "short_stats": compute_trade_stats(bt.get("short_returns", []), capital=10000), } # 12. Register model if register_model_fn is not None: register_model_fn(model) # 13. 
Build return dict return build_return_dict(sp, bt, metrics, model, feature_cols, signal_full, p_pos_test, p_neg_test, custom_figs=[], bt_train_result=bt_train, pre_stats=pre_stats) # ── End strategy_utils ── DATA_PATH = '/root/Desktop/QuantifyMe/data/ohlc/EURUSD_15min.parquet' START_DATE = '2026-04-15' END_DATE = '2026-05-25' VALIDATION_DATE = "" TRAIN_SPLIT = 0.7 # SECTION 1 — FEATURE ENGINEERING def feature_engineering(df, close, open_, high, low): # --- RSI(14) --- period = 14 delta = close.diff() gain = delta.clip(lower=0) loss = -delta.clip(upper=0) avg_gain = gain.ewm(alpha=1.0 / period, min_periods=period, adjust=False).mean() avg_loss = loss.ewm(alpha=1.0 / period, min_periods=period, adjust=False).mean() rs = avg_gain / avg_loss.replace(0, np.nan) df["rsi_14"] = 100.0 - (100.0 / (1.0 + rs)) # --- RSI crossover signals: cross up from below 30, cross down from above 70 --- rsi_prev = df["rsi_14"].shift(1) df["rsi_cross_up30"] = np.where( (rsi_prev < 30) & (df["rsi_14"] >= 30), 1.0, 0.0 ) df["rsi_cross_dn70"] = np.where( (rsi_prev > 70) & (df["rsi_14"] <= 70), 1.0, 0.0 ) # --- RSI distance from thresholds (signed) --- df["rsi_dist_30"] = df["rsi_14"] - 30.0 df["rsi_dist_70"] = df["rsi_14"] - 70.0 df["rsi_dist_50"] = df["rsi_14"] - 50.0 # --- RSI(5) for short-term momentum --- period5 = 5 delta5 = close.diff() gain5 = delta5.clip(lower=0) loss5 = -delta5.clip(upper=0) avg_gain5 = gain5.ewm(alpha=1.0 / period5, min_periods=period5, adjust=False).mean() avg_loss5 = loss5.ewm(alpha=1.0 / period5, min_periods=period5, adjust=False).mean() rs5 = avg_gain5 / avg_loss5.replace(0, np.nan) df["rsi_5"] = 100.0 - (100.0 / (1.0 + rs5)) # --- RSI(28) for longer-term regime --- period28 = 28 delta28 = close.diff() gain28 = delta28.clip(lower=0) loss28 = -delta28.clip(upper=0) avg_gain28 = gain28.ewm(alpha=1.0 / period28, min_periods=period28, adjust=False).mean() avg_loss28 = loss28.ewm(alpha=1.0 / period28, min_periods=period28, adjust=False).mean() rs28 = avg_gain28 / 
avg_loss28.replace(0, np.nan) df["rsi_28"] = 100.0 - (100.0 / (1.0 + rs28)) # --- Bollinger Bands (20, 2) --- bb_period = 20 bb_mid = close.rolling(bb_period).mean() bb_std = close.rolling(bb_period).std() bb_upper = bb_mid + 2.0 * bb_std bb_lower = bb_mid - 2.0 * bb_std df["bb_mid"] = bb_mid df["bb_width"] = np.where(bb_mid != 0, (bb_upper - bb_lower) / bb_mid, np.nan) df["bb_pct_b"] = np.where( (bb_upper - bb_lower) != 0, (close - bb_lower) / (bb_upper - bb_lower), 0.5 ) df["price_vs_bb_mid"] = close - bb_mid # --- ATR(14) for volatility --- tr1 = high - low tr2 = (high - close.shift(1)).abs() tr3 = (low - close.shift(1)).abs() true_range = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1) df["atr_14"] = true_range.ewm(alpha=1.0 / 14, min_periods=14, adjust=False).mean() df["natr_14"] = np.where(close != 0, df["atr_14"] / close, np.nan) # --- MACD (12, 26, 9) --- ema12 = close.ewm(span=12, min_periods=12, adjust=False).mean() ema26 = close.ewm(span=26, min_periods=26, adjust=False).mean() macd_line = ema12 - ema26 signal_line = macd_line.ewm(span=9, min_periods=9, adjust=False).mean() df["macd"] = macd_line df["macd_signal"] = signal_line df["macd_hist"] = macd_line - signal_line df["macd_cross_up"] = np.where( (macd_line.shift(1) < signal_line.shift(1)) & (macd_line >= signal_line), 1.0, 0.0 ) df["macd_cross_dn"] = np.where( (macd_line.shift(1) > signal_line.shift(1)) & (macd_line <= signal_line), 1.0, 0.0 ) # --- Stochastic Oscillator (14, 3) --- stoch_period = 14 lowest_low = low.rolling(stoch_period).min() highest_high = high.rolling(stoch_period).max() denom = (highest_high - lowest_low).replace(0, np.nan) stoch_k = 100.0 * (close - lowest_low) / denom stoch_d = stoch_k.rolling(3).mean() df["stoch_k"] = stoch_k df["stoch_d"] = stoch_d df["stoch_cross_up"] = np.where( (stoch_k.shift(1) < stoch_d.shift(1)) & (stoch_k >= stoch_d) & (stoch_k < 30), 1.0, 0.0 ) df["stoch_cross_dn"] = np.where( (stoch_k.shift(1) > stoch_d.shift(1)) & (stoch_k <= stoch_d) & (stoch_k > 
70), 1.0, 0.0 ) # --- SMA trend features --- df["sma_20"] = close.rolling(20).mean() df["sma_50"] = close.rolling(50).mean() df["sma_200"] = close.rolling(200).mean() df["price_vs_sma20"] = (close - df["sma_20"]) / df["sma_20"].replace(0, np.nan) df["price_vs_sma50"] = (close - df["sma_50"]) / df["sma_50"].replace(0, np.nan) df["sma20_vs_sma50"] = (df["sma_20"] - df["sma_50"]) / df["sma_50"].replace(0, np.nan) # --- Price momentum (returns) --- df["ret_1"] = close.pct_change(1) df["ret_4"] = close.pct_change(4) df["ret_8"] = close.pct_change(8) df["ret_16"] = close.pct_change(16) # --- Candle body and wick features --- body = (close - open_).abs() upper_wick = high - pd.concat([close, open_], axis=1).max(axis=1) lower_wick = pd.concat([close, open_], axis=1).min(axis=1) - low candle_range = (high - low).replace(0, np.nan) df["body_ratio"] = body / candle_range df["upper_wick_ratio"] = upper_wick / candle_range df["lower_wick_ratio"] = lower_wick / candle_range df["candle_direction"] = np.where(close >= open_, 1.0, -1.0) # --- Volume of consecutive bars in same direction --- df["consec_up"] = ( df["candle_direction"] .groupby((df["candle_direction"] != df["candle_direction"].shift(1)).cumsum()) .cumcount() + 1 ) * np.where(df["candle_direction"] > 0, 1.0, 0.0) df["consec_dn"] = ( df["candle_direction"] .groupby((df["candle_direction"] != df["candle_direction"].shift(1)).cumsum()) .cumcount() + 1 ) * np.where(df["candle_direction"] < 0, 1.0, 0.0) # --- RSI slope (rate of change) --- df["rsi_slope_3"] = df["rsi_14"].diff(3) df["rsi_slope_5"] = df["rsi_14"].diff(5) # --- Rolling high/low channel breakout context --- df["high_20"] = high.rolling(20).max() df["low_20"] = low.rolling(20).min() df["price_pos_in_range"] = np.where( (df["high_20"] - df["low_20"]) != 0, (close - df["low_20"]) / (df["high_20"] - df["low_20"]), 0.5 ) # --- RSI oversold/overbought binary flags --- df["rsi_oversold"] = np.where(df["rsi_14"] < 30, 1.0, 0.0) df["rsi_overbought"] = 
np.where(df["rsi_14"] > 70, 1.0, 0.0) df["rsi_neutral"] = np.where((df["rsi_14"] >= 40) & (df["rsi_14"] <= 60), 1.0, 0.0) # --- Rolling RSI min/max to track extremes --- df["rsi_min_10"] = df["rsi_14"].rolling(10).min() df["rsi_max_10"] = df["rsi_14"].rolling(10).max() df["rsi_range_10"] = df["rsi_max_10"] - df["rsi_min_10"] # --- Time-of-day features (sin/cos encoding for session awareness) --- if hasattr(df.index, "hour"): hour = df.index.hour + df.index.minute / 60.0 df["hour_sin"] = np.sin(2.0 * np.pi * hour / 24.0) df["hour_cos"] = np.cos(2.0 * np.pi * hour / 24.0) else: df["hour_sin"] = 0.0 df["hour_cos"] = 1.0 # --- Fill NaN from warm-up periods --- df = df.bfill().ffill() return df # SECTION 2 — STRATEGY CONFIG def strategy_config(): return { "title": "RSI Crossover Mean-Reversion (XGBoost, Sharpe)", "model_type": "XGBClassifier", "model_params": { "n_estimators": 400, "max_depth": 4, "learning_rate": 0.04, "subsample": 0.8, "colsample_bytree": 0.7, "min_child_weight": 3, "gamma": 0.1, "reg_alpha": 0.05, "reg_lambda": 1.2, "objective": "binary:logistic", "n_jobs": -1, "random_state": 42, }, "signal_threshold": 0.55, "direction": "both", "stop_loss": 0.004, "take_profit": 0.008, "cooldown": 0, "max_positions": 1, "on_opposite": "reverse", "session_filter": [7, 18], "min_atr": None, "trend_filter": None, "target_horizon": 4, "objective": ( "Maximize Sharpe ratio by capturing mean-reversion when RSI crosses " "back from oversold (<30) or overbought (>70) extremes. XGBoost with " "moderate depth and shrinkage prevents overfitting on the short EUR/USD " "window. A 2:1 TP:SL ratio (0.8%/0.4%) on 15-min bars targets clean " "risk-adjusted returns. Session filter restricts to liquid London/NY hours." ), "notes": ( "Feature set combines the core RSI crossover signal with multi-period RSI, " "MACD histogram, Stochastic, Bollinger %B, ATR normalised volatility, " "price momentum across 4 horizons, candle structure ratios, and time encoding. 
" "colsample_bytree=0.7 adds diversity across trees; subsample=0.8 reduces " "variance. min_child_weight=3 avoids splitting on noisy one-off RSI spikes. " "No trend_filter so the model can express both long and short mean-reversion " "signals symmetrically via the 'both' direction setting." ), } # ── Framework v2: auto-generated wrapper ── def train_and_backtest(): _vd = VALIDATION_DATE if 'VALIDATION_DATE' in globals() else '' _ts = TRAIN_SPLIT if 'TRAIN_SPLIT' in globals() else 0.7 return run_strategy( feature_engineering, strategy_config, DATA_PATH, START_DATE, END_DATE, _vd, _ts, register_model_fn=register_model )
—	EMA crossover (9/21) + RSI 14 confirmation Claude-generated EMA 9/21 trend filter with RSI 14 momentum gate on EURUSD 15min. Test holdout: WR 71%, PF 2.36, 77 trades, +1.7% over ~8 da…	P @pivot_kid	EURUSD	15min	71.4% / 51.7%	+1.72% / -2.09%	2.36 / 0.96	0.37% / 0.37%	77 / 89
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-07 01:38:25
# Model : XGBoost
# Feature Eng. : EMA crossover trend (9/21) with RSI 14 confirmation on EURUSD 15min + Auto-add features: ON
# Signal / Entry : —
# Optimization : —
# Risk Mgmt : —
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# QUANTIFY ME — STRATEGY MODULE
# EMA Crossover + RSI Confirmation (XGBoost, Sharpe Optimization)
# ============================================================
import numpy as np
import pandas as pd

DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/EURUSD_15min.parquet"
START_DATE = "2026-03-28"
END_DATE = "2026-04-25"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7


# ============================================================
# SECTION 1 — FEATURE ENGINEERING
# ============================================================
def feature_engineering(df, close, open_, high, low):
    """Add EMA 9/21 crossover trend features plus RSI 14 confirmation.

    Columns written to ``df``:
        - ``ema_9`` / ``ema_21``: exponential moving averages of ``close``.
        - ``ema_crossover``: regime flag, +1 while EMA 9 > EMA 21, else -1.
        - ``ema_cross_signal``: +1 on the bar EMA 9 moves above EMA 21,
          -1 on the bar it moves below, 0 otherwise.
        - ``price_ema_deviation``: (close - EMA 21) / EMA 21.
        - ``rsi_14`` with ``rsi_overbought`` (> 70) / ``rsi_oversold`` (< 30).
        - ``high_20`` / ``low_20`` / ``price_position``: 20-bar range context.
        - ``natr``: 14-bar mean true range as a percentage of ``close``.
        - ``ema_9_roc`` / ``ema_21_roc``: 3-bar rate of change of each EMA.
        - ``close_above_open``: 1 when the bar closed above its open.
        - ``volume_ratio``: volume / 20-bar mean volume, or 1.0 when ``df``
          has no ``volume`` column.

    Args:
        df: Frame that receives the feature columns (columns are assigned
            on the passed object before the final fill).
        close: Close prices. Presumably aligned with ``df.index`` — confirm
            against the caller.
        open_: Open prices.
        high: High prices.
        low: Low prices.

    Returns:
        The frame with all feature columns added and NaNs filled.
    """
    # EMA 9 and EMA 21 for trend direction
    df['ema_9'] = close.ewm(span=9, adjust=False).mean()
    df['ema_21'] = close.ewm(span=21, adjust=False).mean()
    df['ema_crossover'] = np.where(df['ema_9'] > df['ema_21'], 1, -1)

    # Cross event. The regime flag is ±1, so its raw diff is ±2 on a cross;
    # np.sign() maps that onto the documented +1 / -1 / 0 scale.
    df['ema_cross_signal'] = np.sign(df['ema_crossover'].diff().fillna(0))

    # Price deviation from EMA 21 (normalized)
    df['price_ema_deviation'] = (close - df['ema_21']) / df['ema_21']

    # RSI 14 with Wilder smoothing: alpha = 1 / period.  (ewm(span=14) would
    # give alpha = 2 / 15, i.e. a much faster oscillator than a 14-period RSI.)
    rsi_period = 14
    delta = close.diff()
    gain = np.where(delta > 0, delta, 0)
    loss = np.where(delta < 0, -delta, 0)
    avg_gain = pd.Series(gain, index=close.index).ewm(
        alpha=1.0 / rsi_period, adjust=False
    ).mean()
    avg_loss = pd.Series(loss, index=close.index).ewm(
        alpha=1.0 / rsi_period, adjust=False
    ).mean()
    rs = avg_gain / (avg_loss + 1e-10)  # epsilon avoids division by zero
    df['rsi_14'] = 100 - (100 / (1 + rs))

    # RSI overbought / oversold flags
    df['rsi_overbought'] = np.where(df['rsi_14'] > 70, 1, 0)
    df['rsi_oversold'] = np.where(df['rsi_14'] < 30, 1, 0)

    # Position of close inside the recent 20-bar high/low range
    df['high_20'] = high.rolling(window=20).max()
    df['low_20'] = low.rolling(window=20).min()
    df['price_position'] = (close - df['low_20']) / (df['high_20'] - df['low_20'] + 1e-10)

    # NATR: simple 14-bar mean of the true range, as a percentage of close
    atr_period = 14
    tr1 = high - low
    tr2 = np.abs(high - close.shift(1))
    tr3 = np.abs(low - close.shift(1))
    tr = np.maximum(tr1, np.maximum(tr2, tr3))
    atr = pd.Series(tr, index=close.index).rolling(window=atr_period).mean()
    df['natr'] = (atr / close) * 100

    # EMA momentum (rate of change of each EMA over 3 bars)
    df['ema_9_roc'] = df['ema_9'].pct_change(periods=3)
    df['ema_21_roc'] = df['ema_21'].pct_change(periods=3)

    # Close relative to open (intrabar direction)
    df['close_above_open'] = np.where(close > open_, 1, 0)

    # Volume features when a volume column exists; otherwise a constant so
    # the column set is the same either way.
    if 'volume' in df.columns:
        df['volume_ma'] = df['volume'].rolling(window=20).mean()
        df['volume_ratio'] = df['volume'] / (df['volume_ma'] + 1e-10)
    else:
        df['volume_ratio'] = 1.0

    # Forward-fill first so any gap inside the data takes the last known
    # value; the trailing bfill only covers the leading warm-up rows.
    df = df.ffill().bfill()
    return df


# ============================================================
# SECTION 2 — STRATEGY CONFIG
# ============================================================
def strategy_config():
    """Return the XGBoost strategy configuration for the EMA/RSI features.

    Hyperparameters as set below:
        - learning_rate=0.08
        - max_depth=4 (shallow trees)
        - n_estimators=250
        - subsample=0.85, colsample_bytree=0.8
        - gamma=0.5, reg_alpha=0.1, reg_lambda=1.0

    No ``scale_pos_weight`` is set, so no class re-weighting is requested
    by this config.

    Returns:
        dict with model, signal, position, risk, filter, target and
        metadata settings.
    """
    return {
        # Model specification
        "model_type": "XGBClassifier",
        "model_params": {
            "n_estimators": 250,
            "max_depth": 4,
            "learning_rate": 0.08,
            "subsample": 0.85,
            "colsample_bytree": 0.8,
            "min_child_weight": 1,
            "gamma": 0.5,
            "reg_alpha": 0.1,
            "reg_lambda": 1.0,
            "random_state": 42,
            "verbosity": 0,
        },
        # Entry signal
        "signal_threshold": 0.55,
        # Position management
        "direction": "both",
        "max_positions": 1,
        "on_opposite": "reverse",
        "cooldown": 0,
        # Risk management
        "stop_loss": 0.008,
        "take_profit": 0.015,
        # Filters
        "session_filter": None,
        "min_atr": None,
        "trend_filter": None,
        # Target
        "target_horizon": 4,
        # Metadata
        "title": "EMA Crossover + RSI Confirmation (XGBoost)",
        "objective": "Maximize Sharpe ratio with EMA 9/21 trend + RSI 14 momentum confirmation",
        "notes": (
            "Strategy uses fast EMA (9) crossover above/below slow EMA (21) "
            "as primary trend signal, confirmed by RSI 14 momentum. "
            "XGBoost learns non-linear interactions between these features. "
            "Moderate SL/TP (0.8%/1.5%) and bidirectional trading for scalping efficiency. "
            "Optimized for EURUSD 15min with 70/30 train/test split."
        ),
    }
—	NZD/USD MACD+RSI Gradient Boosting Risk-Adjusted Maximize risk-adjusted return (Sharpe/Calmar) on NZD/USD 15-min data. GradientBoostingClassifier chosen for strong generalisation with tabul…	V @vol_drifter	NZDUSD	15min	60.9%53.0%	+18.36%-6.31%	1.350.87	3.80%3.80%	732134
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 01:16:23
# Model : Gradient Boosting
# Feature Eng. : RSI 14, MACD (12,26,9) + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd

DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/NZDUSD_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7


# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add RSI 14, MACD (12, 26, 9) and supporting features to ``df``.

    Feature groups: RSI and derived flags, MACD line/signal/histogram, ATR 14,
    Bollinger Bands (20, 2), SMA 20/50/200 trend flags, 4/8/16-bar rate of
    change, candle structure, range-vs-ATR ratio, stochastic RSI,
    Williams %R (14) and 1-4 bar lags of RSI, MACD histogram and 4-bar ROC.

    Args:
        df: Frame that receives the feature columns.
        close: Close prices. Presumably aligned with ``df.index`` — confirm
            against the caller.
        open_: Open prices.
        high: High prices.
        low: Low prices.

    Returns:
        The frame with all feature columns added and NaNs filled.
    """
    # ── RSI 14 (Wilder smoothing: com=13 ⇔ alpha=1/14) ──────────────────────
    delta = close.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.ewm(com=13, min_periods=14, adjust=False).mean()
    avg_loss = loss.ewm(com=13, min_periods=14, adjust=False).mean()
    rs = avg_gain / avg_loss.replace(0, np.nan)
    df["rsi_14"] = 100 - (100 / (1 + rs))

    # RSI derived signals
    df["rsi_overbought"] = np.where(df["rsi_14"] > 70, 1, 0)
    df["rsi_oversold"] = np.where(df["rsi_14"] < 30, 1, 0)
    df["rsi_mid_cross"] = np.where(df["rsi_14"] > 50, 1, -1)

    # RSI momentum (3-bar change of RSI)
    df["rsi_roc"] = df["rsi_14"].diff(3)

    # ── MACD (12, 26, 9) ────────────────────────────────────────────────────
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    macd_line = ema_12 - ema_26
    signal_line = macd_line.ewm(span=9, adjust=False).mean()
    macd_hist = macd_line - signal_line
    df["macd_line"] = macd_line
    df["macd_signal"] = signal_line
    df["macd_hist"] = macd_hist

    # MACD derived signals
    df["macd_cross"] = np.where(macd_line > signal_line, 1, -1)
    df["macd_hist_sign"] = np.where(macd_hist > 0, 1, -1)
    df["macd_hist_accel"] = macd_hist.diff()

    # ── ATR (14) ─────────────────────────────────────────────────────────────
    tr = pd.concat(
        [
            high - low,
            (high - close.shift(1)).abs(),
            (low - close.shift(1)).abs(),
        ],
        axis=1,
    ).max(axis=1)
    df["atr_14"] = tr.ewm(com=13, min_periods=14, adjust=False).mean()
    df["natr_14"] = df["atr_14"] / close

    # ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
    sma_20 = close.rolling(20).mean()
    std_20 = close.rolling(20).std()
    bb_upper = sma_20 + 2 * std_20
    bb_lower = sma_20 - 2 * std_20
    df["bb_width"] = (bb_upper - bb_lower) / sma_20
    df["bb_position"] = (close - bb_lower) / (bb_upper - bb_lower).replace(0, np.nan)
    df["bb_squeeze"] = np.where(df["bb_width"] < df["bb_width"].rolling(50).mean(), 1, 0)

    # ── Trend / SMA filters ──────────────────────────────────────────────────
    df["sma_20"] = sma_20
    df["sma_50"] = close.rolling(50).mean()
    df["sma_200"] = close.rolling(200).mean()
    df["price_above_sma50"] = np.where(close > df["sma_50"], 1, -1)
    df["price_above_sma200"] = np.where(close > df["sma_200"], 1, -1)
    df["sma50_above_sma200"] = np.where(df["sma_50"] > df["sma_200"], 1, -1)

    # ── Momentum / ROC ───────────────────────────────────────────────────────
    df["roc_4"] = close.pct_change(4)
    df["roc_8"] = close.pct_change(8)
    df["roc_16"] = close.pct_change(16)

    # ── Candlestick / price structure ────────────────────────────────────────
    df["body"] = (close - open_) / close
    # close.clip(lower=open_) is max(close, open); open_.clip(upper=close) is
    # min(open, close) — i.e. the top and bottom of the candle body.
    df["upper_wick"] = (high - close.clip(lower=open_)) / close
    df["lower_wick"] = (open_.clip(upper=close) - low) / close
    df["hl_range"] = (high - low) / close
    df["gap"] = (open_ - close.shift(1)) / close.shift(1)

    # ── Bar range relative to ATR ────────────────────────────────────────────
    # Both operands are in price units, so the ratio is dimensionless.
    # (Dividing the close-normalised hl_range by the absolute ATR would leave
    # a stray 1/close factor in the feature.)
    df["spread_ratio"] = (high - low) / df["atr_14"].replace(0, np.nan)

    # ── Stochastic RSI proxy ─────────────────────────────────────────────────
    rsi_min = df["rsi_14"].rolling(14).min()
    rsi_max = df["rsi_14"].rolling(14).max()
    df["stoch_rsi"] = (df["rsi_14"] - rsi_min) / (rsi_max - rsi_min).replace(0, np.nan)

    # ── Williams %R (14) ─────────────────────────────────────────────────────
    highest_high = high.rolling(14).max()
    lowest_low = low.rolling(14).min()
    df["williams_r"] = (
        -100 * (highest_high - close) / (highest_high - lowest_low).replace(0, np.nan)
    )

    # ── Lagged features ──────────────────────────────────────────────────────
    for lag in [1, 2, 3, 4]:
        df[f"rsi_lag{lag}"] = df["rsi_14"].shift(lag)
        df[f"macd_hist_lag{lag}"] = df["macd_hist"].shift(lag)
        df[f"roc4_lag{lag}"] = df["roc_4"].shift(lag)

    # ── Fill NaN ─────────────────────────────────────────────────────────────
    # The zero-denominator guards above can leave NaNs in the middle of the
    # data.  Forward-fill first so those take the last known value rather
    # than the next bar's (which would leak future information); the bfill
    # then only touches the leading indicator warm-up rows.
    df = df.ffill().bfill()
    return df


# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the GradientBoostingClassifier strategy configuration.

    Returns:
        dict with title, model type and parameters, signal threshold,
        direction, stop-loss / take-profit, filters, target horizon and
        free-text objective / notes.
    """
    return {
        "title": "NZD/USD MACD+RSI Gradient Boosting Risk-Adjusted",
        "model_type": "GradientBoostingClassifier",
        "model_params": {
            "n_estimators": 400,
            "max_depth": 4,
            "learning_rate": 0.03,
            "subsample": 0.8,
            "min_samples_leaf": 20,
            "max_features": "sqrt",
            "validation_fraction": 0.1,
            "n_iter_no_change": 30,
            "tol": 1e-4,
            "random_state": 42,
        },
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": [0, 23],
        "min_atr": None,
        "trend_filter": None,
        "target_horizon": 4,
        "objective": (
            "Maximize risk-adjusted return (Sharpe/Calmar) on NZD/USD 15-min data. "
            "GradientBoostingClassifier chosen for strong generalisation with tabular features, "
            "low learning rate + early stopping prevents overfitting. "
            "SL=0.5%, TP=1.0% gives 1:2 R:R ratio. Threshold=0.55 filters marginal signals."
        ),
        "notes": (
            "Core features: RSI-14, MACD(12,26,9) with histogram momentum. "
            "Supplementary: ATR, Bollinger Bands, SMA trend, Williams %R, StochRSI, "
            "candlestick structure, lagged RSI/MACD/ROC. "
            "reverse on opposite signal captures trend continuation. "
            "target_horizon=4 bars (1 hour) aligns with typical MACD/RSI signal duration."
        ),
    }
—	EUR/USD SMA Trend + Multi-Indicator XGBoost Maximize risk-adjusted return (Sharpe/Calmar) via XGBoost with deep SMA-based trend features (20/50/200), momentum, volatility, RSI, MACD, B…	E @elastic-moose-350	EURUSD	15min	53.6%50.0%	+5.89%-3.02%	1.970.69	1.64%1.64%	5610
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-05 10:44:36
# Model : XGBoost
# Feature Eng. : SMA (20,50,200) + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd

DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/EURUSD_15min.parquet"
START_DATE = "2025-04-23"
END_DATE = "2026-04-23"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7


# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add SMA (20/50/200) trend features plus a broad indicator set to ``df``.

    Feature groups: SMA levels, distances, ratios and slopes; multi-lag
    returns; rolling return volatility; simple-mean ATR; rolling-mean RSI
    (7/14/21); MACD; Bollinger Bands; stochastic; CCI; Williams %R; Donchian
    position; distance to rolling highs/lows; bar body/wick ratios; binary
    trend-regime flags; lagged closes and returns; rolling lag-1 return
    autocorrelation; bar-range z-score.

    Args:
        df: Frame that receives the feature columns.
        close: Close prices. Presumably aligned with ``df.index`` — confirm
            against the caller.
        open_: Open prices.
        high: High prices.
        low: Low prices.

    Returns:
        The frame with all feature columns added and NaNs filled.
    """
    # ── SMA core features (required) ──────────────────────────────────────────
    for p in [20, 50, 200]:
        sma = close.rolling(p).mean()
        df[f"sma_{p}"] = sma
        df[f"dm_sma_{p}"] = (close - sma) / sma

    # ── SMA cross-ratios ──────────────────────────────────────────────────────
    df["sma_20_50_ratio"] = df["sma_20"] / df["sma_50"]
    df["sma_50_200_ratio"] = df["sma_50"] / df["sma_200"]
    df["sma_20_200_ratio"] = df["sma_20"] / df["sma_200"]

    # ── SMA slope (relative change of the SMA over 5 bars) ───────────────────
    for p in [20, 50, 200]:
        df[f"sma_{p}_slope5"] = df[f"sma_{p}"].diff(5) / df[f"sma_{p}"].shift(5)

    # ── Price momentum / returns ─────────────────────────────────────────────
    for lag in [1, 2, 3, 4, 8, 16]:
        df[f"ret_{lag}"] = close.pct_change(lag)

    # ── Volatility (rolling std of returns) ──────────────────────────────────
    ret1 = close.pct_change(1)
    for w in [8, 20, 50]:
        df[f"vol_std_{w}"] = ret1.rolling(w).std()

    # ── ATR (simple rolling mean of the true range) ──────────────────────────
    tr = pd.concat(
        [
            high - low,
            (high - close.shift(1)).abs(),
            (low - close.shift(1)).abs(),
        ],
        axis=1,
    ).max(axis=1)
    for w in [8, 14, 20]:
        atr = tr.rolling(w).mean()
        df[f"atr_{w}"] = atr
        df[f"natr_{w}"] = atr / close

    # ── RSI (rolling-mean variant) ───────────────────────────────────────────
    # Up/down moves do not depend on the period, so compute them once.
    delta = close.diff(1)
    up_move = delta.clip(lower=0)
    down_move = -delta.clip(upper=0)
    for period in [7, 14, 21]:
        gain = up_move.rolling(period).mean()
        loss = down_move.rolling(period).mean()
        rs = gain / loss.replace(0, np.nan)
        df[f"rsi_{period}"] = 100 - (100 / (1 + rs))

    # ── MACD (12, 26, 9) ─────────────────────────────────────────────────────
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    macd_line = ema12 - ema26
    signal_line = macd_line.ewm(span=9, adjust=False).mean()
    df["macd_line"] = macd_line
    df["macd_signal"] = signal_line
    df["macd_hist"] = macd_line - signal_line
    df["macd_norm"] = macd_line / close

    # ── Bollinger Bands ──────────────────────────────────────────────────────
    for w in [20]:
        mid = close.rolling(w).mean()
        std = close.rolling(w).std()
        upper = mid + 2 * std
        lower = mid - 2 * std
        bw = (upper - lower) / mid
        pct_b = (close - lower) / (upper - lower).replace(0, np.nan)
        df[f"bb_upper_{w}"] = upper
        df[f"bb_lower_{w}"] = lower
        df[f"bb_width_{w}"] = bw
        df[f"bb_pct_{w}"] = pct_b

    # ── Stochastic oscillator ────────────────────────────────────────────────
    for k_period in [14]:
        lo_k = low.rolling(k_period).min()
        hi_k = high.rolling(k_period).max()
        stoch_k = 100 * (close - lo_k) / (hi_k - lo_k).replace(0, np.nan)
        stoch_d = stoch_k.rolling(3).mean()
        df[f"stoch_k_{k_period}"] = stoch_k
        df[f"stoch_d_{k_period}"] = stoch_d

    # ── CCI ──────────────────────────────────────────────────────────────────
    tp = (high + low + close) / 3  # typical price; independent of the window
    for w in [14, 20]:
        tp_ma = tp.rolling(w).mean()
        tp_md = tp.rolling(w).apply(lambda x: np.mean(np.abs(x - x.mean())), raw=True)
        df[f"cci_{w}"] = (tp - tp_ma) / (0.015 * tp_md.replace(0, np.nan))

    # ── Williams %R ──────────────────────────────────────────────────────────
    for w in [14]:
        hi_w = high.rolling(w).max()
        lo_w = low.rolling(w).min()
        df[f"willr_{w}"] = -100 * (hi_w - close) / (hi_w - lo_w).replace(0, np.nan)

    # ── Donchian channel position ────────────────────────────────────────────
    for w in [20, 50]:
        hi_d = high.rolling(w).max()
        lo_d = low.rolling(w).min()
        df[f"donch_pos_{w}"] = (close - lo_d) / (hi_d - lo_d).replace(0, np.nan)

    # ── Rolling high / low distances ─────────────────────────────────────────
    for w in [8, 20]:
        df[f"dist_hi_{w}"] = (high.rolling(w).max() - close) / close
        df[f"dist_lo_{w}"] = (close - low.rolling(w).min()) / close

    # ── Bar body / wick features ─────────────────────────────────────────────
    body = (close - open_).abs()
    bar_range = (high - low).replace(0, np.nan)  # NaN on zero-range bars
    df["body_ratio"] = body / bar_range
    df["upper_wick_ratio"] = (high - close.clip(lower=open_)) / bar_range
    df["lower_wick_ratio"] = (close.clip(upper=open_) - low) / bar_range
    df["bar_direction"] = np.where(close >= open_, 1.0, -1.0)

    # ── Trend regime flags (binary) ──────────────────────────────────────────
    df["above_sma20"] = np.where(close > df["sma_20"], 1.0, 0.0)
    df["above_sma50"] = np.where(close > df["sma_50"], 1.0, 0.0)
    df["above_sma200"] = np.where(close > df["sma_200"], 1.0, 0.0)
    df["sma20_above50"] = np.where(df["sma_20"] > df["sma_50"], 1.0, 0.0)
    df["sma50_above200"] = np.where(df["sma_50"] > df["sma_200"], 1.0, 0.0)

    # ── Lagged closes / returns for autoregressive signal ────────────────────
    for lag in [1, 2, 3, 4, 5]:
        df[f"close_lag_{lag}"] = close.shift(lag)
        df[f"ret_lag_{lag}"] = ret1.shift(lag)

    # ── Rolling lag-1 autocorrelation of 1-bar returns ───────────────────────
    # raw=False hands each window over as a Series, so autocorr() can be
    # called on it directly.
    for w in [20]:
        df[f"autocorr_ret_{w}"] = ret1.rolling(w).apply(
            lambda x: x.autocorr(lag=1), raw=False
        )

    # ── Volume-proxy: bar range z-score ──────────────────────────────────────
    for w in [20]:
        rng_mean = bar_range.rolling(w).mean()
        rng_std = bar_range.rolling(w).std().replace(0, np.nan)
        df[f"range_zscore_{w}"] = (bar_range - rng_mean) / rng_std

    # ── Fill NaN ─────────────────────────────────────────────────────────────
    # Zero-range bars and the other zero-denominator guards leave NaNs in the
    # middle of the data (a single flat bar blanks range_zscore for a whole
    # window).  Forward-fill first so those take the last known value rather
    # than a future bar's; the bfill then only covers the leading warm-up.
    df = df.ffill().bfill()
    return df


# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the XGBoost strategy configuration for the SMA trend features.

    Returns:
        dict with title, model type and parameters, signal threshold,
        direction, stop-loss / take-profit, filters, target horizon and
        free-text objective / notes.
    """
    return {
        "title": "EUR/USD SMA Trend + Multi-Indicator XGBoost",
        "model_type": "XGBClassifier",
        "model_params": {
            "n_estimators": 500,
            "max_depth": 4,
            "learning_rate": 0.03,
            "subsample": 0.8,
            "colsample_bytree": 0.7,
            "colsample_bylevel": 0.8,
            "min_child_weight": 5,
            "gamma": 0.1,
            "reg_alpha": 0.1,
            "reg_lambda": 2.0,
            "scale_pos_weight": 1,
            "objective": "binary:logistic",
            "tree_method": "hist",
            "random_state": 42,
            "n_jobs": -1,
        },
        "signal_threshold": 0.56,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.01,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": [6, 18],
        "min_atr": 0.0002,
        "trend_filter": "sma_50",
        "target_horizon": 4,
        "objective": (
            "Maximize risk-adjusted return (Sharpe/Calmar) via XGBoost with deep "
            "SMA-based trend features (20/50/200), momentum, volatility, RSI, MACD, "
            "Bollinger Bands, Stochastics, CCI, Donchian channels, and bar microstructure. "
            "Regularised tree ensemble (L1+L2, subsampling) prevents overfit on 15-min FX data. "
            "2:1 reward-to-risk with 0.5% SL / 1.0% TP targets consistent positive expectancy. "
            "Session filter [6,18] UTC focuses on liquid London+NY overlap. "
            "Trend filter sma_50 suppresses counter-trend noise."
        ),
        "notes": (
            "n_estimators=500 with low learning_rate=0.03 gives stable generalisation. "
            "max_depth=4 limits tree complexity to avoid overfit on 15-min EURUSD. "
            "min_child_weight=5 and gamma=0.1 add conservative splitting constraints. "
            "reg_lambda=2.0 strong L2 regularisation for stable leaf weights. "
            "colsample_bytree=0.7 adds feature bagging diversity. "
            "target_horizon=4 (1 hour ahead) balances signal frequency and predictability. "
            "signal_threshold=0.56 filters marginal predictions, improving precision."
        ),
    }
—	EMA Cross 50/200 + ATR Momentum (XGBoost) Maximize risk-adjusted return (Sharpe/Calmar) on EUR/USD 15-min data. EMA 50/200 cross provides the primary trend regime filter. ATR 14 gate…	D @delta-atlas-858	EURUSD	15min	48.5%33.3%	+8.52%-7.94%	2.410.55	0.83%0.83%	6818
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-05 10:27:10
# Model : XGBoost
# Feature Eng. : EMA (50,200), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd

DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/EURUSD_15min.parquet"
START_DATE = "2025-04-23"
END_DATE = "2026-04-23"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7


# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add EMA 50/200 cross, ATR 14 and supporting features to ``df``.

    Feature groups: EMA 50/200 levels, distances and spread dynamics; ATR 14
    and its regime ratios; multi-lag returns; RSI 14; MACD (12/26/9);
    Bollinger position/width; stochastic %K/%D; candle body/wick ratios;
    50-bar z-score of close; 20-bar breakout flags; cyclical time-of-day.

    Args:
        df: Frame that receives the feature columns.
        close: Close prices. Presumably aligned with ``df.index`` — confirm
            against the caller.
        open_: Open prices.
        high: High prices.
        low: Low prices.

    Returns:
        The frame with all feature columns added and NaNs filled.
    """
    wilder_alpha = 1.0 / 14  # Wilder smoothing for the 14-period ATR and RSI

    # ── EMA 50 and EMA 200 ──────────────────────────────────────────────────
    ema_50 = close.ewm(span=50, adjust=False).mean()
    ema_200 = close.ewm(span=200, adjust=False).mean()
    df["ema_50"] = ema_50
    df["ema_200"] = ema_200
    df["dm_ema_50"] = (close - ema_50) / ema_50
    df["dm_ema_200"] = (close - ema_200) / ema_200

    # EMA spread: positive while the fast EMA is above the slow EMA
    df["ema_cross"] = ema_50 - ema_200
    df["ema_cross_norm"] = df["ema_cross"] / ema_200

    # First and second difference of the spread (its momentum / acceleration)
    df["ema_cross_delta"] = df["ema_cross"].diff(1)
    df["ema_cross_accel"] = df["ema_cross_delta"].diff(1)

    # ── ATR 14 ──────────────────────────────────────────────────────────────
    prev_close = close.shift(1)
    tr = pd.concat(
        [
            high - low,
            (high - prev_close).abs(),
            (low - prev_close).abs(),
        ],
        axis=1,
    ).max(axis=1)
    # alpha = 1/14 is the 14-period Wilder average; ewm(span=14) would use
    # alpha = 2/15 and react roughly twice as fast.
    atr = tr.ewm(alpha=wilder_alpha, adjust=False).mean()
    df["atr"] = atr
    df["natr"] = atr / close

    # ── Price momentum features ─────────────────────────────────────────────
    for lag in [1, 2, 4, 8, 16]:
        df[f"ret_{lag}"] = close.pct_change(lag)

    # ── Volatility regime ───────────────────────────────────────────────────
    df["atr_ratio"] = atr / atr.rolling(50).mean()  # ATR vs its own MA
    df["natr_ma20"] = df["natr"].rolling(20).mean()

    # ── RSI 14 (Wilder smoothing) ───────────────────────────────────────────
    delta = close.diff(1)
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)
    avg_g = gain.ewm(alpha=wilder_alpha, adjust=False).mean()
    avg_l = loss.ewm(alpha=wilder_alpha, adjust=False).mean()
    rs = avg_g / avg_l.replace(0, np.nan)
    df["rsi_14"] = 100 - (100 / (1 + rs))
    df["rsi_delta"] = df["rsi_14"].diff(1)

    # ── MACD (12/26/9) ──────────────────────────────────────────────────────
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    macd = ema_12 - ema_26
    signal = macd.ewm(span=9, adjust=False).mean()
    df["macd"] = macd
    df["macd_signal"] = signal
    df["macd_hist"] = macd - signal
    df["macd_hist_delta"] = df["macd_hist"].diff(1)

    # ── Bollinger Bands (20, 2σ) ─────────────────────────────────────────────
    bb_mid = close.rolling(20).mean()
    bb_std = close.rolling(20).std()
    bb_up = bb_mid + 2 * bb_std
    bb_lo = bb_mid - 2 * bb_std
    df["bb_pos"] = (close - bb_lo) / (bb_up - bb_lo).replace(0, np.nan)
    df["bb_width"] = (bb_up - bb_lo) / bb_mid

    # ── Stochastic %K / %D (14, 3) ──────────────────────────────────────────
    low14 = low.rolling(14).min()
    high14 = high.rolling(14).max()
    stoch_k = 100 * (close - low14) / (high14 - low14).replace(0, np.nan)
    stoch_d = stoch_k.rolling(3).mean()
    df["stoch_k"] = stoch_k
    df["stoch_d"] = stoch_d
    df["stoch_kd"] = stoch_k - stoch_d

    # ── Candle body / wick features ─────────────────────────────────────────
    body = (close - open_).abs()
    candle = (high - low).replace(0, np.nan)  # NaN on zero-range bars
    body_top = pd.concat([close, open_], axis=1).max(axis=1)
    body_bottom = pd.concat([close, open_], axis=1).min(axis=1)
    df["body_ratio"] = body / candle
    df["upper_wick"] = (high - body_top) / candle
    df["lower_wick"] = (body_bottom - low) / candle
    df["bull_candle"] = np.where(close > open_, 1, 0)

    # ── Rolling z-score of close vs SMA 50 ──────────────────────────────────
    sma_50 = close.rolling(50).mean()
    sma_50_std = close.rolling(50).std()
    df["zscore_50"] = (close - sma_50) / sma_50_std.replace(0, np.nan)

    # ── High/Low breakout flags ─────────────────────────────────────────────
    # shift(1) compares against the previous 20 bars, excluding the current one
    df["high_20_break"] = np.where(close > high.rolling(20).max().shift(1), 1, 0)
    df["low_20_break"] = np.where(close < low.rolling(20).min().shift(1), 1, 0)

    # ── Time-of-day features (cyclical encoding) ────────────────────────────
    if hasattr(df.index, "hour"):
        hour = df.index.hour + df.index.minute / 60.0
        df["hour_sin"] = np.sin(2 * np.pi * hour / 24.0)
        df["hour_cos"] = np.cos(2 * np.pi * hour / 24.0)
    else:
        # Keep the column set identical when the index carries no time
        # information (values correspond to hour 0).
        df["hour_sin"] = 0.0
        df["hour_cos"] = 1.0

    # ── Fill NaN ────────────────────────────────────────────────────────────
    # Zero-range bars and the other zero-denominator guards leave NaNs in the
    # middle of the data.  Forward-fill first so those take the last known
    # value rather than the next bar's (future information); the bfill then
    # only covers the leading indicator warm-up rows.
    df = df.ffill().bfill()
    return df


# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the XGBoost strategy configuration for the EMA 50/200 + ATR set.

    Returns:
        dict with title, model type and parameters, signal threshold,
        direction, stop-loss / take-profit, filters, target horizon and
        free-text objective / notes.
    """
    return {
        "title": "EMA Cross 50/200 + ATR Momentum (XGBoost)",
        "model_type": "XGBClassifier",
        "model_params": {
            "n_estimators": 400,
            "max_depth": 4,
            "learning_rate": 0.04,
            "subsample": 0.75,
            "colsample_bytree": 0.70,
            "min_child_weight": 3,
            "gamma": 0.15,
            "reg_alpha": 0.10,
            "reg_lambda": 1.50,
            "objective": "binary:logistic",
            "random_state": 42,
            "n_jobs": -1,
        },
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": [6, 20],
        "min_atr": 0.0002,
        "trend_filter": "sma_50",
        "target_horizon": 4,
        "objective": (
            "Maximize risk-adjusted return (Sharpe/Calmar) on EUR/USD 15-min data. "
            "EMA 50/200 cross provides the primary trend regime filter. "
            "ATR 14 gates entries by volatility (min_atr avoids dead-market noise). "
            "XGBoost chosen for its ability to capture non-linear feature interactions. "
            "Conservative depth=4 and regularisation (alpha/lambda) prevent overfitting "
            "on the relatively short 1-year window. 2:1 reward/risk (SL=0.5%, TP=1.0%) "
            "ensures positive expectancy even at modest hit-rates. Session filter 06-20 UTC "
            "keeps the strategy in liquid London/NY hours only."
        ),
        # NOTE(review): the stored text below is kept verbatim; the fill in
        # feature_engineering now runs ffill() before bfill().
        "notes": (
            "Feature set combines trend (EMA cross, z-score), momentum (RSI, MACD, returns), "
            "volatility (ATR ratio, BB width), and price structure (body/wick ratios, "
            "stochastic). Cyclical hour encoding captures intraday seasonality without "
            "introducing lookahead. bfill().ffill() handles EMA warm-up NaNs gracefully."
        ),
    }
—	NZD/USD MACD+RSI Momentum (XGBoost, Risk-Adj) Maximise risk-adjusted return (Sharpe/Calmar) on NZD/USD 15-min data. XGBoost chosen for its strong performance on tabular financial data. M…	D @delta_one	NZDUSD	15min	63.8%53.8%	+18.16%-3.16%	1.320.94	2.46%2.46%	845130
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 02:51:04
# Model : XGBoost
# Feature Eng. : RSI 14, MACD (12,26,9) + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════

# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd

DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/NZDUSD_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7


# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add RSI/MACD-centred technical features to ``df`` and return it filled.

    Feature columns are written onto ``df`` itself; the returned frame is a
    NaN-filled copy produced by the final ``ffill().bfill()``.

    Args:
        df: DataFrame that receives the feature columns.
        close, open_, high, low: price Series used for the calculations;
            assumed to share ``df``'s index (columns are assigned by index
            alignment) — TODO confirm against the caller.

    Returns:
        The DataFrame with all feature columns added and NaNs filled.
    """
    # ── RSI 14 ──────────────────────────────────────────────────────────────
    period = 14
    delta = close.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.ewm(com=period - 1, min_periods=period).mean()
    avg_loss = loss.ewm(com=period - 1, min_periods=period).mean()

    # A zero average loss used to yield NaN (later filled from neighbouring
    # bars). By the RSI definition it is 100 when only gains were seen, and we
    # report the neutral 50 when there was no movement at all. Warm-up rows
    # (NaN averages) stay NaN here and are handled by the final fill.
    no_loss = avg_loss == 0
    rs = avg_gain / avg_loss.where(~no_loss)
    rsi = 100 - (100 / (1 + rs))
    rsi = rsi.mask(no_loss & (avg_gain > 0), 100.0)
    rsi = rsi.mask(no_loss & (avg_gain == 0), 50.0)
    df["rsi_14"] = rsi

    # RSI derived features
    df["rsi_14_norm"] = (df["rsi_14"] - 50) / 50            # centred & scaled
    df["rsi_14_ob"] = np.where(df["rsi_14"] > 70, 1, 0)     # overbought flag
    df["rsi_14_os"] = np.where(df["rsi_14"] < 30, 1, 0)     # oversold flag
    df["rsi_14_mom"] = df["rsi_14"].diff(3)                 # 3-bar momentum

    # ── MACD (12, 26, 9) ────────────────────────────────────────────────────
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    macd_line = ema_12 - ema_26
    signal_line = macd_line.ewm(span=9, adjust=False).mean()
    macd_hist = macd_line - signal_line
    df["macd_line"] = macd_line
    df["macd_signal"] = signal_line
    df["macd_hist"] = macd_hist

    # MACD derived features
    df["macd_hist_mom"] = macd_hist.diff(2)                 # histogram momentum
    df["macd_cross_bull"] = np.where(
        (macd_line > signal_line) & (macd_line.shift(1) <= signal_line.shift(1)),
        1,
        0,
    )
    df["macd_cross_bear"] = np.where(
        (macd_line < signal_line) & (macd_line.shift(1) >= signal_line.shift(1)),
        1,
        0,
    )
    df["macd_zero_cross"] = np.where(
        (macd_line > 0) & (macd_line.shift(1) <= 0),
        1,
        np.where((macd_line < 0) & (macd_line.shift(1) >= 0), -1, 0),
    )

    # ── Additional price-action features ────────────────────────────────────
    # ATR (14) for volatility context
    tr = pd.concat(
        [
            high - low,
            (high - close.shift(1)).abs(),
            (low - close.shift(1)).abs(),
        ],
        axis=1,
    ).max(axis=1)
    atr_14 = tr.ewm(span=14, adjust=False).mean()
    df["atr_14"] = atr_14
    df["natr_14"] = atr_14 / close                          # normalised ATR

    # Bollinger Bands (20, 2) — mean-reversion context
    sma_20 = close.rolling(20).mean()
    std_20 = close.rolling(20).std()
    bb_up = sma_20 + 2 * std_20
    bb_lo = sma_20 - 2 * std_20
    df["bb_pct"] = (close - bb_lo) / (bb_up - bb_lo + 1e-12)  # 0-1 position
    df["bb_width"] = (bb_up - bb_lo) / sma_20                 # band width

    # SMA filters
    df["sma_20"] = sma_20
    df["sma_50"] = close.rolling(50).mean()
    df["price_vs_sma20"] = (close - sma_20) / (sma_20 + 1e-12)
    df["price_vs_sma50"] = (close - df["sma_50"]) / (df["sma_50"] + 1e-12)

    # Rate of change
    df["roc_5"] = close.pct_change(5)
    df["roc_10"] = close.pct_change(10)
    df["roc_20"] = close.pct_change(20)

    # Candlestick body / shadow ratios. Zero-range candles become NaN here so
    # the divisions below do not blow up; the final fill resolves them.
    body = (close - open_).abs()
    candle_rng = (high - low).replace(0, np.nan)
    df["body_ratio"] = body / candle_rng
    df["upper_shadow"] = (
        high - pd.concat([close, open_], axis=1).max(axis=1)
    ) / candle_rng
    df["lower_shadow"] = (
        pd.concat([close, open_], axis=1).min(axis=1) - low
    ) / candle_rng
    df["bar_direction"] = np.where(close >= open_, 1, -1)

    # Volume-proxy: realised range / ATR ratio
    df["range_vs_atr"] = candle_rng / (atr_14 + 1e-12)

    # Stochastic %K (14)
    lowest_14 = low.rolling(14).min()
    highest_14 = high.rolling(14).max()
    stoch_k = 100 * (close - lowest_14) / (highest_14 - lowest_14 + 1e-12)
    df["stoch_k"] = stoch_k
    df["stoch_d"] = stoch_k.rolling(3).mean()
    df["stoch_diff"] = df["stoch_k"] - df["stoch_d"]

    # RSI × MACD interaction
    df["rsi_macd_interact"] = df["rsi_14_norm"] * macd_hist

    # Lagged features (1 and 2 bars back) for key signals
    for col in ["rsi_14_norm", "macd_hist", "bb_pct", "roc_5"]:
        df[f"{col}_lag1"] = df[col].shift(1)
        df[f"{col}_lag2"] = df[col].shift(2)

    # ── Fill NaN ────────────────────────────────────────────────────────────
    # Forward-fill FIRST so an interior NaN (e.g. a zero-range candle) takes
    # the previous bar's value rather than the next bar's, which would leak
    # future information into the features. The trailing bfill then only
    # touches leading warm-up rows that have no past value to copy.
    df = df.ffill().bfill()
    return df


# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the configuration dictionary for this strategy.

    The dictionary holds the display title, model type and hyper-parameters,
    signal/risk settings and free-text ``objective`` / ``notes``. How each key
    is interpreted is decided by whatever consumes this dictionary, which is
    not visible in this script.
    """
    return {
        "title": "NZD/USD MACD+RSI Momentum (XGBoost, Risk-Adj)",
        "model_type": "XGBClassifier",
        "model_params": {
            "n_estimators": 400,
            "max_depth": 4,
            "learning_rate": 0.04,
            "subsample": 0.75,
            "colsample_bytree": 0.70,
            "min_child_weight": 3,
            "gamma": 0.15,
            "reg_alpha": 0.10,
            "reg_lambda": 1.50,
            "objective": "binary:logistic",
            "tree_method": "hist",
            "random_state": 42,
        },
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": [0, 23],
        "min_atr": None,
        "trend_filter": None,
        "target_horizon": 4,
        "objective": (
            "Maximise risk-adjusted return (Sharpe/Calmar) on NZD/USD 15-min data. "
            "XGBoost chosen for its strong performance on tabular financial data. "
            "Moderate depth (4) and high regularisation (gamma, alpha, lambda) prevent "
            "overfitting on a relatively small forex dataset. Subsample + colsample_bytree "
            "add stochastic diversity. SL 0.5% / TP 1.0% gives a 1:2 R:R ratio to support "
            "positive expectancy even with a sub-60% win rate. Threshold 0.55 filters marginal "
            "signals while keeping trade frequency acceptable. Target horizon of 4 bars (1 hour) "
            "aligns with typical MACD/RSI signal resolution on 15-min charts."
        ),
        "notes": (
            "Features: RSI-14 (raw, normalised, OB/OS flags, momentum), MACD(12,26,9) "
            "(line, signal, histogram, crosses, zero-cross), Bollinger Bands %B & width, "
            "ATR/NATR, SMA20/50 price deviations, Stochastic %K/%D, ROC(5/10/20), "
            "candlestick body/shadow ratios, RSI×MACD interaction term, and lagged "
            "versions (lag1, lag2) of key signals to capture short-term persistence."
        ),
    }
—	GBP/USD Gradient Boosting Trend + Mean-Reversion Maximize risk-adjusted return (Sharpe/Calmar) on GBP/USD 15-min. GradientBoostingClassifier with 400 shallow trees (depth 4) and a conservat…	C @candid-owl-125	GBPUSD	15min	53.5%57.5%	+3.70%-15.53%	1.160.64	2.46%2.46%	31247
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 01:41:21
# Model : Gradient Boosting
# Feature Eng. : SMA (20,50,200), BB (20,2.0), RSI 14, MACD (12,26,9), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════

# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd

DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/GBPUSD_15min.parquet"
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
VALIDATION_DATE = ""
TRAIN_SPLIT = 0.7


# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add trend, momentum, volatility and candle features to ``df``.

    Feature columns are written onto ``df`` itself; the returned frame is a
    NaN-filled copy produced by the final ``ffill().bfill()``.

    Args:
        df: DataFrame that receives the feature columns.
        close, open_, high, low: price Series used for the calculations;
            assumed to share ``df``'s index — TODO confirm against the caller.

    Returns:
        The DataFrame with all feature columns added and NaNs filled.
    """
    # ── SMA 20, 50, 200 + distance from close ──────────────────────────────
    for period in [20, 50, 200]:
        sma = close.rolling(period).mean()
        df[f"sma_{period}"] = sma
        df[f"dm_sma_{period}"] = (close - sma) / sma

    # ── Bollinger Bands (20, 2) ─────────────────────────────────────────────
    bb_mid = close.rolling(20).mean()
    bb_std = close.rolling(20).std(ddof=0)
    bb_upper = bb_mid + 2.0 * bb_std
    bb_lower = bb_mid - 2.0 * bb_std
    df["bb_mid"] = bb_mid
    df["bb_upper"] = bb_upper
    df["bb_lower"] = bb_lower
    df["bb_width"] = (bb_upper - bb_lower) / bb_mid
    bb_range = bb_upper - bb_lower
    df["bb_pct"] = np.where(bb_range != 0, (close - bb_lower) / bb_range, 0.5)

    # ── RSI 14 ─────────────────────────────────────────────────────────────
    delta = close.diff()
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)
    avg_gain = gain.ewm(com=13, min_periods=14, adjust=False).mean()
    avg_loss = loss.ewm(com=13, min_periods=14, adjust=False).mean()

    # With a zero average loss the old code substituted rs = 100, i.e. an RSI
    # of ~99.01 — even for a completely flat series, which then raised the
    # overbought flag. Use the definitional 100 when only gains were seen and
    # the neutral 50 when there was no movement. Warm-up rows stay NaN.
    no_loss = avg_loss == 0
    rs = avg_gain / avg_loss.where(~no_loss)
    rsi = 100.0 - (100.0 / (1.0 + rs))
    rsi = rsi.mask(no_loss & (avg_gain > 0), 100.0)
    rsi = rsi.mask(no_loss & (avg_gain == 0), 50.0)
    df["rsi_14"] = rsi.to_numpy()  # positional assignment, as before

    # ── MACD (12, 26, 9) ───────────────────────────────────────────────────
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    macd_line = ema_12 - ema_26
    signal_line = macd_line.ewm(span=9, adjust=False).mean()
    df["macd_line"] = macd_line
    df["macd_signal"] = signal_line
    df["macd_hist"] = macd_line - signal_line

    # ── ATR 14 + NATR ──────────────────────────────────────────────────────
    prev_close = close.shift(1)
    tr = pd.concat(
        [
            high - low,
            (high - prev_close).abs(),
            (low - prev_close).abs(),
        ],
        axis=1,
    ).max(axis=1)
    atr = tr.ewm(com=13, min_periods=14, adjust=False).mean()
    df["atr_14"] = atr
    df["natr"] = np.where(close != 0, atr / close, 0.0)

    # ── Price momentum (returns over multiple horizons) ────────────────────
    for lag in [1, 4, 8, 16]:
        df[f"ret_{lag}"] = close.pct_change(lag)

    # ── Candle body & wick features ────────────────────────────────────────
    body = (close - open_).abs()
    candle_range = (high - low).replace(0, np.nan)
    # Vectorised element-wise max/min instead of a Python-level
    # Series.combine(..., max/min) loop.
    body_top = np.maximum(close, open_)
    body_bottom = np.minimum(close, open_)
    df["body_ratio"] = body / candle_range
    df["upper_wick"] = np.where(
        candle_range.notna(), (high - body_top) / candle_range, 0.0
    )
    df["lower_wick"] = np.where(
        candle_range.notna(), (body_bottom - low) / candle_range, 0.0
    )
    df["candle_dir"] = np.where(close >= open_, 1.0, -1.0)

    # ── Volume proxy: normalised candle range ──────────────────────────────
    rolling_range = candle_range.rolling(20).mean()
    df["norm_range"] = np.where(
        rolling_range != 0, (high - low) / rolling_range, 1.0
    )

    # ── RSI derived features ───────────────────────────────────────────────
    df["rsi_ob"] = np.where(df["rsi_14"] > 70, 1.0, 0.0)
    df["rsi_os"] = np.where(df["rsi_14"] < 30, 1.0, 0.0)
    df["rsi_mid"] = df["rsi_14"] - 50.0
    df["rsi_slope"] = df["rsi_14"].diff(3)

    # ── MACD histogram slope ───────────────────────────────────────────────
    df["macd_hist_slope"] = df["macd_hist"].diff(2)
    df["macd_cross"] = np.where(
        (df["macd_hist"] > 0) & (df["macd_hist"].shift(1) <= 0),
        1.0,
        np.where(
            (df["macd_hist"] < 0) & (df["macd_hist"].shift(1) >= 0),
            -1.0,
            0.0,
        ),
    )

    # ── Bollinger squeeze (low volatility precursor) ───────────────────────
    bb_width_ma = df["bb_width"].rolling(20).mean()
    df["bb_squeeze"] = np.where(df["bb_width"] < bb_width_ma, 1.0, 0.0)

    # ── SMA slope features ─────────────────────────────────────────────────
    df["sma_20_slope"] = df["sma_20"].pct_change(4)
    df["sma_50_slope"] = df["sma_50"].pct_change(8)

    # ── Cross-SMA alignment (trend structure) ──────────────────────────────
    df["sma20_above_50"] = np.where(df["sma_20"] > df["sma_50"], 1.0, 0.0)
    df["sma50_above_200"] = np.where(df["sma_50"] > df["sma_200"], 1.0, 0.0)
    df["close_above_200"] = np.where(close > df["sma_200"], 1.0, 0.0)

    # ── Lagged close prices as additional features ─────────────────────────
    # NOTE: these are raw price levels shifted by `lag` bars, not returns.
    for lag in [1, 2, 3]:
        df[f"close_lag_{lag}"] = close.shift(lag)

    # ── Rolling volatility (std of returns) ────────────────────────────────
    df["vol_10"] = close.pct_change().rolling(10).std()
    df["vol_20"] = close.pct_change().rolling(20).std()
    df["vol_ratio"] = np.where(
        df["vol_20"] != 0, df["vol_10"] / df["vol_20"], 1.0
    )

    # ── Hour-of-day (cyclical encoding) ────────────────────────────────────
    if hasattr(df.index, "hour"):
        df["hour_sin"] = np.sin(2 * np.pi * df.index.hour / 24.0)
        df["hour_cos"] = np.cos(2 * np.pi * df.index.hour / 24.0)
    else:
        # Index carries no time-of-day: use the constant encoding of hour 0.
        df["hour_sin"] = 0.0
        df["hour_cos"] = 1.0

    # ── Fill NaN ───────────────────────────────────────────────────────────
    # Forward-fill FIRST so an interior NaN (zero-range candle in body_ratio,
    # or the 20-bar norm_range window that contains one) takes the previous
    # bar's value rather than the next bar's, which would leak future
    # information. The trailing bfill only touches leading warm-up rows.
    df = df.ffill().bfill()
    return df


# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the configuration dictionary for this strategy.

    The dictionary holds the display title, model type and hyper-parameters,
    signal/risk settings and free-text ``objective`` / ``notes``. How each key
    is interpreted is decided by whatever consumes this dictionary, which is
    not visible in this script.
    """
    return {
        "title": "GBP/USD Gradient Boosting Trend + Mean-Reversion",
        "model_type": "GradientBoostingClassifier",
        "model_params": {
            "n_estimators": 400,
            "max_depth": 4,
            "learning_rate": 0.04,
            "subsample": 0.8,
            "min_samples_leaf": 20,
            "max_features": "sqrt",
            "validation_fraction": 0.1,
            "n_iter_no_change": 30,
            "tol": 1e-4,
            "random_state": 42,
        },
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": [6, 20],
        "min_atr": 0.0002,
        "trend_filter": None,
        "target_horizon": 4,
        "objective": (
            "Maximize risk-adjusted return (Sharpe/Calmar) on GBP/USD 15-min. "
            "GradientBoostingClassifier with 400 shallow trees (depth 4) and a "
            "conservative learning rate of 0.04 avoids overfitting while capturing "
            "non-linear interactions between trend (SMA alignment, slope), momentum "
            "(MACD histogram, RSI), and volatility (ATR, BB squeeze) features. "
            "SL=0.5% / TP=1.0% gives 1:2 R/R. Session filter 06-20 UTC covers "
            "London open through NY close where GBP/USD liquidity is highest. "
            "min_atr filter avoids flat/illiquid bars."
        ),
        "notes": (
            "Features include multi-period SMA distances, Bollinger Band pct/width, "
            "RSI with overbought/oversold flags, MACD histogram slope and crossover, "
            "ATR-normalised volatility, candle body/wick ratios, rolling vol ratio, "
            "and hour-of-day cyclical encoding. target_horizon=4 (1-hour forward) "
            "balances signal frequency against predictability at 15-min resolution."
        ),
    }