import json
import math
import os
import random
from datetime import datetime, timedelta


def generate_ohlcv_from_pattern(symbol, date_str, pattern_segments, rng=None):
    """Generate minute-by-minute OHLCV bars from a list of trend segments.

    Bars start at 09:30 on ``date_str`` and advance one minute per bar,
    continuing across segments. Within a segment the underlying trend moves
    linearly from ``start_price`` towards ``end_price``; the bar at offset
    ``i`` sits at ``start_price + i * (end_price - start_price) / duration``,
    so the final bar of a segment stops one step short of ``end_price``.

    Args:
        symbol: Ticker symbol, copied into each bar's ``"code"`` field.
        date_str: Trading date in ``YYYY-MM-DD`` format.
        pattern_segments: Iterable of dicts with:
            - duration_mins: int, number of one-minute bars in the segment.
              Segments with a non-positive duration produce no bars.
            - start_price: float, trend price of the segment's first bar.
            - end_price: float, trend price the segment moves towards.
            - volatility: float, standard deviation of the per-bar noise.
            - volume_mult: float, optional multiplier applied to the base
              volume (defaults to 1.0).
        rng: Optional random source exposing ``gauss(mu, sigma)``, e.g. a
            ``random.Random`` instance, for reproducible output. Defaults to
            the module-level ``random`` generator.

    Returns:
        A list of dicts, one per minute, with keys ``code``, ``time``
        (``"%Y-%m-%d %H:%M:%S"``), ``open``, ``high``, ``low``, ``close``
        (each rounded to 2 decimals) and ``volume`` (int, at least 100).

    Raises:
        ValueError: If ``date_str`` does not match ``YYYY-MM-DD``.
        KeyError: If a segment lacks one of the required keys.
    """
    if rng is None:
        # The random module exposes the same gauss() as a Random instance and
        # keeps honouring any global random.seed() set by the caller.
        rng = random

    base_date = datetime.strptime(date_str, "%Y-%m-%d")
    current_time = base_date.replace(hour=9, minute=30)
    data = []

    for segment in pattern_segments:
        duration = segment['duration_mins']
        start_p = segment['start_price']
        end_p = segment['end_price']
        volatility = segment['volatility']
        vol_mult = segment.get('volume_mult', 1.0)

        # A zero-length segment would divide by zero below; like a negative
        # duration it simply contributes no bars.
        if duration <= 0:
            continue

        price_step = (end_p - start_p) / duration

        for i in range(duration):
            trend_price = start_p + (price_step * i)

            # Random noise around the trend line.
            noise = rng.gauss(0, volatility)

            # OHLC generation: the wicks extend away from the open, and the
            # close is drawn around the bar's typical price.
            open_p = trend_price + noise
            high_p = open_p + abs(rng.gauss(0, volatility / 2))
            low_p = open_p - abs(rng.gauss(0, volatility / 2))
            close_p = (open_p + high_p + low_p) / 3 + rng.gauss(0, volatility / 4)

            # The close may land outside the wicks; widen them so that
            # low <= open, close <= high always holds.
            high_p = max(open_p, close_p, high_p)
            low_p = min(open_p, close_p, low_p)

            # Volume profile: heavier before 10:00 and from 15:00 onward,
            # further scaled by the segment's multiplier.
            time_factor = 1.0
            if current_time.hour < 10 or current_time.hour >= 15:
                time_factor = 1.5

            base_vol = 50000 * time_factor * vol_mult
            volume = int(max(100, rng.gauss(base_vol, base_vol * 0.3)))

            data.append({
                "code": symbol,
                "time": current_time.strftime("%Y-%m-%d %H:%M:%S"),
                "open": round(open_p, 2),
                "high": round(high_p, 2),
                "low": round(low_p, 2),
                "close": round(close_p, 2),
                "volume": volume,
            })

            current_time += timedelta(minutes=1)

    return data


if __name__ == "__main__":
    # Example: AAPL "Morning Rally then Chop"
    # Starting at 150.00
    segments = [
        {"duration_mins": 30, "start_price": 150.0, "end_price": 152.5,
         "volatility": 0.15, "volume_mult": 1.5},  # Rally
        {"duration_mins": 60, "start_price": 152.5, "end_price": 151.8,
         "volatility": 0.10, "volume_mult": 0.8},  # Pullback
        {"duration_mins": 120, "start_price": 151.8, "end_price": 152.2,
         "volatility": 0.08, "volume_mult": 0.5},  # Chop
        {"duration_mins": 180, "start_price": 152.2, "end_price": 153.5,
         "volatility": 0.12, "volume_mult": 1.2},  # Afternoon push
    ]

    data = generate_ohlcv_from_pattern("AAPL", "2026-02-06", segments)

    output_path = "data/mock_scenarios/AAPL_rally.json"
    # Create the target directory up front so a fresh checkout does not fail
    # with FileNotFoundError.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)

    print(f"Generated {len(data)} points for AAPL")