2.2 Labeling: Triple barrier method
This post describes how to label a financial dataset to support supervised learning.
The post is directly based on content from the book "Advances in Financial Machine Learning" by Marcos López de Prado.
Physical meaning:
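Algorithm description: for each observation, three barriers are set: an upper profit-taking barrier, a lower stop-loss barrier, and a vertical barrier a fixed number of bars ahead. The observation is labeled by whichever barrier is touched first: +1 for profit-taking, -1 for stop-loss, and 0 for the vertical barrier.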
Python code:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
# Load the trade file, keeping only its first five columns
df = pd.read_csv(
    r'C:\Users\josde\OneDrive\Denny\Deep-learning\Data-sets\Trade-data\ES_Trades.csv'
).iloc[:, 0:5]
# Dollar value of each row: price times volume
df['Dollar'] = df['Price'] * df['Volume']
print(df.columns)
# Bar threshold: one fiftieth of the average dollar value summed per 'Date'
d = pd.DataFrame(df.pivot_table(values='Dollar', index='Date', aggfunc='sum'))
DOLLAR_THRESHOLD = np.average(d['Dollar']) * (1 / 50)
# Generate bars
def bar_gen(df, DOLLAR_THRESHOLD):
    """Aggregate rows of ``df`` into dollar bars.

    Rows are walked in order while their 'Dollar' values are accumulated.
    Each time the running total reaches ``DOLLAR_THRESHOLD`` a bar is closed
    on that row and the accumulation restarts from zero.

    Args:
        df: DataFrame with 'Price' and 'Dollar' columns.
        DOLLAR_THRESHOLD: Accumulated dollar value at which a bar is closed.

    Returns:
        DataFrame with columns 'Index' (0-based position in ``df`` of the
        row that closed the bar), 'Open', 'High', 'Low' and 'Close'. Rows
        after the last completed bar do not produce a bar.
    """
    bars = []
    bar_prices = []  # prices seen since the previous bar was closed
    dollar_cusum = 0
    for i, (price, dollar) in enumerate(zip(df['Price'], df['Dollar'])):
        bar_prices.append(price)
        dollar_cusum += dollar
        if dollar_cusum >= DOLLAR_THRESHOLD:
            bars.append((
                i,
                bar_prices[0],
                np.max(bar_prices),
                np.min(bar_prices),
                bar_prices[-1],
            ))
            # Start the next bar from scratch; any overshoot is discarded.
            bar_prices = []
            dollar_cusum = 0
    return pd.DataFrame(bars, columns=['Index', 'Open', 'High', 'Low', 'Close'])
# Build the dollar bars from the loaded trades using the threshold computed above
dollarbar = bar_gen(df, DOLLAR_THRESHOLD)
def _first_hit(mask, offset):
    """Return ``offset`` plus the position of the first True in ``mask``, or NaN if none."""
    hits = np.flatnonzero(mask)
    return offset + hits[0] if hits.size else np.nan


def tripple(dollarbar, df, pt, sl, t1):
    """Label every bar with the triple-barrier method.

    For bar number ``t`` the price path starts at the row where the previous
    bar closed (row 0 for the first bar) and runs up to, but not including,
    the closing row of bar ``t + t1``. When fewer than ``t1`` bars remain,
    the bar's own closing row is used as the end instead. Returns along the
    path are measured relative to the price at the starting row.

    Args:
        dollarbar: DataFrame with an 'Index' column holding the 0-based row
            position in ``df`` at which each bar closed.
        df: DataFrame of trades; prices are read from column position 3.
        pt: Profit-taking threshold; a return strictly above it is a hit.
        sl: Stop-loss threshold; a return strictly below it is a hit.
        t1: Number of bars ahead at which the vertical barrier is placed.

    Returns:
        DataFrame with one row per bar and columns:
        'Stop Loss'   - row position of the earliest stop-loss hit, or NaN.
        'Index'       - row position of the vertical barrier.
        'Profit Take' - row position of the earliest profit-take hit, or NaN.
        'Return'      - return at whichever barrier was touched first.
        'Label'       - -1 stop loss first, 0 vertical barrier first,
                        +1 profit take first.
    """
    # NOTE(review): column position 3 is presumably the 'Price' column that
    # bar_gen reads by name -- confirm against the CSV layout.
    prices = df.iloc[:, 3].to_numpy()
    bar_ends = dollarbar['Index'].to_numpy()
    n_bars = len(bar_ends)

    events = []
    start = 0
    for t, idx in enumerate(bar_ends):
        end = bar_ends[t + t1] if t + t1 < n_bars else idx
        base = prices[start]
        returns = prices[start:end] / base - 1.0

        stop_loss = _first_hit(returns < sl, start)      # earliest stop loss
        profit_take = _first_hit(returns > pt, start)    # earliest profit taking

        # Order matters: position in this list minus one is the label.
        barriers = [stop_loss, end, profit_take]
        # nanargmin skips barriers that were never touched (NaN); plain
        # argmin would return the position of the first NaN instead.
        first = int(np.nanargmin(barriers))
        touched = int(barriers[first])
        final_outcome = prices[touched] / base - 1.0

        events.append((stop_loss, end, profit_take, final_outcome, first - 1))
        start = idx

    cols = ['Stop Loss', 'Index', 'Profit Take', 'Return', 'Label']
    return pd.DataFrame(events, columns=cols)
# Label every bar: pt=0.000002 (profit-take return threshold),
# sl=-0.000002 (stop-loss return threshold), t1=45 (bars ahead for the vertical barrier)
events = tripple(dollarbar, df, 0.000002, -0.000002, 45)
print(events)
Comments
Post a Comment