# 高频交易策略的思考(5)

Author: 小草, Created: 2023-08-09 18:13:16, Updated: 2023-09-18 19:51:59

### 所需数据

``````from datetime import date,datetime
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ast
%matplotlib inline
``````
``````tick_size = 0.0001
``````
``````trades = pd.read_csv('YGGUSDT_aggTrade.csv',names=['type','event_time', 'agg_trade_id','symbol', 'price', 'quantity', 'first_trade_id', 'last_trade_id',
``````
``````trades = trades.groupby(['transact_time','is_buyer_maker']).agg({
'transact_time':'last',
'price': 'first',
'quantity': 'sum',
})
``````
``````trades.index = pd.to_datetime(trades['transact_time'], unit='ms')
``````
``````depths = pd.read_csv('YGGUSDT_depth.csv',names=['type','event_time', 'transact_time','symbol', 'u1', 'u2', 'u3', 'bids','asks'])
``````
``````depths = depths.iloc[:100000]
``````
``````depths['bids'] = depths['bids'].apply(ast.literal_eval).copy()
``````
``````def expand_bid(bid_data):
expanded = {}
for j, (price, quantity) in enumerate(bid_data):
expanded[f'bid_{j}_price'] = float(price)
expanded[f'bid_{j}_quantity'] = float(quantity)
return pd.Series(expanded)
expanded = {}
for j, (price, quantity) in enumerate(ask_data):
return pd.Series(expanded)
# 应用到每一行，得到新的df
expanded_df_bid = depths['bids'].apply(expand_bid)
# 在原有df上进行扩展
depths = pd.concat([depths, expanded_df_bid, expanded_df_ask], axis=1)
``````
``````depths.index = pd.to_datetime(depths['transact_time'], unit='ms')
# Index the depth rows by datetime (transact_time is read as epoch milliseconds)
# and name the index 'time', distinct from the 'transact_time' column.
depths.index.rename('time', inplace=True);
``````
``````trades = trades[trades['transact_time'] < depths['transact_time'].iloc[-1]]
# Keep only trades strictly earlier than the last depth row's transact_time.
``````

``````bid_mean_list = []
for i in range(20):
bid_mean_list.append(round(depths[f'bid_{i}_quantity'].mean(),0))
plt.figure(figsize=(10, 5))
plt.plot(bid_mean_list);
plt.grid(True)
``````

``````df = pd.merge_asof(trades, depths, on='transact_time', direction='backward')
# Each trade row is paired with the most recent depth row whose
# transact_time is less than or equal to the trade's transact_time.
``````
``````df['spread'] = round(df['ask_0_price'] - df['bid_0_price'],4)
df['mid_price'] = (df['bid_0_price']+ df['ask_0_price']) / 2
``````
``````print('平均值     mid_price的误差：', ((df['price']-df['mid_price'])**2).sum())
print('挂单量加权 mid_price的误差：', ((df['price']-df['weight_mid_price'])**2).sum())
``````
``````平均值     mid_price的误差： 0.0048751924999999845

``````

### 考虑第二档深度

``````bins = np.linspace(-1, 1, 50)
df['change'] = (df['price'].pct_change().shift(-1))/tick_size
df['I_bins'] = pd.cut(df['I'], bins, labels=bins[1:])
df['I_2_bins'] = pd.cut(df['I_2'], bins, labels=bins[1:])
df['I_3_bins'] = pd.cut(df['I_3'], bins, labels=bins[1:])
df['I_4_bins'] = pd.cut(df['I_4'], bins, labels=bins[1:])
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(8, 5))

axes[0][0].plot(df.groupby('I_bins')['change'].mean())
axes[0][0].set_title('I')
axes[0][0].grid(True)

axes[0][1].plot(df.groupby('I_2_bins')['change'].mean())
axes[0][1].set_title('I 2')
axes[0][1].grid(True)

axes[1][0].plot(df.groupby('I_3_bins')['change'].mean())
axes[1][0].set_title('I 3')
axes[1][0].grid(True)

axes[1][1].plot(df.groupby('I_4_bins')['change'].mean())
axes[1][1].set_title('I 4')
axes[1][1].grid(True)
plt.tight_layout();
``````

``````df['adjust_mid_price_4'] = df['mid_price'] + df['spread']*(df['I']+0.3)*(df['I']**4+0.7)/3.8
``````
``````print('调整后的 mid_price_4的误差：', ((df['price']-df['adjust_mid_price_4'])**2).sum())
``````
``````调整后的 mid_price_4的误差： 0.0047909595497071375

``````

### 考虑成交数据

``````alpha=0.1
``````
``````df['avg_buy_interval'] = None
df['avg_sell_interval'] = None
``````
``````df['avg_buy_quantity'] = None
df['avg_sell_quantity'] = None
``````
``````df['avg_buy_quantity'] = df['avg_buy_quantity'].fillna(method='ffill')
df['avg_sell_quantity'] = df['avg_sell_quantity'].fillna(method='ffill')
df['avg_sell_interval'] = df['avg_sell_interval'].fillna(method='ffill')

df['avg_sell_rate'] =1000 / df['avg_sell_interval']

df['avg_sell_volume'] = df['avg_sell_rate']*df['avg_sell_quantity']
``````
``````df['I'] = (df['bid_0_quantity']- df['ask_0_quantity']) / (df['bid_0_quantity'] + df['ask_0_quantity'])
# I is the level-0 quantity imbalance: (bid - ask) / (bid + ask).
# It is positive when the best bid holds more quantity than the best ask.
``````
``````bins = np.linspace(-1, 1, 50)
# Bucket VI into the bins above, labelling each bucket by its right edge.
df['VI_bins'] = pd.cut(df['VI'], bins, labels=bins[1:])
# Plot the mean of 'change' within each VI bucket.
plt.plot(df.groupby('VI_bins')['change'].mean());
plt.grid(True)
``````

``````df['adjust_mid_price'] = df['mid_price'] + df['spread']*df['I']/2
``````
``````print('调整后的mid_price   的误差：', ((df['price']-df['adjust_mid_price'])**2).sum())
``````
``````调整后的mid_price   的误差： 0.0048373440193987035

``````

### 综合的中间价

``````#注意VI需要延后一个使用
df['price_change'] = np.log(df['price']/df['price'].rolling(40).mean())
df['CI'] = -1.5*df['VI'].shift()+0.7*(0.7*df['I']+0.2*df['I_2']+0.1*df['I_3'])**3 + 150*df['price_change'].shift(1)
``````
``````df['adjust_mid_price_11'] = df['mid_price'] + df['spread']*(df['CI'])
``````
``````调整后的mid_price_11的误差： 0.00421125960463469
``````

### 总结

More

mztcoin 太牛了草神，期待下一次的更新

louis 谁看懂了

xukitty 曲高和寡吧，唯有佩服