金融数据处理常用代码

pandas code 合集

生成 OHLC 数据（按指定频率重采样）

def ohlc_resample(data, freq, index_name='datetime'):
    """Resample bar data to a coarser frequency as OHLC bars.

    Args:
        data: DataFrame with ``open``, ``high``, ``low`` and ``close``
            columns plus a datetime-like column named ``index_name``.
        freq: pandas offset alias passed to ``resample``, e.g. ``'D'`` (day),
            ``'W'`` (week), ``'M'`` (month), ``'H'`` (hour), ``'T'`` (minute),
            ``'S'`` (second).  pandas >= 2.2 deprecates ``'M'``/``'H'``/
            ``'T'``/``'S'`` in favour of ``'ME'``/``'h'``/``'min'``/``'s'``.
        index_name: name of the column to use as the time index.

    Returns:
        DataFrame indexed by the resampled timestamps with the columns
        ``open`` (first value), ``high`` (max), ``low`` (min) and ``close``
        (last value) of each bin.  ``data`` itself is not modified.
    """
    # Build the resampler once instead of re-indexing the frame per column.
    bins = data.set_index(index_name).resample(freq)
    return bins.agg({
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
    })

交易胜率

# Trade win ratio: 1 - (losing runs / number of round trips).
# A run is a stretch of consecutive non-zero daily PnL values; a zero value
# closes it (presumably a flat day -- confirm against how daily_pnl is built).
# A run that is still open when the series ends is not booked.
loss = 0
count = 0  # PnL accumulated over the run currently in progress
for i in back_summary.daily_pnl:
    if i != 0:
        count += i
        continue
    # Zero PnL closes the current run; book it as a loss if it ended negative.
    if count < 0:
        loss += 1
    count = 0

# Number of position changes divided by two (presumably one entry plus one
# exit per round trip -- confirm).
trade_times = int(((back_summary.pos - back_summary.pos.shift(1)).abs() > 0).sum() / 2)
trade_times
# With no completed round trip the ratio is undefined: report NaN instead of
# raising ZeroDivisionError.
win_ratio = 1 - loss / trade_times if trade_times else float('nan')
win_ratio

分组统计行业排名

# Percentile rank of turnover within each (industry level 2, date) group;
# rows with missing turnover are ranked last (na_option='bottom').
turnover_by_group = df2.groupby(['ind_lv2', 'date'])['turnover']
df2['rankinfo'] = turnover_by_group.rank(pct=True, na_option='bottom')

按天分组计算因子

from talib import MA

def ta_sma(df, fast=3, slow=5):
    """Return the fast-minus-slow moving-average spread of ``close`` via TA-Lib.

    Args:
        df: DataFrame with a numeric ``close`` column.
        fast: period of the short moving average (default 3).
        slow: period of the long moving average (default 5).

    Returns:
        Single-column DataFrame named ``sma`` aligned to ``df.index``, holding
        ``MA(close, fast) - MA(close, slow)``.
    """
    # The TA-Lib wrapper only accepts float64 ("double") arrays, so cast
    # explicitly; integer-typed closes would otherwise be rejected.
    close = df.close.to_numpy(dtype='float64')
    spread = MA(close, fast) - MA(close, slow)
    return pd.DataFrame(spread, index=df.index, columns=['sma'])
def sma(df, fast=3, slow=5):
    """Return the fast-minus-slow rolling-mean spread of ``close`` (pure pandas).

    Args:
        df: DataFrame with a numeric ``close`` column.
        fast: window length of the short rolling mean (default 3).
        slow: window length of the long rolling mean (default 5).

    Returns:
        Single-column DataFrame named ``sma`` aligned to ``df.index``.  Rows
        without a full window for both means are NaN.
    """
    close = df['close']
    spread = close.rolling(fast).mean() - close.rolling(slow).mean()
    return spread.to_frame('sma')
    
    
# Compute the factor separately for every 'day' group, once with the TA-Lib
# implementation and once with the pure-pandas one.
groups_by_day = data_day.groupby('day')
temp = groups_by_day.apply(ta_sma)
temp1 = groups_by_day.apply(sma)