Fund Inflow and Outflow Forecast (资金流入流出预测) - Challenge Baseline - Rank 445 / Score 122.17
Published: 2021-06-29 19:49:21 · Category: Technical Articles


Introduction

Ant Financial (蚂蚁金服) has hundreds of millions of members, and its business involves large fund inflows and outflows every day. With a user base this large, the pressure on cash management is considerable: fund flows must be forecast precisely so that liquidity risk is kept to a minimum while day-to-day business still runs smoothly. This competition, titled "Fund Inflow and Outflow Forecast" (资金流入流出预测), asks participants to use the purchase and redemption records of, for example, Yu'e Bao (余额宝) users to accurately predict the daily fund inflows and outflows. For a money-market fund, an inflow corresponds to a purchase (subscription) and an outflow to a redemption.

For full details of the competition, please see:

Approach:

Compute statistical features
Treat the date as a time-series feature
Build a CNN model (a minimal sketch of the sliding-window input construction follows this list)
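To make the windowing idea concrete before the full script, here is a minimal, self-contained sketch of the sliding-window step. The names mirror the full code further below; the random daily array is only a stand-in for the MinMax-scaled daily totals, so this is illustrative rather than a drop-in replacement.

import numpy as np

# Stand-in for ~14 months of MinMax-scaled daily totals, shape (n_days, 1).
daily = np.random.rand(427, 1)

def create_dataset(data, back=60, forward=30):
    # Each sample: `back` consecutive days as input, the next `forward` days as target.
    n_samples = len(data) - back - forward + 1
    X = np.zeros((n_samples, back, data.shape[-1]))
    Y = np.zeros((n_samples, forward, data.shape[-1]))
    for i in range(n_samples):
        X[i] = data[i:i + back]
        Y[i] = data[i + back:i + back + forward]
    return X, Y

X, Y = create_dataset(daily)
print(X.shape, Y.shape)  # (338, 60, 1) (338, 30, 1)

The calendar features (scaled day-of-month, month, weekday and a holiday flag) are windowed the same way and concatenated with the amount windows along the feature axis.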


Full code:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras import optimizers
from keras.callbacks import EarlyStopping
from keras.layers import Input, Conv1D, MaxPooling1D, Dense, Dropout, Flatten
from keras.models import Model
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
import os

os.chdir(r'E:\项目文件\资金流入流出预测')
user_balance = pd.read_csv('user_balance_table.csv')
# user_profile = pd.read_csv('user_profile_table.csv')

# Aggregate the daily total purchase and redemption amounts.
df_tmp = user_balance.groupby(['report_date'])[['total_purchase_amt', 'total_redeem_amt']].sum()
df_tmp.index = pd.to_datetime(df_tmp.index, format='%Y%m%d')

# Dates treated as holidays / special days and flagged as an extra feature.
holidays = ('20130813', '20130902', '20131001', '20131111', '20130919', '20131225', '20140101', '20140130', '20140131',
            '20140214', '20140405', '20140501', '20140602', '20140802', '20140901', '20140908')


def create_features(timeindex):
    """Calendar features: scaled day-of-month, month, weekday, plus a holiday flag."""
    n = len(timeindex)
    features = np.zeros((n, 4))
    features[:, 0] = timeindex.day.values / 31
    features[:, 1] = timeindex.month.values / 12
    features[:, 2] = timeindex.weekday.values / 6
    for i in range(n):
        if timeindex[i].strftime('%Y%m%d') in holidays:
            features[i, 3] = 1
    return features


features = create_features(df_tmp.index)
september = pd.to_datetime(['201409%02d' % i for i in range(1, 31)])  # the 30 days to predict
features_sep = create_features(september)

# Scale the purchase and redemption series separately.
scaler_pur = MinMaxScaler()
scaler_red = MinMaxScaler()
data_pur = scaler_pur.fit_transform(df_tmp.values[:, 0:1])
data_red = scaler_red.fit_transform(df_tmp.values[:, 1:2])


def create_dataset(data, back, forward=30):
    """Sliding window: `back` days of input, the following `forward` days as target."""
    n_samples = len(data) - back - forward + 1
    X = np.zeros((n_samples, back, data.shape[-1]))
    Y = np.zeros((n_samples, forward, data.shape[-1]))
    for i in range(n_samples):
        X[i, ...] = data[i:i + back, :]
        Y[i, ...] = data[i + back:i + back + forward, :]
    return X, Y


def build_cnn(X_trn, lr, n_outputs, dropout_rate):
    """1-D CNN that maps a (back, n_features) window to the next 30 purchase + 30 redemption values."""
    inputs = Input(X_trn.shape[1:])
    z = Conv1D(64, 14, padding='valid', activation='relu', kernel_initializer='he_uniform')(inputs)
    # z = MaxPooling1D(2)(z)
    z = Conv1D(128, 7, padding='valid', activation='relu', kernel_initializer='he_uniform')(z)
    z = MaxPooling1D(2)(z)
    z = Conv1D(256, 3, padding='valid', activation='relu', kernel_initializer='he_uniform')(z)
    z = Conv1D(256, 3, padding='valid', activation='relu', kernel_initializer='he_uniform')(z)
    z = MaxPooling1D(2)(z)
    z = Flatten()(z)
    z = Dropout(dropout_rate)(z)
    z = Dense(128, activation='relu', kernel_initializer='he_uniform')(z)
    z = Dropout(dropout_rate)(z)
    z = Dense(84, activation='relu', kernel_initializer='he_uniform')(z)
    outputs = Dense(n_outputs)(z)
    model = Model(inputs=inputs, outputs=outputs)
    adam = optimizers.Adam(lr=lr)
    model.compile(loss='mse', optimizer=adam, metrics=['mae'])
    model.summary()
    return model


back = 60      # days of history fed to the model
forward = 30   # days to predict

X_pur_data, Y_pur_data = create_dataset(data_pur, back, forward)
X_red_data, Y_red_data = create_dataset(data_red, back, forward)
X_features, Y_features = create_dataset(features, back, forward)
# Pad the future calendar features from `forward` to `back` timesteps so they can be
# stacked with the input window along the feature axis.
Y_features = np.concatenate((Y_features, np.zeros((Y_features.shape[0], back - forward, Y_features.shape[-1]))), axis=1)
# Earlier variant with separate purchase/redemption models, kept for reference:
# X_pur, X_red = np.concatenate((X_pur_data, X_features, Y_features), axis=-1), np.concatenate((X_red_data, X_features, Y_features), axis=-1)
# X_pur_trn, X_pur_val, X_red_trn, X_red_val = X_pur[:-forward, ...], X_pur[-1:, ...], X_red[:-forward, ...], X_red[-1:, ...]
# Y_pur_trn, Y_pur_val, Y_red_trn, Y_red_val = Y_pur_data[:-forward, ...], Y_pur_data[-1:, ...], Y_red_data[:-forward, ...], Y_red_data[-1:, ...]
Y_fea_sep = np.concatenate((features_sep, np.zeros((back - forward, features_sep.shape[-1]))), axis=0)
# X_pur_tst = np.concatenate((data_pur[-back:, :], features[-back:, :], Y_fea_sep), axis=-1)[None, ...]
# X_red_tst = np.concatenate((data_red[-back:, :], features[-back:, :], Y_fea_sep), axis=-1)[None, ...]

# Inputs: purchase, redemption, past calendar features and (padded) future calendar features.
X = np.concatenate((X_pur_data, X_red_data, X_features, Y_features), axis=-1)
# Targets: 30 purchase values followed by 30 redemption values; drop the trailing
# singleton channel so the shape matches the Dense(2 * forward) output.
Y = np.concatenate((Y_pur_data, Y_red_data), axis=1).squeeze(-1)

# Hold out the most recent window as the single validation sample.
X_trn, X_val, Y_trn, Y_val = X[:-forward, ...], X[-1:, ...], Y[:-forward, ...], Y[-1:, ...]
# Test input: the last `back` days plus the calendar features of September 2014.
X_tst = np.concatenate((data_pur[-back:, :], data_red[-back:, :], features[-back:, :], Y_fea_sep), axis=-1)[None, ...]

cnn = build_cnn(X_trn, lr=0.0008, n_outputs=2 * forward, dropout_rate=0.5)
history = cnn.fit(X_trn, Y_trn, batch_size=32, epochs=1000, verbose=2,
                  validation_data=(X_val, Y_val),
                  callbacks=[EarlyStopping(monitor='val_mae', patience=200, restore_best_weights=True)])

plt.figure(figsize=(8, 5))
plt.plot(history.history['mae'], label='train mae')
plt.plot(history.history['val_mae'], label='validation mae')
plt.ylim([0, 0.2])
plt.legend()
plt.show()


def plot_prediction(y_pred, y_true):
    plt.figure(figsize=(16, 4))
    plt.plot(np.squeeze(y_pred), label='prediction')
    plt.plot(np.squeeze(y_true), label='true')
    plt.legend()
    plt.show()
    print('MAE: %.3f' % mean_absolute_error(np.squeeze(y_pred), np.squeeze(y_true)))


pred = cnn.predict(X_val)
plot_prediction(pred, Y_val)

# Retrain on all samples before producing the final forecast.
history = cnn.fit(X, Y, batch_size=32, epochs=500, verbose=2,
                  callbacks=[EarlyStopping(monitor='mae', patience=30, restore_best_weights=True)])
plt.figure(figsize=(8, 5))
plt.plot(history.history['mae'], label='train mae')
plt.legend()
plt.show()
print(cnn.evaluate(X, Y, verbose=2))

# Predict September 2014, undo the scaling and write the submission file.
pred_tst = cnn.predict(X_tst)
pur_sep = scaler_pur.inverse_transform(pred_tst[:, :forward].transpose())
red_sep = scaler_red.inverse_transform(pred_tst[:, forward:].transpose())
test_user = pd.DataFrame({'report_date': [20140900 + i for i in range(1, 31)]})
test_user['pur'] = pur_sep.ravel().astype('int')
test_user['red'] = red_sep.ravel().astype('int')
test_user.to_csv('submission.csv', encoding='utf-8', index=False, header=False)
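As an optional sanity check (not part of the original script), the finished submission can be read back and the predicted September curves plotted. The column names below are only labels assigned when reading, since the file is written without a header.

import matplotlib.pyplot as plt
import pandas as pd

# Reload the header-less submission and eyeball the predicted September curves.
sub = pd.read_csv('submission.csv', header=None, names=['report_date', 'purchase', 'redeem'])
assert len(sub) == 30  # one row per day of September 2014

plt.figure(figsize=(12, 4))
plt.plot(sub['purchase'].values, label='predicted purchase')
plt.plot(sub['redeem'].values, label='predicted redeem')
plt.legend()
plt.show()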

Reposted from: https://data-mining.blog.csdn.net/article/details/109557936 If this repost infringes your copyright, please leave a comment with the original article's address and the post will be removed; apologies for any inconvenience.

