资金流入流出预测-挑战Baseline-排名445 /122.17
发布日期:2021-06-29 19:49:21
浏览次数:2
分类:技术文章
本文共 5994 字,大约阅读时间需要 19 分钟。
介绍
蚂蚁金服拥有上亿会员,并且业务场景中每天都涉及大量的资金流入和流出,面对如此庞大的用户群,资金管理压力会非常大。在既保证资金流动性风险最小,又满足日常业务运转的情况下,精准地预测资金的流入流出情况变得尤为重要。此届大赛以《资金流入流出预测》为题,期望参赛者能够通过对例如余额宝用户的申购赎回数据的把握,精准预测未来每日的资金流入流出情况。对货币基金而言,资金流入意味着申购行为,资金流出意味着赎回行为。具体请移步:
思路介绍:
计算统计特征;把时间作为时序特征;构建 CNN 模型。具体代码:
# Forecast the daily total purchase / redemption amounts for September 2014
# with a 1-D CNN.
#
# Pipeline:
#   1. Aggregate the per-user balance table into daily totals.
#   2. Build calendar features (day, month, weekday, holiday flag).
#   3. Slice the series into (past `back` days -> next `forward` days) windows.
#   4. Train a CNN that predicts both series at once, validate on the last
#      window, then keep training on all windows and predict September.
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras import optimizers
from keras.callbacks import EarlyStopping
from keras.layers import Input, Conv1D, MaxPooling1D, Dense, Dropout, Flatten
from keras.models import Model
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import MinMaxScaler

# NOTE: machine-specific working directory; adjust it to wherever
# 'user_balance_table.csv' lives before running.
os.chdir(r'E:\项目文件\资金流入流出预测')

user_balance = pd.read_csv('user_balance_table.csv')

# Daily totals over all users. Columns are selected with a *list*: selecting
# several group-by columns with a bare tuple is no longer supported by pandas.
df_tmp = user_balance.groupby('report_date')[
    ['total_purchase_amt', 'total_redeem_amt']
].sum()
df_tmp.index = pd.to_datetime(df_tmp.index, format='%Y%m%d')

# Dates (YYYYMMDD) flagged by the holiday feature.
holidays = ('20130813', '20130902', '20131001', '20131111', '20130919',
            '20131225', '20140101', '20140130', '20140131', '20140214',
            '20140405', '20140501', '20140602', '20140802', '20140901',
            '20140908')


def create_features(timeindex):
    """Return the calendar feature matrix for a ``DatetimeIndex``.

    Args:
        timeindex: pandas ``DatetimeIndex`` of the days to describe.

    Returns:
        Float array of shape ``(len(timeindex), 4)`` with columns
        ``day / 31``, ``month / 12``, ``weekday / 6`` and a 0/1 flag that is
        1 when the date (formatted ``%Y%m%d``) is listed in ``holidays``.
    """
    calendar = np.zeros((len(timeindex), 4))
    calendar[:, 0] = timeindex.day.values / 31
    calendar[:, 1] = timeindex.month.values / 12
    calendar[:, 2] = timeindex.weekday.values / 6
    # Vectorized membership test instead of a per-row Python loop.
    calendar[:, 3] = timeindex.strftime('%Y%m%d').isin(holidays)
    return calendar


features = create_features(df_tmp.index)

# The 30 days of September 2014 that have to be predicted.
september = pd.to_datetime(['201409%02d' % i for i in range(1, 31)],
                           format='%Y%m%d')
features_sep = create_features(september)

# Each series gets its own scaler so predictions can be mapped back
# independently.
scaler_pur = MinMaxScaler()
scaler_red = MinMaxScaler()
data_pur = scaler_pur.fit_transform(df_tmp.values[:, 0:1])
data_red = scaler_red.fit_transform(df_tmp.values[:, 1:2])


def create_dataset(data, back, forward=30):
    """Cut a 2-D series into sliding (history, future) windows.

    Args:
        data: array of shape ``(n_steps, n_channels)``.
        back: number of past steps in every input window.
        forward: number of future steps in every target window.

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape ``(n_samples, back, n_channels)``
        and ``Y`` has shape ``(n_samples, forward, n_channels)``, with
        ``n_samples = n_steps - back - forward + 1``.

    Raises:
        ValueError: if ``data`` is too short to hold a single window.
    """
    n_samples = len(data) - back - forward + 1
    if n_samples < 0:
        raise ValueError(
            'data has %d rows but back + forward = %d rows are required'
            % (len(data), back + forward))
    n_channels = data.shape[-1]
    X = np.zeros((n_samples, back, n_channels))
    Y = np.zeros((n_samples, forward, n_channels))
    for start in range(n_samples):
        split = start + back
        X[start, ...] = data[start:split, :]
        Y[start, ...] = data[split:split + forward, :]
    return X, Y


def build_cnn(X_trn, lr, n_outputs, dropout_rate):
    """Build and compile the 1-D convolutional regression network.

    Args:
        X_trn: training inputs; only ``X_trn.shape[1:]`` is used, as the
            model's input shape.
        lr: learning rate for the Adam optimizer.
        n_outputs: number of units in the final linear layer.
        dropout_rate: dropout rate applied before each of the two hidden
            dense layers.

    Returns:
        The compiled Keras ``Model`` (MSE loss, MAE metric).
    """
    conv_args = dict(padding='valid', activation='relu',
                     kernel_initializer='he_uniform')
    dense_args = dict(activation='relu', kernel_initializer='he_uniform')

    inputs = Input(X_trn.shape[1:])
    z = Conv1D(64, 14, **conv_args)(inputs)
    z = Conv1D(128, 7, **conv_args)(z)
    z = MaxPooling1D(2)(z)
    z = Conv1D(256, 3, **conv_args)(z)
    z = Conv1D(256, 3, **conv_args)(z)
    z = MaxPooling1D(2)(z)
    z = Flatten()(z)
    z = Dropout(dropout_rate)(z)
    z = Dense(128, **dense_args)(z)
    z = Dropout(dropout_rate)(z)
    z = Dense(84, **dense_args)(z)
    outputs = Dense(n_outputs)(z)

    model = Model(inputs=inputs, outputs=outputs)
    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by current Keras releases.
    adam = optimizers.Adam(learning_rate=lr)
    model.compile(loss='mse', optimizer=adam, metrics=['mae'])
    model.summary()
    return model


def plot_prediction(y_pred, y_true):
    """Plot prediction against ground truth and print their MAE."""
    flat_pred = np.squeeze(y_pred)
    flat_true = np.squeeze(y_true)
    plt.figure(figsize=(16, 4))
    plt.plot(flat_pred, label='prediction')
    plt.plot(flat_true, label='true')
    plt.legend()
    plt.show()
    print('MAE: %.3f' % mean_absolute_error(flat_pred, flat_true))


back = 60
forward = 30

X_pur_data, Y_pur_data = create_dataset(data_pur, back, forward)
X_red_data, Y_red_data = create_dataset(data_red, back, forward)
X_features, Y_features = create_dataset(features, back, forward)

# The calendar features of the *target* days are known in advance, so they are
# fed as extra input channels; zero-pad them from `forward` to `back` steps so
# they can be concatenated with the history channels.
Y_features = np.concatenate(
    (Y_features,
     np.zeros((Y_features.shape[0], back - forward, Y_features.shape[-1]))),
    axis=1)
Y_fea_sep = np.concatenate(
    (features_sep, np.zeros((back - forward, features_sep.shape[-1]))),
    axis=0)

# NOTE: an earlier variant trained one model per series (purchase / redeem);
# the version below trains a single model on both series jointly.
X = np.concatenate((X_pur_data, X_red_data, X_features, Y_features), axis=-1)
# Targets: `forward` purchase values followed by `forward` redeem values.
# Drop the trailing singleton channel axis so the target shape matches the
# model output (n_samples, 2 * forward) exactly.
Y = np.concatenate((Y_pur_data, Y_red_data), axis=1)[..., 0]

# Hold out the last window for validation; the last `forward` windows are
# excluded from training because their targets overlap the validation target.
X_trn, X_val = X[:-forward, ...], X[-1:, ...]
Y_trn, Y_val = Y[:-forward, ...], Y[-1:, ...]

# Input for the September forecast: the most recent `back` days of history
# plus the (padded) September calendar features.
X_tst = np.concatenate(
    (data_pur[-back:, :], data_red[-back:, :], features[-back:, :], Y_fea_sep),
    axis=-1)[None, ...]

cnn = build_cnn(X_trn, lr=0.0008, n_outputs=2 * forward, dropout_rate=0.5)
history = cnn.fit(
    X_trn, Y_trn,
    batch_size=32, epochs=1000, verbose=2,
    validation_data=(X_val, Y_val),
    callbacks=[EarlyStopping(monitor='val_mae', patience=200,
                             restore_best_weights=True)])

plt.figure(figsize=(8, 5))
plt.plot(history.history['mae'], label='train mae')
plt.plot(history.history['val_mae'], label='validation mae')
plt.ylim([0, 0.2])
plt.legend()
plt.show()

pred = cnn.predict(X_val)
plot_prediction(pred, Y_val)

# Continue training on every window (validation window included) before
# producing the final forecast.
history = cnn.fit(
    X, Y,
    batch_size=32, epochs=500, verbose=2,
    callbacks=[EarlyStopping(monitor='mae', patience=30,
                             restore_best_weights=True)])

plt.figure(figsize=(8, 5))
plt.plot(history.history['mae'], label='train mae')
plt.legend()
plt.show()

print(cnn.evaluate(X, Y, verbose=2))

pred_tst = cnn.predict(X_tst)
# First `forward` outputs are purchases, the rest are redemptions; each is
# mapped back to currency units with its own scaler.
pur_sep = scaler_pur.inverse_transform(pred_tst[:, :forward].transpose())
red_sep = scaler_red.inverse_transform(pred_tst[:, forward:].transpose())

test_user = pd.DataFrame({
    'report_date': [20140900 + i for i in range(1, 31)]
})
# inverse_transform returns (forward, 1) arrays; flatten to 1-D columns.
test_user['pur'] = pur_sep.ravel().astype('int')
test_user['red'] = red_sep.ravel().astype('int')
test_user.to_csv('submission.csv', encoding='utf-8', index=False, header=False)
转载地址:https://data-mining.blog.csdn.net/article/details/109557936 如侵犯您的版权,请留言回复原文章的地址,我们会给您删除此文章,给您带来不便请您谅解!
发表评论
最新留言
感谢大佬
[***.8.128.20]2024年04月06日 01时31分45秒
关于作者
喝酒易醉,品茶养心,人生如梦,品茶悟道,何以解忧?唯有杜康!
-- 愿君每日到此一游!
推荐文章
Gstreamer学习笔记(8):Gobject类对象
2019-04-30
melis cedar模块的链接脚本
2019-04-30
RTThread IO设备和驱动学习
2019-04-30
mmap函数实现
2019-04-30
音频基础知识详解
2019-04-30
Linux 命令xxd功能
2019-04-30
repo使用指南
2019-04-30
gstreamer-test
2019-04-30
GDB基本用法
2019-04-30
动态范围控制(DRC)简介
2019-04-30
使用C语言查看一个文件夹中所有文件及目录
2019-04-30
音频硬件基础
2019-04-30
TS流分析
2019-04-30
详解YUV420数据格式
2019-04-30
Gstreamer学习笔记(2):GstElement定义、连接
2019-04-30
GStreamer建议的学习步骤和网页链接汇总
2019-04-30