Motor Fault Diagnosis Classification Algorithm — 100% Classification Accuracy
Published: 2021-06-29 19:49:12 · Category: Technical Articles


Motor Fault Diagnosis Classification Algorithm

Dataset source:
The dataset can be downloaded from the dataset page after registering and logging in.

The data files have also been uploaded to my downloads section.

The platform hosts datasets from a variety of industrial scenarios, including manufacturing, rail transit, energy and power, and semiconductors, collected at several levels: component, equipment, and production line.

About the motor fault diagnosis data:

Data description: the training data contains operating data from three different machines and has been anonymized. It includes one motor fault, which occurs on machine 3.

The data contains the following fields:
temp_drv: generator bearing temperature, drive end
temp_nondrv: generator bearing temperature, non-drive end
wind_speed: ambient wind speed
wind_dir: ambient wind direction
env_temp: ambient temperature
power_W: motor active power
time: timestamp
wtid: machine ID
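
As a quick sanity check, the schema can be inspected with pandas (a minimal sketch, assuming the Motor_fault_train.csv file used by the loading code further below):

import pandas as pd

df = pd.read_csv('Motor_fault_train.csv')
print(df.columns.tolist())        # the eight fields above, plus the fault label the loaders read
print(df['wtid'].value_counts())  # rows per machine; the documented fault is on machine 3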

Final results: as shown in the screenshots in the original post, the classifier reaches 100% accuracy.

Overall approach:

1. To be updated later.
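
The main training script below builds two first-layer classifiers (random forest and extra-trees), evaluates each on the training and validation sets, then feeds their predictions into a second-layer extra-trees model, i.e. a simple two-level stacking ensemble.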

import warnings

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

from Motor_fault.data_utiles import train, get_test
from Motor_fault.model_utils import build_model_etr, score_model, \
    write_mae, build_model_rf, pr, scatter_line, draw_ROC_curve, save_load

warnings.filterwarnings('ignore')

x_train, y_train = train()
x_val, y_val = get_test()
# np.random.seed(2020)

# First layer: random forest and extra-trees classifiers
model_rf = build_model_rf(x_train, y_train)
val_rf = model_rf.predict(x_val)

model_etr = build_model_etr(x_train, y_train)
# model_etr = save_load('', 'load')  # alternatively, reload a previously saved model
val_etr = model_etr.predict(x_val)

accuracy = round(accuracy_score(y_val, val_etr), 6)
print('etr test set accuracy:', accuracy)
write_mae('etr', 'test set', accuracy)
score_model(y_val, val_etr, model_etr, 'validation set')
scatter_line(y_val, val_etr)

train_etr_pred = model_etr.predict(x_train)
accuracy = round(accuracy_score(y_train, train_etr_pred), 6)
print('etr training set accuracy:', accuracy)
write_mae('etr', 'training set', accuracy)

train_rf_pred = model_rf.predict(x_train)
accuracy = round(accuracy_score(y_train, train_rf_pred), 6)
print('rf training set accuracy:', accuracy)
write_mae('rf', 'training set', accuracy)

# Second layer: stack the first-layer predictions as new features
Strak_X_train = pd.DataFrame()
Strak_X_train['Method_2'] = train_rf_pred
Strak_X_train['Method_4'] = train_etr_pred

Strak_X_val = pd.DataFrame()
Strak_X_val['Method_2'] = val_rf  # column names must match the training frame
Strak_X_val['Method_4'] = val_etr

model_Stacking = build_model_etr(Strak_X_train, y_train)

train_pre_Stacking = model_Stacking.predict(Strak_X_train)
score_model(y_train, train_pre_Stacking, model_Stacking, 'training set')

val_pre_Stacking = model_Stacking.predict(Strak_X_val)
score_model(y_val, val_pre_Stacking, model_Stacking, 'validation set')

val_proba_Stacking = model_Stacking.predict_proba(Strak_X_val)
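
For comparison, scikit-learn ships a built-in StackingClassifier that implements the same two-layer idea; unlike the script above, it trains the second layer on out-of-fold first-layer predictions, which avoids fitting the meta-model on in-sample outputs. A minimal sketch, reusing the tuned hyperparameters from the grid searches in model_utils.py below:

from sklearn.ensemble import StackingClassifier, RandomForestClassifier, ExtraTreesClassifier

# First-layer models mirror build_model_rf / build_model_etr; the final
# estimator plays the role of the second-layer extra-trees model.
stack = StackingClassifier(
    estimators=[
        ('rf', RandomForestClassifier(max_depth=42, n_estimators=79)),
        ('etr', ExtraTreesClassifier(max_depth=33, n_estimators=108)),
    ],
    final_estimator=ExtraTreesClassifier(),
    cv=3,  # out-of-fold predictions feed the final estimator
)
stack.fit(x_train, y_train)
print('stacking validation accuracy:', stack.score(x_val, y_val))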
data_utiles.py

import os

os.chdir(r'E:\项目文件\电机故障诊断\\')

import pandas as pd


def train():
    # Read the first 9 columns: 6 features (columns 0-5), label in the last column
    data = pd.read_csv('Motor_fault_train.csv', usecols=range(0, 9))
    return data.values[:, 0:6], data.values[:, -1]


def get_test():
    data = pd.read_csv('Motor_fault_test.csv', dtype=float, usecols=range(0, 9), engine='python')
    return data.values[:, 0:6], data.values[:, -1]
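
A quick shape check confirms what the loaders return: six feature columns and a one-dimensional label vector (a minimal sketch):

x_train, y_train = train()
x_val, y_val = get_test()
print(x_train.shape, y_train.shape)  # (n_train, 6) features, (n_train,) labels
print(x_val.shape, y_val.shape)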

Model utility methods. The file wraps a number of sklearn-style classifiers: decision tree, random forest, extremely randomized trees (extra-trees), multi-layer perceptron, LightGBM, XGBoost, GBDT, logistic regression, and more.

model_utils.py

import joblib
import numpy as np
import matplotlib.pyplot as plt
from lightgbm import LGBMClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, ExtraTreesClassifier, \
    AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, accuracy_score, roc_auc_score, precision_recall_curve, auc, roc_curve, \
    f1_score, recall_score, cohen_kappa_score
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

from utils.read_write import writeOneCsv

src = r'E:\项目文件\电机故障诊断\data\\'


def build_model_dt(x_train, y_train):
    estimator = DecisionTreeClassifier(random_state=7)
    param_grid = {
        'max_depth': range(10, 25, 1),
    }
    model = GridSearchCV(estimator, param_grid, cv=3)
    model.fit(x_train, y_train)
    print('dt')
    print(model.best_params_)
    writeParams('dt', model.best_params_)
    return model


def build_model_rf(x_train, y_train):
    estimator = RandomForestClassifier()
    # Single-value ranges keep the already-tuned parameters fixed
    param_grid = {
        'max_depth': range(42, 43, 1),
        'n_estimators': range(79, 80, 1),
    }
    model = GridSearchCV(estimator, param_grid, cv=3)
    model.fit(x_train, y_train)
    print('rf')
    print(model.best_params_)
    writeParams('rf', model.best_params_)
    return model


def build_model_etr(x_train, y_train):
    # Extra-trees classifier; n_estimators is the number of trees in the forest
    estimator = ExtraTreesClassifier()
    param_grid = {
        'max_depth': range(33, 34, 1),
        'n_estimators': range(108, 109, 1),
    }
    model = GridSearchCV(estimator, param_grid, cv=3)
    model.fit(x_train, y_train)
    print('etr')
    print(model.best_params_)
    writeParams('etr', model.best_params_)
    return model


def build_model_xgb(x_train, y_train):
    estimator = XGBClassifier(gamma=0, colsample_bytree=0.9, subsample=0.91)
    param_grid = {
        'learning_rate': [0.27],
        'max_depth': range(12, 13, 1),
        'n_estimators': range(34, 35, 3),
    }
    model = GridSearchCV(estimator, param_grid, cv=3)
    model.fit(x_train, y_train)
    print('xgb')
    print(model.best_params_)
    writeParams('xgb', model.best_params_)
    return model


def build_model_lgb(x_train, y_train):
    estimator = LGBMClassifier()
    param_grid = {
        'learning_rate': [0.18],
        'n_estimators': range(100, 101, 1),
        'num_leaves': range(75, 80, 5),
    }
    gbm = GridSearchCV(estimator, param_grid)
    gbm.fit(x_train, y_train.ravel())
    print('lgb')
    print(gbm.best_params_)
    writeParams('lgb', gbm.best_params_)
    return gbm


def build_model_mlpr(x_train, y_train):
    from sklearn.neural_network import MLPClassifier
    # relu activation with the lbfgs solver gave the best results
    mlp = MLPClassifier(activation='relu', solver='lbfgs')
    param_grid = {
        'alpha': [0.002, 0.001],
        'hidden_layer_sizes': [(38, 19)],
        'max_iter': range(75, 85, 1),
    }
    model = GridSearchCV(mlp, param_grid, cv=3)
    model.fit(x_train, y_train.ravel())
    print('mlpr')
    print(model.best_params_)
    writeParams('mlpr', model.best_params_)
    return model


def build_model_ada(x_train, y_train):
    estimator = AdaBoostClassifier()
    param_grid = {
        'learning_rate': [0.23],
        'n_estimators': range(13, 14, 1),
    }
    model = GridSearchCV(estimator, param_grid, cv=3)
    model.fit(x_train, y_train)
    print('ada')
    print(model.best_params_)
    writeParams('ada', model.best_params_)
    return model


def build_model_gbdt(x_train, y_train):
    estimator = GradientBoostingClassifier(min_samples_leaf=0.1, min_samples_split=10, subsample=0.998)
    param_grid = {
        'learning_rate': [0.75],
        'max_depth': range(25, 30, 1),
        'n_estimators': range(80, 85, 1),
    }
    gbdt = GridSearchCV(estimator, param_grid, cv=3)
    gbdt.fit(x_train, y_train.ravel())
    print('gbdt')
    print(gbdt.best_params_)
    writeParams('gbdt', gbdt.best_params_)
    return gbdt


def build_model_liner_svc(x_train, y_train):
    # LinearSVC requires a positive iteration limit; unlike SVC, max_iter=-1 is invalid
    svm_reg = LinearSVC(max_iter=10000)
    param_grid = {
        'C': range(1, 2, 1),
    }
    model = GridSearchCV(svm_reg, param_grid, cv=3)
    model.fit(x_train, y_train)
    print('LinearSVC')
    print(model.best_params_)
    return model


def train_logistic_classifier(x_train, y_train):
    model = LogisticRegression()
    param_grid = {
        'C': range(2, 3, 1),
        'penalty': ['l2'],
    }
    model = GridSearchCV(model, param_grid, cv=3)
    model.fit(x_train, y_train.ravel())
    print('LR')
    print(model.best_params_)
    return model


def build_model_svc(x_train, y_train):
    model = SVC(max_iter=-1)
    # 'precomputed' is dropped from the kernel grid: it needs a square kernel matrix, not raw features
    param_grid = {
        'C': range(1, 2, 2),
        'kernel': ['poly', 'rbf'],
        'cache_size': range(200, 210, 20),
    }
    model = GridSearchCV(model, param_grid, cv=3)
    model.fit(x_train, y_train.ravel())
    print('SVC')
    print(model.best_params_)
    return model


def score_model(test, predict, model, data_type):
    accuracy = round(accuracy_score(test, predict), 6)
    print(data_type + ',accuracy,', accuracy)
    writeOneCsv(['staking', data_type, 'accuracy', accuracy], src + '调参记录.csv')
    pre_score = precision_score(test, predict, average="macro")
    print(data_type + ",precision,", round(pre_score, 6))
    writeOneCsv(['staking', data_type, 'precision', round(pre_score, 6)], src + '调参记录.csv')
    roc_auc = round(roc_auc_score(test, predict), 6)
    print(data_type + ",roc_auc,", roc_auc)
    writeOneCsv(['staking', data_type, 'roc_auc', roc_auc], src + '调参记录.csv')
    # sklearn metrics expect (y_true, y_pred) in that order
    f1 = f1_score(test, predict)
    print(data_type + ",f1,", round(f1, 6))
    writeOneCsv(['staking', data_type, 'f1', round(f1, 6)], src + '调参记录.csv')
    recall = recall_score(test, predict)
    print(data_type + ",recall,", round(recall, 6))
    writeOneCsv(['staking', data_type, 'recall', round(recall, 6)], src + '调参记录.csv')
    cohen_kappa = cohen_kappa_score(test, predict)
    print(data_type + ",cohen_kappa,", round(cohen_kappa, 6))
    writeOneCsv(['staking', data_type, 'cohen_kappa', round(cohen_kappa, 6)], src + '调参记录.csv')


def save_load(model, save_or_load):
    path = src + 'etr.pkl'
    if save_or_load == 'save':
        # save model
        joblib.dump(model, path)
    else:
        # load model
        model_etr = joblib.load(path)
        return model_etr


def fit_size(x, y):
    from sklearn import preprocessing
    # Min-max scale features and labels to [0, 1]
    x_min = preprocessing.MinMaxScaler()
    y_min = preprocessing.MinMaxScaler()
    y = np.array(y).reshape(len(y), 1)
    x = x_min.fit_transform(x)
    y = y_min.fit_transform(y)
    return x, y


def scatter_line(y_val, y_pre):
    xx = range(0, len(y_val))
    plt.scatter(xx, y_val, color="red", label="actual", linewidth=3)
    plt.plot(xx, y_pre, color="orange", label="predicted", linewidth=2)
    plt.legend()
    plt.show()


def draw_ROC_curve(y_test, y_predict):
    false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_predict)
    roc_auc = auc(false_positive_rate, true_positive_rate)
    plt.title('ROC')
    plt.plot(false_positive_rate, true_positive_rate, 'b', label='AUC = %0.2f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.ylabel('TPR')
    plt.xlabel('FPR')
    plt.show()
    plt.close()


def pr(y_val, predict_proba):
    precision, recall, thresholds = precision_recall_curve(y_val, predict_proba)
    plt.step(recall, precision, color='b', alpha=0.2, where='post')
    plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('2-class Precision-Recall curve')
    plt.show()


def writeParams(model, best):
    if model in ['gbdt', 'xgb']:
        writeOneCsv([model, best['max_depth'], best['n_estimators'], best['learning_rate']], src + '调参记录.csv')
    elif model == 'mlpr':
        writeOneCsv([model, best['hidden_layer_sizes'], best['max_iter'], best['alpha']], src + '调参记录.csv')
    elif model == 'ada':
        writeOneCsv([model, 0, best['n_estimators'], best['learning_rate']], src + '调参记录.csv')
    elif model == 'lgb':
        writeOneCsv([model, best['num_leaves'], best['n_estimators'], best['learning_rate']], src + '调参记录.csv')
    elif model == 'dt':
        writeOneCsv([model, best['max_depth'], 0, 0], src + '调参记录.csv')
    else:
        writeOneCsv([model, best['max_depth'], best['n_estimators'], 0], src + '调参记录.csv')


def write_mae(model, data_type, mae):
    # Despite the name, this logs whatever metric value is passed in (accuracy here)
    writeOneCsv([model, data_type, 'mae', mae], src + '调参记录.csv')
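
A short usage sketch of the save_load helper, matching the commented-out line in the main script (the first argument is ignored when loading):

save_load(model_etr, 'save')         # persist the tuned model to etr.pkl under src
model_etr = save_load(None, 'load')  # reload it later for inference without re-tuning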
The writeOneCsv helper (from utils/read_write.py) appends one row to a CSV file:

import csv


def writeOneCsv(relate_record, src):
    # Append a single record (a list of values) as one CSV row
    try:
        with open(src, 'a', newline='\n') as csvFile:
            writer = csv.writer(csvFile)
            writer.writerow(relate_record)
    except Exception as e:
        print(e)
        print(relate_record)

Reprinted from: https://data-mining.blog.csdn.net/article/details/109235182
