Motor Fault Diagnosis Classification Algorithm — 100% Classification Accuracy
Published: 2021-06-29 19:49:12 · Category: Technical Articles


Motor Fault Diagnosis Classification Algorithm

Dataset source:
The dataset can be downloaded from the dataset page after registering and logging in.

The data files have also been uploaded to my downloads section.

The platform hosts datasets from a variety of industrial scenarios, including manufacturing, rail transit, energy and power, and semiconductors, collected at several levels: component, equipment, and production line.

About the motor fault diagnosis data:

Data description: the training data contains operating data from three different machines and has been anonymized. It includes one motor fault, which occurs on machine 3.

The data contains the following fields:
temp_drv: generator bearing temperature, drive end
temp_nondrv: generator bearing temperature, non-drive end
wind_speed: ambient wind speed
wind_dir: ambient wind direction
env_temp: ambient temperature
power_W: motor active power
time: timestamp
wtid: machine ID
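
As a quick sanity check, the schema can be inspected with pandas (a minimal sketch, assuming the Motor_fault_train.csv file used by the loading code further below):

import pandas as pd

df = pd.read_csv('Motor_fault_train.csv')
print(df.columns.tolist())        # the eight fields above, plus the fault label the loaders read
print(df['wtid'].value_counts())  # rows per machine; the documented fault is on machine 3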

Final results: as shown in the screenshots in the original post, the classifier reaches 100% accuracy.

Overall approach:

1. To be updated later.
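
The main training script below builds two first-layer classifiers (random forest and extra-trees), evaluates each on the training and validation sets, then feeds their predictions into a second-layer extra-trees model, i.e. a simple two-level stacking ensemble.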

import warnings

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

from Motor_fault.data_utiles import train, get_test
from Motor_fault.model_utils import build_model_etr, score_model, \
    write_mae, build_model_rf, pr, scatter_line, draw_ROC_curve, save_load

warnings.filterwarnings('ignore')

x_train, y_train = train()
x_val, y_val = get_test()
# np.random.seed(2020)

# First layer: random forest and extra-trees classifiers
model_rf = build_model_rf(x_train, y_train)
val_rf = model_rf.predict(x_val)

model_etr = build_model_etr(x_train, y_train)
# model_etr = save_load('', 'load')  # alternatively, reload a previously saved model
val_etr = model_etr.predict(x_val)

accuracy = round(accuracy_score(y_val, val_etr), 6)
print('etr test set accuracy:', accuracy)
write_mae('etr', 'test set', accuracy)
score_model(y_val, val_etr, model_etr, 'validation set')
scatter_line(y_val, val_etr)

train_etr_pred = model_etr.predict(x_train)
accuracy = round(accuracy_score(y_train, train_etr_pred), 6)
print('etr training set accuracy:', accuracy)
write_mae('etr', 'training set', accuracy)

train_rf_pred = model_rf.predict(x_train)
accuracy = round(accuracy_score(y_train, train_rf_pred), 6)
print('rf training set accuracy:', accuracy)
write_mae('rf', 'training set', accuracy)

# Second layer: stack the first-layer predictions as new features
Strak_X_train = pd.DataFrame()
Strak_X_train['Method_2'] = train_rf_pred
Strak_X_train['Method_4'] = train_etr_pred

Strak_X_val = pd.DataFrame()
Strak_X_val['Method_2'] = val_rf  # column names must match the training frame
Strak_X_val['Method_4'] = val_etr

model_Stacking = build_model_etr(Strak_X_train, y_train)

train_pre_Stacking = model_Stacking.predict(Strak_X_train)
score_model(y_train, train_pre_Stacking, model_Stacking, 'training set')

val_pre_Stacking = model_Stacking.predict(Strak_X_val)
score_model(y_val, val_pre_Stacking, model_Stacking, 'validation set')

val_proba_Stacking = model_Stacking.predict_proba(Strak_X_val)
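
For comparison, scikit-learn ships a built-in StackingClassifier that implements the same two-layer idea; unlike the script above, it trains the second layer on out-of-fold first-layer predictions, which avoids fitting the meta-model on in-sample outputs. A minimal sketch, reusing the tuned hyperparameters from the grid searches in model_utils.py below:

from sklearn.ensemble import StackingClassifier, RandomForestClassifier, ExtraTreesClassifier

# First-layer models mirror build_model_rf / build_model_etr; the final
# estimator plays the role of the second-layer extra-trees model.
stack = StackingClassifier(
    estimators=[
        ('rf', RandomForestClassifier(max_depth=42, n_estimators=79)),
        ('etr', ExtraTreesClassifier(max_depth=33, n_estimators=108)),
    ],
    final_estimator=ExtraTreesClassifier(),
    cv=3,  # out-of-fold predictions feed the final estimator
)
stack.fit(x_train, y_train)
print('stacking validation accuracy:', stack.score(x_val, y_val))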
data_utiles.py

import os

os.chdir(r'E:\项目文件\电机故障诊断\\')

import pandas as pd


def train():
    # Read the first 9 columns: 6 features (columns 0-5), label in the last column
    data = pd.read_csv('Motor_fault_train.csv', usecols=range(0, 9))
    return data.values[:, 0:6], data.values[:, -1]


def get_test():
    data = pd.read_csv('Motor_fault_test.csv', dtype=float, usecols=range(0, 9), engine='python')
    return data.values[:, 0:6], data.values[:, -1]
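
A quick shape check confirms what the loaders return: six feature columns and a one-dimensional label vector (a minimal sketch):

x_train, y_train = train()
x_val, y_val = get_test()
print(x_train.shape, y_train.shape)  # (n_train, 6) features, (n_train,) labels
print(x_val.shape, y_val.shape)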

Model utility methods. The file wraps a number of sklearn-style classifiers: decision tree, random forest, extremely randomized trees (extra-trees), multi-layer perceptron, LightGBM, XGBoost, GBDT, logistic regression, and more.

model_utils.py

import joblib
import numpy as np
import matplotlib.pyplot as plt
from lightgbm import LGBMClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, ExtraTreesClassifier, \
    AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, accuracy_score, roc_auc_score, precision_recall_curve, auc, roc_curve, \
    f1_score, recall_score, cohen_kappa_score
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

from utils.read_write import writeOneCsv

src = r'E:\项目文件\电机故障诊断\data\\'


def build_model_dt(x_train, y_train):
    estimator = DecisionTreeClassifier(random_state=7)
    param_grid = {
        'max_depth': range(10, 25, 1),
    }
    model = GridSearchCV(estimator, param_grid, cv=3)
    model.fit(x_train, y_train)
    print('dt')
    print(model.best_params_)
    writeParams('dt', model.best_params_)
    return model


def build_model_rf(x_train, y_train):
    estimator = RandomForestClassifier()
    # Single-value ranges keep the already-tuned parameters fixed
    param_grid = {
        'max_depth': range(42, 43, 1),
        'n_estimators': range(79, 80, 1),
    }
    model = GridSearchCV(estimator, param_grid, cv=3)
    model.fit(x_train, y_train)
    print('rf')
    print(model.best_params_)
    writeParams('rf', model.best_params_)
    return model


def build_model_etr(x_train, y_train):
    # Extra-trees classifier; n_estimators is the number of trees in the forest
    estimator = ExtraTreesClassifier()
    param_grid = {
        'max_depth': range(33, 34, 1),
        'n_estimators': range(108, 109, 1),
    }
    model = GridSearchCV(estimator, param_grid, cv=3)
    model.fit(x_train, y_train)
    print('etr')
    print(model.best_params_)
    writeParams('etr', model.best_params_)
    return model


def build_model_xgb(x_train, y_train):
    estimator = XGBClassifier(gamma=0, colsample_bytree=0.9, subsample=0.91)
    param_grid = {
        'learning_rate': [0.27],
        'max_depth': range(12, 13, 1),
        'n_estimators': range(34, 35, 3),
    }
    model = GridSearchCV(estimator, param_grid, cv=3)
    model.fit(x_train, y_train)
    print('xgb')
    print(model.best_params_)
    writeParams('xgb', model.best_params_)
    return model


def build_model_lgb(x_train, y_train):
    estimator = LGBMClassifier()
    param_grid = {
        'learning_rate': [0.18],
        'n_estimators': range(100, 101, 1),
        'num_leaves': range(75, 80, 5),
    }
    gbm = GridSearchCV(estimator, param_grid)
    gbm.fit(x_train, y_train.ravel())
    print('lgb')
    print(gbm.best_params_)
    writeParams('lgb', gbm.best_params_)
    return gbm


def build_model_mlpr(x_train, y_train):
    from sklearn.neural_network import MLPClassifier
    # relu activation with the lbfgs solver gave the best results
    mlp = MLPClassifier(activation='relu', solver='lbfgs')
    param_grid = {
        'alpha': [0.002, 0.001],
        'hidden_layer_sizes': [(38, 19)],
        'max_iter': range(75, 85, 1),
    }
    model = GridSearchCV(mlp, param_grid, cv=3)
    model.fit(x_train, y_train.ravel())
    print('mlpr')
    print(model.best_params_)
    writeParams('mlpr', model.best_params_)
    return model


def build_model_ada(x_train, y_train):
    estimator = AdaBoostClassifier()
    param_grid = {
        'learning_rate': [0.23],
        'n_estimators': range(13, 14, 1),
    }
    model = GridSearchCV(estimator, param_grid, cv=3)
    model.fit(x_train, y_train)
    print('ada')
    print(model.best_params_)
    writeParams('ada', model.best_params_)
    return model


def build_model_gbdt(x_train, y_train):
    estimator = GradientBoostingClassifier(min_samples_leaf=0.1, min_samples_split=10, subsample=0.998)
    param_grid = {
        'learning_rate': [0.75],
        'max_depth': range(25, 30, 1),
        'n_estimators': range(80, 85, 1),
    }
    gbdt = GridSearchCV(estimator, param_grid, cv=3)
    gbdt.fit(x_train, y_train.ravel())
    print('gbdt')
    print(gbdt.best_params_)
    writeParams('gbdt', gbdt.best_params_)
    return gbdt


def build_model_liner_svc(x_train, y_train):
    # LinearSVC requires a positive iteration limit; unlike SVC, max_iter=-1 is invalid
    svm_reg = LinearSVC(max_iter=10000)
    param_grid = {
        'C': range(1, 2, 1),
    }
    model = GridSearchCV(svm_reg, param_grid, cv=3)
    model.fit(x_train, y_train)
    print('LinearSVC')
    print(model.best_params_)
    return model


def train_logistic_classifier(x_train, y_train):
    model = LogisticRegression()
    param_grid = {
        'C': range(2, 3, 1),
        'penalty': ['l2'],
    }
    model = GridSearchCV(model, param_grid, cv=3)
    model.fit(x_train, y_train.ravel())
    print('LR')
    print(model.best_params_)
    return model


def build_model_svc(x_train, y_train):
    model = SVC(max_iter=-1)
    # 'precomputed' is dropped from the kernel grid: it needs a square kernel matrix, not raw features
    param_grid = {
        'C': range(1, 2, 2),
        'kernel': ['poly', 'rbf'],
        'cache_size': range(200, 210, 20),
    }
    model = GridSearchCV(model, param_grid, cv=3)
    model.fit(x_train, y_train.ravel())
    print('SVC')
    print(model.best_params_)
    return model


def score_model(test, predict, model, data_type):
    accuracy = round(accuracy_score(test, predict), 6)
    print(data_type + ',accuracy,', accuracy)
    writeOneCsv(['staking', data_type, 'accuracy', accuracy], src + '调参记录.csv')
    pre_score = precision_score(test, predict, average="macro")
    print(data_type + ",precision,", round(pre_score, 6))
    writeOneCsv(['staking', data_type, 'precision', round(pre_score, 6)], src + '调参记录.csv')
    roc_auc = round(roc_auc_score(test, predict), 6)
    print(data_type + ",roc_auc,", roc_auc)
    writeOneCsv(['staking', data_type, 'roc_auc', roc_auc], src + '调参记录.csv')
    # sklearn metrics expect (y_true, y_pred) in that order
    f1 = f1_score(test, predict)
    print(data_type + ",f1,", round(f1, 6))
    writeOneCsv(['staking', data_type, 'f1', round(f1, 6)], src + '调参记录.csv')
    recall = recall_score(test, predict)
    print(data_type + ",recall,", round(recall, 6))
    writeOneCsv(['staking', data_type, 'recall', round(recall, 6)], src + '调参记录.csv')
    cohen_kappa = cohen_kappa_score(test, predict)
    print(data_type + ",cohen_kappa,", round(cohen_kappa, 6))
    writeOneCsv(['staking', data_type, 'cohen_kappa', round(cohen_kappa, 6)], src + '调参记录.csv')


def save_load(model, save_or_load):
    path = src + 'etr.pkl'
    if save_or_load == 'save':
        # save model
        joblib.dump(model, path)
    else:
        # load model
        model_etr = joblib.load(path)
        return model_etr


def fit_size(x, y):
    from sklearn import preprocessing
    # Min-max scale features and labels to [0, 1]
    x_min = preprocessing.MinMaxScaler()
    y_min = preprocessing.MinMaxScaler()
    y = np.array(y).reshape(len(y), 1)
    x = x_min.fit_transform(x)
    y = y_min.fit_transform(y)
    return x, y


def scatter_line(y_val, y_pre):
    xx = range(0, len(y_val))
    plt.scatter(xx, y_val, color="red", label="actual", linewidth=3)
    plt.plot(xx, y_pre, color="orange", label="predicted", linewidth=2)
    plt.legend()
    plt.show()


def draw_ROC_curve(y_test, y_predict):
    false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_predict)
    roc_auc = auc(false_positive_rate, true_positive_rate)
    plt.title('ROC')
    plt.plot(false_positive_rate, true_positive_rate, 'b', label='AUC = %0.2f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.ylabel('TPR')
    plt.xlabel('FPR')
    plt.show()
    plt.close()


def pr(y_val, predict_proba):
    precision, recall, thresholds = precision_recall_curve(y_val, predict_proba)
    plt.step(recall, precision, color='b', alpha=0.2, where='post')
    plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('2-class Precision-Recall curve')
    plt.show()


def writeParams(model, best):
    if model in ['gbdt', 'xgb']:
        writeOneCsv([model, best['max_depth'], best['n_estimators'], best['learning_rate']], src + '调参记录.csv')
    elif model == 'mlpr':
        writeOneCsv([model, best['hidden_layer_sizes'], best['max_iter'], best['alpha']], src + '调参记录.csv')
    elif model == 'ada':
        writeOneCsv([model, 0, best['n_estimators'], best['learning_rate']], src + '调参记录.csv')
    elif model == 'lgb':
        writeOneCsv([model, best['num_leaves'], best['n_estimators'], best['learning_rate']], src + '调参记录.csv')
    elif model == 'dt':
        writeOneCsv([model, best['max_depth'], 0, 0], src + '调参记录.csv')
    else:
        writeOneCsv([model, best['max_depth'], best['n_estimators'], 0], src + '调参记录.csv')


def write_mae(model, data_type, mae):
    # Despite the name, this logs whatever metric value is passed in (accuracy here)
    writeOneCsv([model, data_type, 'mae', mae], src + '调参记录.csv')
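
A short usage sketch of the save_load helper, matching the commented-out line in the main script (the first argument is ignored when loading):

save_load(model_etr, 'save')         # persist the tuned model to etr.pkl under src
model_etr = save_load(None, 'load')  # reload it later for inference without re-tuning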
The writeOneCsv helper (from utils/read_write.py) appends one row to a CSV file:

import csv


def writeOneCsv(relate_record, src):
    # Append a single record (a list of values) as one CSV row
    try:
        with open(src, 'a', newline='\n') as csvFile:
            writer = csv.writer(csvFile)
            writer.writerow(relate_record)
    except Exception as e:
        print(e)
        print(relate_record)

Reprinted from: https://data-mining.blog.csdn.net/article/details/109235182
