COMBAI: Computational Biology and Artificial Intelligence
binary classifier
def myModel(train_x):
    """Build and compile the binary classifier network.

    The network is a Sequential stack: Dense(30, relu) -> Dropout(0.1) ->
    Dense(60, relu) -> Dropout(0.1) -> Dense(1, sigmoid), compiled with the
    Adam optimizer, binary cross-entropy loss and an accuracy metric.

    Args:
        train_x: Training feature table; only ``len(train_x.columns)`` is
            read, to size the input of the first layer.

    Returns:
        The compiled (untrained) Keras model.
    """
    mymodel = Sequential([
        # The input width equals the number of feature columns.
        Dense(30, input_dim=len(train_x.columns), activation='relu'),
        Dropout(0.1),
        Dense(60, activation='relu'),
        Dropout(0.1),
        # A single sigmoid unit produces a score in [0, 1] for the binary label.
        Dense(1, activation='sigmoid'),
    ])
    mymodel.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return mymodel
Typical run
# Build the classifier sized for the training feature table.
nnt = myModel(train_x)

# Fit for 30 epochs in shuffled mini-batches of 20 samples, evaluating on the
# validation split after each epoch; verbose=0 suppresses progress output.
history = nnt.fit(
    x=train_x,
    y=train_y,
    validation_data=(val_x, val_y),
    batch_size=20,
    epochs=30,
    shuffle=True,
    verbose=0,
)
An example run comparing three models (neural network, random forest, decision tree)
import os
import pickle
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Dropout
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy, binary_crossentropy
from sklearn.utils import shuffle
import pandas as pd
import numpy as np
# scipy.interp was only a deprecated alias of numpy.interp; bind the same name
# from NumPy directly so the import does not depend on that alias.
from numpy import interp
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import tree


### NN model
def myModel(train_x):
    """Build and compile the binary classifier network.

    The network is a Sequential stack: Dense(30, relu) -> Dropout(0.1) ->
    Dense(60, relu) -> Dropout(0.1) -> Dense(1, sigmoid), compiled with the
    Adam optimizer, binary cross-entropy loss and an accuracy metric.

    Args:
        train_x: Training feature table; only ``len(train_x.columns)`` is
            read, to size the input of the first layer.

    Returns:
        The compiled (untrained) Keras model.
    """
    n_features = len(train_x.columns)
    mymodel = Sequential([
        Dense(30, input_dim=n_features, activation='relu'),
        Dropout(0.1),
        Dense(60, activation='relu'),
        Dropout(0.1),
        # A single sigmoid unit produces a score in [0, 1] for the binary label.
        Dense(1, activation='sigmoid'),
    ])
    mymodel.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return mymodel


# ROC plot
def plotROC(i, fpr_nnt, tpr_nnt, auc_nnt, fpr_rfc, tpr_rfc, auc_rfc, fpr_td, tpr_td, auc_td):
    """Draw the ROC curves of the three models and save them as PDF and PNG.

    Args:
        i: Number shown in the plot title and embedded in the file names.
        fpr_nnt, tpr_nnt, auc_nnt: False/true positive rates and AUC of the
            neural network.
        fpr_rfc, tpr_rfc, auc_rfc: Same for the random forest.
        fpr_td, tpr_td, auc_td: Same for the decision tree.

    Returns:
        None. Writes ``Marker_number:<i>_ROC.pdf`` and
        ``Marker_number:<i>_ROC.png`` to the current working directory and
        closes the current matplotlib figure.
    """
    # Diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(fpr_nnt, tpr_nnt, label='NNT (area = {:.3f})'.format(auc_nnt))
    plt.plot(fpr_rfc, tpr_rfc, label='RF (area = {:.3f})'.format(auc_rfc))
    plt.plot(fpr_td, tpr_td, label='TD (area = {:.3f})'.format(auc_td))
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title("Number: " + str(i) + '_ROC curve')
    plt.legend(loc='best')
    # NOTE(review): ':' is not a valid file-name character on Windows; the
    # names are kept unchanged so existing outputs stay where they are.
    plt.savefig("Marker_number:" + str(i) + '_ROC.' + 'pdf')
    plt.savefig("Marker_number:" + str(i) + '_ROC.' + 'png')
    # Close the figure so curves from successive calls do not accumulate.
    plt.close()
    return


def _split_xy(frame):
    """Return ``(features, labels)``: *frame* without 'target', and its 'target' column."""
    return frame.drop(columns=['target']), frame['target']


def _roc_auc(y_true, scores):
    """Return ``(fpr, tpr, auc)`` of the ROC curve for *scores* against *y_true*."""
    fpr, tpr, _thresholds = roc_curve(y_true, scores)
    return fpr, tpr, auc(fpr, tpr)


### model comparison
def seriesRun(X1):
    """Compare three models on growing prefixes of the feature columns.

    For ``i = 1 .. number of feature columns`` the first ``i`` feature
    columns (every column except 'target', in their original order) are
    split 80/20 into train/test, the training part again 80/20 into
    train/validation, and three models are fitted on the training part:

    * the Keras network from ``myModel`` (30 epochs, batch size 20),
    * a random forest (``max_depth=3``, ``n_estimators=40``),
    * a decision-tree regressor (``max_depth=2``, ``random_state=0``) whose
      continuous predictions are used as scores.

    Each model is scored on the test part with ROC/AUC, the curves are drawn
    via ``plotROC`` and the per-epoch training/validation accuracy of the
    network is written to ``total_vs_c__<i>_ACCURACY_matrix.csc``.

    Args:
        X1: pandas DataFrame holding the feature columns and a 'target'
            column with the binary label. It is not modified.

    Returns:
        dict mapping ``str(i)`` to ``[auc_nnt, auc_rfc, auc_td]``.

    Raises:
        KeyError: if *X1* has no 'target' column.
    """
    # Take prefixes over the feature columns only, so the result does not
    # depend on 'target' being the last column of X1.
    features = X1.drop(columns=['target'])
    auc_dic = {}
    for i in range(1, len(features.columns) + 1):
        # Work on an explicit copy: adding a column to a bare slice of the
        # caller's frame triggers pandas' SettingWithCopyWarning.
        x1 = features.iloc[:, :i].copy()
        x1['target'] = X1['target']

        train, test = train_test_split(x1, test_size=0.2)
        train, val = train_test_split(train, test_size=0.2)
        train_x, train_y = _split_xy(train)
        val_x, val_y = _split_xy(val)
        test_x, test_y = _split_xy(test)

        # Neural network.
        nnt = myModel(train_x)
        history = nnt.fit(x=train_x, y=train_y,
                          validation_data=(val_x, val_y),
                          batch_size=20, epochs=30, shuffle=True, verbose=0)
        h = history.history['accuracy']
        v = history.history['val_accuracy']
        df = pd.DataFrame(list(zip(h, v)), columns=["acc", "val_acc"])
        # NOTE(review): the '.csc' extension looks like a typo for '.csv';
        # kept unchanged so consumers of the existing files keep working.
        df.to_csv("total_vs_c_" + "_" + str(i) + "_" + "ACCURACY_matrix.csc")
        pred_nnt = nnt.predict(test_x, batch_size=20, verbose=0).ravel()
        fpr_nnt, tpr_nnt, auc_nnt = _roc_auc(test_y, pred_nnt)

        # Random forest; column 1 of predict_proba is used as the score.
        rfc = RandomForestClassifier(max_depth=3, n_estimators=40)
        rfc.fit(train_x, train_y)
        pred_rfc = rfc.predict_proba(test_x)[:, 1]
        fpr_rfc, tpr_rfc, auc_rfc = _roc_auc(test_y, pred_rfc)

        # Decision tree fitted as a regressor on the label column.
        td = tree.DecisionTreeRegressor(random_state=0, max_depth=2)
        td = td.fit(train_x, train_y)
        pred_td = td.predict(test_x)
        fpr_td, tpr_td, auc_td = _roc_auc(test_y, pred_td)

        auc_dic[str(i)] = [auc_nnt, auc_rfc, auc_td]
        plotROC(i, fpr_nnt, tpr_nnt, auc_nnt, fpr_rfc, tpr_rfc, auc_rfc, fpr_td, tpr_td, auc_td)
    return auc_dic
To search the biomarker database, please enter a gene ID or symbol based on GRCh38.p2.v22
References