COMBAI computational biology and artificial intelligence
binary classifier
def myModel(train_x):
    """Build and compile the feed-forward binary classifier.

    Layers, in order: Dense(30, relu) -> Dropout(0.1) -> Dense(60, relu)
    -> Dropout(0.1) -> Dense(1, sigmoid). The model is compiled with the
    Adam optimizer, binary cross-entropy loss and the accuracy metric.

    Args:
        train_x: Two-dimensional training features. Only the number of
            columns is read, so a pandas DataFrame or a NumPy array both
            work.

    Returns:
        The compiled, not yet fitted, ``Sequential`` model.
    """
    # shape[1] equals len(train_x.columns) for a DataFrame and is also
    # defined for plain 2-D arrays.
    n_features = train_x.shape[1]
    mymodel = Sequential([
        Dense(30, input_dim=n_features, activation='relu'),
        Dropout(0.1),
        Dense(60, activation='relu'),
        Dropout(0.1),
        Dense(1, activation='sigmoid'),
    ])
    mymodel.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return mymodel
Typical run
# Build the network sized to the training features, then fit it silently
# (verbose=0) for 30 epochs in shuffled mini-batches of 20, handing the
# held-out (val_x, val_y) pair to fit() as validation data.
nnt = myModel(train_x)
history = nnt.fit(
    x=train_x,
    y=train_y,
    batch_size=20,
    epochs=30,
    shuffle=True,
    validation_data=(val_x, val_y),
    verbose=0,
)
An example run comparing three mathematical models
import os,pickle
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense,Dropout
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy,binary_crossentropy
from sklearn.utils import shuffle
import pandas as pd
import numpy as np
from scipy import interp
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import tree
###NN model
def myModel(train_x):
    """Build and compile the feed-forward binary classifier.

    Layers, in order: Dense(30, relu) -> Dropout(0.1) -> Dense(60, relu)
    -> Dropout(0.1) -> Dense(1, sigmoid). The model is compiled with the
    Adam optimizer, binary cross-entropy loss and the accuracy metric.

    Args:
        train_x: Two-dimensional training features. Only the number of
            columns is read, so a pandas DataFrame or a NumPy array both
            work.

    Returns:
        The compiled, not yet fitted, ``Sequential`` model.
    """
    # shape[1] equals len(train_x.columns) for a DataFrame and is also
    # defined for plain 2-D arrays.
    n_features = train_x.shape[1]
    mymodel = Sequential([
        Dense(30, input_dim=n_features, activation='relu'),
        Dropout(0.1),
        Dense(60, activation='relu'),
        Dropout(0.1),
        Dense(1, activation='sigmoid'),
    ])
    mymodel.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return mymodel
# ROC Plot
def plotROC(i, fpr_nnt, tpr_nnt, auc_nnt, fpr_rfc, tpr_rfc, auc_rfc, fpr_td, tpr_td, auc_td):
    """Plot the ROC curves of the three models and save them as PDF and PNG.

    Args:
        i: Value shown in the plot title and embedded in both file names.
        fpr_nnt, tpr_nnt, auc_nnt: False-positive rates, true-positive
            rates and area under the curve for the curve labelled "NNT".
        fpr_rfc, tpr_rfc, auc_rfc: Same triple for the curve labelled "RF".
        fpr_td, tpr_td, auc_td: Same triple for the curve labelled "TD".

    Returns:
        None. The figure is written to ``Marker_number:<i>_ROC.pdf`` and
        ``Marker_number:<i>_ROC.png`` and then closed.
    """
    curves = (
        (fpr_nnt, tpr_nnt, 'NNT (area = {:.3f})', auc_nnt),
        (fpr_rfc, tpr_rfc, 'RF (area = {:.3f})', auc_rfc),
        (fpr_td, tpr_td, 'TD (area = {:.3f})', auc_td),
    )

    # Black dashed diagonal from (0, 0) to (1, 1) as a visual reference.
    plt.plot([0, 1], [0, 1], 'k--')
    for false_pos, true_pos, label_template, area in curves:
        plt.plot(false_pos, true_pos, label=label_template.format(area))

    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title("Number: " + str(i) + '_ROC curve')
    plt.legend(loc='best')

    # Same stem for both outputs; only the extension differs.
    file_stem = "Marker_number:" + str(i) + '_ROC.'
    for extension in ('pdf', 'png'):
        plt.savefig(file_stem + extension)

    # Close the figure so curves from this call do not carry over into the next.
    plt.close()
###model comparison
def _split_features_target(frame):
    """Return ``(features, target)`` for a frame holding a 'target' column."""
    return frame.drop(columns=['target']), frame['target']


def seriesRun(X1):
    """Compare three models on growing prefixes of the columns of ``X1``.

    For every ``i`` from 1 to ``len(X1.columns) - 1`` the first ``i``
    columns plus the 'target' column are split into train / validation /
    test parts (20% test, then 20% of the remainder for validation).
    Three models are fitted on the training part and scored on the test
    part by ROC AUC:

    * the Keras network returned by ``myModel``,
    * a ``RandomForestClassifier`` (max_depth=3, 40 trees),
    * a depth-2 ``DecisionTreeRegressor`` whose raw predictions are used
      as ROC scores.

    Side effects per iteration: the network's per-epoch training and
    validation accuracy are written to
    ``total_vs_c__<i>_ACCURACY_matrix.csv`` and ``plotROC`` saves the ROC
    figure.

    Args:
        X1: pandas DataFrame that contains a 'target' column.
            NOTE(review): the loop bounds look like they assume 'target'
            is the last column -- confirm against the caller.

    Returns:
        dict mapping ``str(i)`` to ``[auc_nnt, auc_rfc, auc_td]``.
    """
    auc_dic = {}
    for i in range(1, len(X1.columns)):
        # assign() returns a new frame, so the label is never written into
        # a slice of X1 (the original chained assignment triggered
        # pandas' SettingWithCopyWarning).
        x1 = X1.iloc[:, :i].assign(target=X1['target'])

        train, test = train_test_split(x1, test_size=0.2)
        train, val = train_test_split(train, test_size=0.2)
        train_x, train_y = _split_features_target(train)
        val_x, val_y = _split_features_target(val)
        test_x, test_y = _split_features_target(test)

        # --- neural network ---
        nnt = myModel(train_x)
        history = nnt.fit(
            x=train_x,
            y=train_y,
            validation_data=(val_x, val_y),
            batch_size=20,
            epochs=30,
            shuffle=True,
            verbose=0,
        )
        df = pd.DataFrame({
            "acc": history.history['accuracy'],
            "val_acc": history.history['val_accuracy'],
        })
        # Extension fixed from the original ".csc" typo: the file is
        # produced by to_csv.
        df.to_csv("total_vs_c_" + "_" + str(i) + "_" + "ACCURACY_matrix.csv")
        pred_nnt = nnt.predict(test_x, batch_size=20, verbose=0).ravel()
        fpr_nnt, tpr_nnt, _ = roc_curve(test_y, pred_nnt)
        auc_nnt = auc(fpr_nnt, tpr_nnt)

        # --- random forest: probability of the second class as the score ---
        rfc = RandomForestClassifier(max_depth=3, n_estimators=40)
        rfc.fit(train_x, train_y)
        pred_rfc = rfc.predict_proba(test_x)[:, 1]
        fpr_rfc, tpr_rfc, _ = roc_curve(test_y, pred_rfc)
        auc_rfc = auc(fpr_rfc, tpr_rfc)

        # --- decision tree: regression output is used directly as the score ---
        td = tree.DecisionTreeRegressor(random_state=0, max_depth=2)
        td = td.fit(train_x, train_y)
        pred_td = td.predict(test_x)
        fpr_td, tpr_td, _ = roc_curve(test_y, pred_td)
        auc_td = auc(fpr_td, tpr_td)

        auc_dic[str(i)] = [auc_nnt, auc_rfc, auc_td]
        plotROC(i, fpr_nnt, tpr_nnt, auc_nnt, fpr_rfc, tpr_rfc, auc_rfc, fpr_td, tpr_td, auc_td)
    return auc_dic
To search the biomarker database, please enter a gene ID or symbol based on GRCh38.p2.v22.
References