# Importing required libraries
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score,precision_score,f1_score,recall_score,confusion_matrix
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Reading CSV
df=pd.read_csv('data.csv')
df.head()

# Drop irrelevant columns
df=df.drop(columns=['id','Unnamed: 32'])

df['diagnosis_numeric'] = df['diagnosis'].map({'M': 1, 'B': 0})

# Handle missing values
df.isna().sum()

diagnosis                  0
radius_mean                0
texture_mean               0
perimeter_mean             0
area_mean                  0
smoothness_mean            0
compactness_mean           0
concavity_mean             0
concave points_mean        0
symmetry_mean              0
fractal_dimension_mean     0
radius_se                  0
texture_se                 0
perimeter_se               0
area_se                    0
smoothness_se              0
compactness_se             0
concavity_se               0
concave points_se          0
symmetry_se                0
fractal_dimension_se       0
radius_worst               0
texture_worst              0
perimeter_worst            0
area_worst                 0
smoothness_worst           0
compactness_worst          0
concavity_worst            0
concave points_worst       0
symmetry_worst             0
fractal_dimension_worst    0
diagnosis_numeric          0
dtype: int64

# Checking size of dataset and datatypes of columns
print(f'Dataset size is {df.size}')
print(f'\nColumn Datatypes \n{df.dtypes}')

Dataset size is 18208

Column Datatypes 
diagnosis                   object
radius_mean                float64
texture_mean               float64
perimeter_mean             float64
area_mean                  float64
smoothness_mean            float64
compactness_mean           float64
concavity_mean             float64
concave points_mean        float64
symmetry_mean              float64
fractal_dimension_mean     float64
radius_se                  float64
texture_se                 float64
perimeter_se               float64
area_se                    float64
smoothness_se              float64
compactness_se             float64
concavity_se               float64
concave points_se          float64
symmetry_se                float64
fractal_dimension_se       float64
radius_worst               float64
texture_worst              float64
perimeter_worst            float64
area_worst                 float64
smoothness_worst           float64
compactness_worst          float64
concavity_worst            float64
concave points_worst       float64
symmetry_worst             float64
fractal_dimension_worst    float64
diagnosis_numeric            int64
dtype: object

# General Statistics
df.describe()

val_counts=df['diagnosis'].value_counts()
print(val_counts.values)
plt.bar(val_counts.index, val_counts.values)
plt.xlabel('Diagnosis')
plt.ylabel('Counts')
plt.show()

[357 212]

features = df.columns[1:31]

cols = 6
rows = 5

fig, axes = plt.subplots(rows, cols, figsize=(22, rows * 4))
axes = axes.flatten()

for i, col in enumerate(features):
    sns.boxplot(x='diagnosis', y=col, data=df, ax=axes[i])
    axes[i].set_title(col, fontsize=9)
    axes[i].tick_params(axis='x', labelrotation=45)

for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

plt.subplots_adjust(hspace=0.6, wspace=0.3)

plt.show()

X=df.drop(columns=['diagnosis','diagnosis_numeric'])
y=df['diagnosis_numeric']
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)
scaler=StandardScaler()
X_train_scaled=scaler.fit_transform(X_train)
X_test_scaled=scaler.transform(X_test)

lr=LogisticRegression().fit(X_train_scaled,y_train)
rf=RandomForestClassifier().fit(X_train,y_train)
svm=SVC(probability=True).fit(X_train_scaled,y_train)
knn=KNeighborsClassifier().fit(X_train_scaled,y_train)

lr_pred=lr.predict(X_test_scaled)
rf_pred=rf.predict(X_test)
svm_pred=svm.predict(X_test_scaled)
knn_pred=knn.predict(X_test_scaled)

models={
    "Logistic Regression":lr_pred,
    "Random Forest":rf_pred,
    "SVM":svm_pred,
    "KNN":knn_pred
}

for name,pred in models.items():
    print(f"Model : {name}")
    print(f"Accuracy Score: {accuracy_score(y_test,pred)}")
    print(f"Precision Score: {precision_score(y_test,pred)}")
    print(f"F1 Score: {f1_score(y_test,pred)}")
    print(f"Recall Score: {recall_score(y_test,pred)}")
    print(f"Confusion matrix\n {confusion_matrix(y_test,pred)}")
    print("\n")

Model : Logistic Regression
Accuracy Score: 0.9736842105263158
Precision Score: 0.9761904761904762
F1 Score: 0.9647058823529412
Recall Score: 0.9534883720930233
Confusion matrix
 [[70  1]
 [ 2 41]]


Model : Random Forest
Accuracy Score: 0.9649122807017544
Precision Score: 0.975609756097561
F1 Score: 0.9523809523809523
Recall Score: 0.9302325581395349
Confusion matrix
 [[70  1]
 [ 3 40]]


Model : SVM
Accuracy Score: 0.9824561403508771
Precision Score: 1.0
F1 Score: 0.9761904761904762
Recall Score: 0.9534883720930233
Confusion matrix
 [[71  0]
 [ 2 41]]


Model : KNN
Accuracy Score: 0.9473684210526315
Precision Score: 0.9302325581395349
F1 Score: 0.9302325581395349
Recall Score: 0.9302325581395349
Confusion matrix
 [[68  3]
 [ 3 40]]

lr_probs = lr.predict_proba(X_test_scaled)[:, 1]
lr_pred_new = (lr_probs > 0.4).astype(int)
svm_probs=svm.predict_proba(X_test_scaled)[:,1]
svm_pred_new=(svm_probs > 0.4).astype(int)
models={
    "Logistic (Threshold: 0.4)":lr_pred_new,
    "SVM New (Threshold: 0.4)":svm_pred_new
}
for name,pred in models.items():
    print(f"Model : {name}")
    print(f"Accuracy Score: {accuracy_score(y_test,pred)}")
    print(f"Precision Score: {precision_score(y_test,pred)}")
    print(f"F1 Score: {f1_score(y_test,pred)}")
    print(f"Recall Score: {recall_score(y_test,pred)}")
    print(f"Confusion matrix\n {confusion_matrix(y_test,pred)}")
    print("\n")

Model : Logistic (Threshold: 0.4)
Accuracy Score: 0.9824561403508771
Precision Score: 0.9767441860465116
F1 Score: 0.9767441860465116
Recall Score: 0.9767441860465116
Confusion matrix
 [[70  1]
 [ 1 42]]


Model : SVM New (Threshold: 0.4)
Accuracy Score: 0.956140350877193
Precision Score: 0.9318181818181818
F1 Score: 0.9425287356321839
Recall Score: 0.9534883720930233
Confusion matrix
 [[68  3]
 [ 2 41]]

lr=LogisticRegression(l1_ratio=0,solver='liblinear',class_weight='balanced')
lr.fit(X_train_scaled, y_train)
svm=SVC(class_weight='balanced',kernel='linear',probability=True)
svm.fit(X_train_scaled,y_train)
lr_probs = lr.predict_proba(X_test_scaled)[:, 1]
lr_pred_new = (lr_probs > 0.4).astype(int)
models={
    "Logistic (Threshold: 0.4)":lr_pred_new,
    "SVM New":svm_pred
}
for name,pred in models.items():
    print(f"Model : {name}")
    print(f"Accuracy Score: {accuracy_score(y_test,pred)}")
    print(f"Precision Score: {precision_score(y_test,pred)}")
    print(f"F1 Score: {f1_score(y_test,pred)}")
    print(f"Recall Score: {recall_score(y_test,pred)}")
    print(f"Confusion matrix\n {confusion_matrix(y_test,pred)}")
    print("\n")

Model : Logistic (Threshold: 0.4)
Accuracy Score: 0.9736842105263158
Precision Score: 0.9545454545454546
F1 Score: 0.9655172413793104
Recall Score: 0.9767441860465116
Confusion matrix
 [[69  2]
 [ 1 42]]


Model : SVM New
Accuracy Score: 0.9824561403508771
Precision Score: 1.0
F1 Score: 0.9761904761904762
Recall Score: 0.9534883720930233
Confusion matrix
 [[71  0]
 [ 2 41]]

clf=MLPClassifier(solver='adam',hidden_layer_sizes=(100,),random_state=42,early_stopping=True)
clf.fit(X_train,y_train)
clf_pred=clf.predict(X_test)

print("Model: MLP")
print(f"Accuracy Score: {accuracy_score(y_test,clf_pred)}")
print(f"Precision Score: {precision_score(y_test,clf_pred)}")
print(f"F1 Score: {f1_score(y_test,clf_pred)}")
print(f"Recall Score: {recall_score(y_test,clf_pred)}")
print(f"Confusion matrix\n {confusion_matrix(y_test,clf_pred)}")

Model: MLP
Accuracy Score: 0.9385964912280702
Precision Score: 0.8913043478260869
F1 Score: 0.9213483146067416
Recall Score: 0.9534883720930233
Confusion matrix
 [[66  5]
 [ 2 41]]

y_probs = clf.predict_proba(X_test)[:, 1]
clf_pred_new=(y_probs>=0.3).astype(int)
print("Model: MLP")
print(f"Accuracy Score: {accuracy_score(y_test,clf_pred_new)}")
print(f"Precision Score: {precision_score(y_test,clf_pred_new)}")
print(f"F1 Score: {f1_score(y_test,clf_pred_new)}")
print(f"Recall Score: {recall_score(y_test,clf_pred_new)}")
print(f"Confusion matrix\n {confusion_matrix(y_test,clf_pred_new)}")

Model: MLP
Accuracy Score: 0.8859649122807017
Precision Score: 0.7884615384615384
F1 Score: 0.8631578947368421
Recall Score: 0.9534883720930233
Confusion matrix
 [[60 11]
 [ 2 41]]

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2

model = Sequential()

# Input Layer
model.add(Dense(128, activation='relu',
                kernel_regularizer=l2(0.001),
                input_shape=(X_train.shape[1],)))
model.add(BatchNormalization())
model.add(Dropout(0.4))

# Hidden Layer 1
model.add(Dense(64, activation='relu',
                kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())
model.add(Dropout(0.3))

# Hidden Layer 2
model.add(Dense(32, activation='relu'))

# Output Layer
model.add(Dense(1, activation='sigmoid'))

model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy',
             tf.keras.metrics.Recall()
            ]
)

C:\Users\joshu\anaconda3\Lib\site-packages\keras\src\layers\core\dense.py:106: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-6
)
history = model.fit(
    X_train,
    y_train,
    epochs=200,
    batch_size=32,
    validation_split=0.2,
    callbacks=[lr_scheduler,early_stop],
    verbose=1
)

Epoch 1/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 6s 75ms/step - accuracy: 0.6511 - loss: 0.7057 - recall_3: 0.6691 - val_accuracy: 0.3626 - val_loss: 2.4376 - val_recall_3: 1.0000 - learning_rate: 0.0010
Epoch 2/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - accuracy: 0.8846 - loss: 0.4085 - recall_3: 0.8529 - val_accuracy: 0.3626 - val_loss: 2.1639 - val_recall_3: 1.0000 - learning_rate: 0.0010
Epoch 3/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - accuracy: 0.9038 - loss: 0.3435 - recall_3: 0.8456 - val_accuracy: 0.4066 - val_loss: 1.6458 - val_recall_3: 1.0000 - learning_rate: 0.0010
Epoch 4/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 17ms/step - accuracy: 0.8901 - loss: 0.3625 - recall_3: 0.8088 - val_accuracy: 0.5275 - val_loss: 1.0619 - val_recall_3: 1.0000 - learning_rate: 0.0010
Epoch 5/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step - accuracy: 0.9121 - loss: 0.2987 - recall_3: 0.8382 - val_accuracy: 0.5824 - val_loss: 0.8191 - val_recall_3: 1.0000 - learning_rate: 0.0010
Epoch 6/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 17ms/step - accuracy: 0.9148 - loss: 0.2755 - recall_3: 0.8824 - val_accuracy: 0.6923 - val_loss: 0.6220 - val_recall_3: 1.0000 - learning_rate: 0.0010
Epoch 7/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - accuracy: 0.9066 - loss: 0.2834 - recall_3: 0.8603 - val_accuracy: 0.7473 - val_loss: 0.5494 - val_recall_3: 1.0000 - learning_rate: 0.0010
Epoch 8/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step - accuracy: 0.8956 - loss: 0.3089 - recall_3: 0.8162 - val_accuracy: 0.7802 - val_loss: 0.5286 - val_recall_3: 1.0000 - learning_rate: 0.0010
Epoch 9/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step - accuracy: 0.9203 - loss: 0.2752 - recall_3: 0.8456 - val_accuracy: 0.8022 - val_loss: 0.4699 - val_recall_3: 1.0000 - learning_rate: 0.0010
Epoch 10/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step - accuracy: 0.9176 - loss: 0.2886 - recall_3: 0.8676 - val_accuracy: 0.7802 - val_loss: 0.4895 - val_recall_3: 1.0000 - learning_rate: 0.0010
Epoch 11/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - accuracy: 0.9093 - loss: 0.2787 - recall_3: 0.8382 - val_accuracy: 0.7143 - val_loss: 0.5720 - val_recall_3: 1.0000 - learning_rate: 0.0010
Epoch 12/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 16ms/step - accuracy: 0.8956 - loss: 0.2776 - recall_3: 0.8382 - val_accuracy: 0.8681 - val_loss: 0.4037 - val_recall_3: 0.9697 - learning_rate: 0.0010
Epoch 13/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step - accuracy: 0.9286 - loss: 0.2512 - recall_3: 0.8971 - val_accuracy: 0.8901 - val_loss: 0.3957 - val_recall_3: 1.0000 - learning_rate: 0.0010
Epoch 14/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - accuracy: 0.9258 - loss: 0.2718 - recall_3: 0.8676 - val_accuracy: 0.7363 - val_loss: 0.5172 - val_recall_3: 1.0000 - learning_rate: 0.0010
Epoch 15/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - accuracy: 0.9231 - loss: 0.2675 - recall_3: 0.8750 - val_accuracy: 0.9231 - val_loss: 0.3325 - val_recall_3: 0.9697 - learning_rate: 0.0010
Epoch 16/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 16ms/step - accuracy: 0.9286 - loss: 0.2388 - recall_3: 0.8603 - val_accuracy: 0.9121 - val_loss: 0.3220 - val_recall_3: 0.9394 - learning_rate: 0.0010
Epoch 17/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step - accuracy: 0.9203 - loss: 0.2307 - recall_3: 0.8750 - val_accuracy: 0.9231 - val_loss: 0.2901 - val_recall_3: 0.9394 - learning_rate: 0.0010
Epoch 18/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - accuracy: 0.9286 - loss: 0.2328 - recall_3: 0.8750 - val_accuracy: 0.9121 - val_loss: 0.2512 - val_recall_3: 0.8182 - learning_rate: 0.0010
Epoch 19/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - accuracy: 0.9368 - loss: 0.2223 - recall_3: 0.8897 - val_accuracy: 0.9121 - val_loss: 0.2466 - val_recall_3: 0.8485 - learning_rate: 0.0010
Epoch 20/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step - accuracy: 0.9368 - loss: 0.2375 - recall_3: 0.9044 - val_accuracy: 0.9011 - val_loss: 0.2431 - val_recall_3: 0.8182 - learning_rate: 0.0010
Epoch 21/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 16ms/step - accuracy: 0.9176 - loss: 0.2447 - recall_3: 0.8824 - val_accuracy: 0.9121 - val_loss: 0.2360 - val_recall_3: 0.8182 - learning_rate: 0.0010
Epoch 22/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - accuracy: 0.9341 - loss: 0.2151 - recall_3: 0.8971 - val_accuracy: 0.9121 - val_loss: 0.2287 - val_recall_3: 0.8182 - learning_rate: 0.0010
Epoch 23/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - accuracy: 0.9258 - loss: 0.2348 - recall_3: 0.8750 - val_accuracy: 0.9121 - val_loss: 0.2755 - val_recall_3: 0.9394 - learning_rate: 0.0010
Epoch 24/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step - accuracy: 0.9093 - loss: 0.2604 - recall_3: 0.8750 - val_accuracy: 0.9341 - val_loss: 0.2271 - val_recall_3: 0.9091 - learning_rate: 0.0010
Epoch 25/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - accuracy: 0.9231 - loss: 0.2356 - recall_3: 0.8824 - val_accuracy: 0.9121 - val_loss: 0.2301 - val_recall_3: 0.8788 - learning_rate: 0.0010
Epoch 26/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step - accuracy: 0.9423 - loss: 0.2008 - recall_3: 0.9044 - val_accuracy: 0.9121 - val_loss: 0.2193 - val_recall_3: 0.8182 - learning_rate: 0.0010
Epoch 27/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 17ms/step - accuracy: 0.9231 - loss: 0.2532 - recall_3: 0.8897 - val_accuracy: 0.9121 - val_loss: 0.2136 - val_recall_3: 0.8182 - learning_rate: 0.0010
Epoch 28/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - accuracy: 0.9286 - loss: 0.2092 - recall_3: 0.8971 - val_accuracy: 0.9231 - val_loss: 0.2116 - val_recall_3: 0.8485 - learning_rate: 0.0010
Epoch 29/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - accuracy: 0.9313 - loss: 0.2101 - recall_3: 0.8750 - val_accuracy: 0.9231 - val_loss: 0.2495 - val_recall_3: 0.9697 - learning_rate: 0.0010
Epoch 30/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step - accuracy: 0.9368 - loss: 0.1900 - recall_3: 0.8824 - val_accuracy: 0.9341 - val_loss: 0.2172 - val_recall_3: 0.9091 - learning_rate: 0.0010
Epoch 31/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 12ms/step - accuracy: 0.9368 - loss: 0.1880 - recall_3: 0.8824 - val_accuracy: 0.9011 - val_loss: 0.2277 - val_recall_3: 0.7879 - learning_rate: 0.0010
Epoch 32/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step - accuracy: 0.9423 - loss: 0.2115 - recall_3: 0.9265 - val_accuracy: 0.9121 - val_loss: 0.2463 - val_recall_3: 0.7879 - learning_rate: 0.0010
Epoch 33/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step - accuracy: 0.9368 - loss: 0.2082 - recall_3: 0.8971 - val_accuracy: 0.9121 - val_loss: 0.2481 - val_recall_3: 0.7879 - learning_rate: 0.0010
Epoch 34/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step - accuracy: 0.9396 - loss: 0.1796 - recall_3: 0.9118 - val_accuracy: 0.8901 - val_loss: 0.2973 - val_recall_3: 0.7273 - learning_rate: 5.0000e-04
Epoch 35/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step - accuracy: 0.9286 - loss: 0.2132 - recall_3: 0.8971 - val_accuracy: 0.8901 - val_loss: 0.3118 - val_recall_3: 0.6970 - learning_rate: 5.0000e-04
Epoch 36/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - accuracy: 0.9588 - loss: 0.1756 - recall_3: 0.9338 - val_accuracy: 0.9121 - val_loss: 0.2419 - val_recall_3: 0.7879 - learning_rate: 5.0000e-04
Epoch 37/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 17ms/step - accuracy: 0.9588 - loss: 0.1883 - recall_3: 0.9338 - val_accuracy: 0.9011 - val_loss: 0.2194 - val_recall_3: 0.7879 - learning_rate: 5.0000e-04
Epoch 38/200
12/12 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step - accuracy: 0.9286 - loss: 0.2267 - recall_3: 0.8824 - val_accuracy: 0.9121 - val_loss: 0.2226 - val_recall_3: 0.7879 - learning_rate: 5.0000e-04

y_probs = model.predict(X_test)
y_probs = y_probs.ravel()
y_pred = (y_probs > 0.4).astype(int)
print("Model: 1D CNN")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 72ms/step 
Model: 1D CNN
Accuracy: 0.9736842105263158
Precision: 0.9545454545454546
Recall: 0.9767441860465116
F1 Score: 0.9655172413793104
Confusion Matrix:
 [[69  2]
 [ 1 42]]

	id	diagnosis	radius_mean	texture_mean	perimeter_mean	area_mean	smoothness_mean	compactness_mean	concavity_mean	concave points_mean	...	texture_worst	perimeter_worst	area_worst	smoothness_worst	compactness_worst	concavity_worst	concave points_worst	symmetry_worst	fractal_dimension_worst	Unnamed: 32
0	842302	M	17.99	10.38	122.80	1001.0	0.11840	0.27760	0.3001	0.14710	...	17.33	184.60	2019.0	0.1622	0.6656	0.7119	0.2654	0.4601	0.11890	NaN
1	842517	M	20.57	17.77	132.90	1326.0	0.08474	0.07864	0.0869	0.07017	...	23.41	158.80	1956.0	0.1238	0.1866	0.2416	0.1860	0.2750	0.08902	NaN
2	84300903	M	19.69	21.25	130.00	1203.0	0.10960	0.15990	0.1974	0.12790	...	25.53	152.50	1709.0	0.1444	0.4245	0.4504	0.2430	0.3613	0.08758	NaN
3	84348301	M	11.42	20.38	77.58	386.1	0.14250	0.28390	0.2414	0.10520	...	26.50	98.87	567.7	0.2098	0.8663	0.6869	0.2575	0.6638	0.17300	NaN
4	84358402	M	20.29	14.34	135.10	1297.0	0.10030	0.13280	0.1980	0.10430	...	16.67	152.20	1575.0	0.1374	0.2050	0.4000	0.1625	0.2364	0.07678	NaN

	radius_mean	texture_mean	perimeter_mean	area_mean	smoothness_mean	compactness_mean	concavity_mean	concave points_mean	symmetry_mean	fractal_dimension_mean	...	texture_worst	perimeter_worst	area_worst	smoothness_worst	compactness_worst	concavity_worst	concave points_worst	symmetry_worst	fractal_dimension_worst	diagnosis_numeric
count	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	...	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000
mean	14.127292	19.289649	91.969033	654.889104	0.096360	0.104341	0.088799	0.048919	0.181162	0.062798	...	25.677223	107.261213	880.583128	0.132369	0.254265	0.272188	0.114606	0.290076	0.083946	0.372583
std	3.524049	4.301036	24.298981	351.914129	0.014064	0.052813	0.079720	0.038803	0.027414	0.007060	...	6.146258	33.602542	569.356993	0.022832	0.157336	0.208624	0.065732	0.061867	0.018061	0.483918
min	6.981000	9.710000	43.790000	143.500000	0.052630	0.019380	0.000000	0.000000	0.106000	0.049960	...	12.020000	50.410000	185.200000	0.071170	0.027290	0.000000	0.000000	0.156500	0.055040	0.000000
25%	11.700000	16.170000	75.170000	420.300000	0.086370	0.064920	0.029560	0.020310	0.161900	0.057700	...	21.080000	84.110000	515.300000	0.116600	0.147200	0.114500	0.064930	0.250400	0.071460	0.000000
50%	13.370000	18.840000	86.240000	551.100000	0.095870	0.092630	0.061540	0.033500	0.179200	0.061540	...	25.410000	97.660000	686.500000	0.131300	0.211900	0.226700	0.099930	0.282200	0.080040	0.000000
75%	15.780000	21.800000	104.100000	782.700000	0.105300	0.130400	0.130700	0.074000	0.195700	0.066120	...	29.720000	125.400000	1084.000000	0.146000	0.339100	0.382900	0.161400	0.317900	0.092080	1.000000
max	28.110000	39.280000	188.500000	2501.000000	0.163400	0.345400	0.426800	0.201200	0.304000	0.097440	...	49.540000	251.200000	4254.000000	0.222600	1.058000	1.252000	0.291000	0.663800	0.207500	1.000000

Model	Accuracy	Precision	Recall	F1 Score	False Positives	False Negatives
Logistic Regression	98.25%	97.67%	97.67%	97.67%	1	1
Random Forest	96.49%	97.56%	93.02%	95.24%	1	3
Support Vector Machine	98.25%	100%	95.35%	97.62%	0	2
K-Nearest Neighbors	94.74%	93.02%	93.02%	93.02%	3	3
Multi-Layer Perceptron	88.60%	78.85%	95.35%	86.32%	11	2
1D Convolutional Neural Network	97.37%	95.45%	97.67%	96.55%	2	1

Visualizing Class distribution¶

Visualizing feature and malignancy correlation¶

Prepare data for machine learning¶

Fitting data into Logistic Regression, Random Forest, Scalable Vector Machine and K-Nearest Neighbors¶

Hyper Parameter Tuning¶

Lowering threshold¶

No further improvements seen in both Logistic regression and SVM by tweaking parameters.¶

Deep Learning Models¶

Multi-layer Perceptron¶

1D - CNN¶

📊 Model Comparison Results¶

Overview¶

🔎 Key Observations¶

1️⃣ Logistic Regression¶

2️⃣ Support Vector Machine (SVM)¶

3️⃣ Random Forest¶

4️⃣ K-Nearest Neighbors (KNN)¶

5️⃣ Deep Learning Models¶

Multi-Layer Perceptron (MLP)¶

1D CNN¶

Comparison¶

🏆 Final Conclusion¶