Compare commits
2 Commits
65e084556d
...
0d69f340ae
| Author | SHA1 | Date | |
|---|---|---|---|
| | 0d69f340ae | | |
| | 4192e5f2be | | |
@@ -29,6 +29,10 @@ Original file is located at
|
|||||||
import sys
|
import sys
|
||||||
IN_COLAB = 'google.colab' in sys.modules
|
IN_COLAB = 'google.colab' in sys.modules
|
||||||
|
|
||||||
|
# if IN_COLAB:
|
||||||
|
# !pip install pandas numpy matplotlib seaborn pillow scikit-learn tensorflow
|
||||||
|
# !pip install --upgrade kagglehub[pandas-datasets,hf-datasets]
|
||||||
|
|
||||||
import kagglehub
|
import kagglehub
|
||||||
|
|
||||||
# Download latest version
|
# Download latest version
|
||||||
@@ -180,6 +184,18 @@ df['target'] = df['dx'].map(benign_malignant_dict)
|
|||||||
# Preview
|
# Preview
|
||||||
print(df[['dx', 'target']].head())
|
print(df[['dx', 'target']].head())
|
||||||
|
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
train_df, val_df = train_test_split(
|
||||||
|
df,
|
||||||
|
test_size=0.2,
|
||||||
|
stratify=df['target'],
|
||||||
|
random_state=42
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Train size:", len(train_df))
|
||||||
|
print("Validation size:", len(val_df))
|
||||||
|
|
||||||
# Convert 'target' column to string type for ImageDataGenerator
|
# Convert 'target' column to string type for ImageDataGenerator
|
||||||
train_df['target'] = train_df['target'].astype(str)
|
train_df['target'] = train_df['target'].astype(str)
|
||||||
val_df['target'] = val_df['target'].astype(str)
|
val_df['target'] = val_df['target'].astype(str)
|
||||||
@@ -198,21 +214,10 @@ plt.show()
|
|||||||
|
|
||||||
print(df['target'].value_counts())
|
print(df['target'].value_counts())
|
||||||
|
|
||||||
"""### Train / Validation split"""
|
"""### Train / Validation split
|
||||||
|
|
||||||
from sklearn.model_selection import train_test_split
|
### Class weight (class imbalance)
|
||||||
|
"""
|
||||||
train_df, val_df = train_test_split(
|
|
||||||
df,
|
|
||||||
test_size=0.2,
|
|
||||||
stratify=df['target'],
|
|
||||||
random_state=42
|
|
||||||
)
|
|
||||||
|
|
||||||
print("Train size:", len(train_df))
|
|
||||||
print("Validation size:", len(val_df))
|
|
||||||
|
|
||||||
"""### Class weight (class imbalance)"""
|
|
||||||
|
|
||||||
from sklearn.utils.class_weight import compute_class_weight
|
from sklearn.utils.class_weight import compute_class_weight
|
||||||
|
|
||||||
@@ -236,6 +241,12 @@ from tensorflow.keras.applications import DenseNet121
|
|||||||
from tensorflow.keras.applications.densenet import preprocess_input
|
from tensorflow.keras.applications.densenet import preprocess_input
|
||||||
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
|
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
|
||||||
from tensorflow.keras.models import Model
|
from tensorflow.keras.models import Model
|
||||||
|
from tensorflow.keras.optimizers import Adam
|
||||||
|
|
||||||
|
gpus = tf.config.list_physical_devices('GPU')
|
||||||
|
print("GPUs:", gpus)
|
||||||
|
|
||||||
|
strategy = tf.distribute.MirroredStrategy()
|
||||||
|
|
||||||
data_augmentation = tf.keras.Sequential([
|
data_augmentation = tf.keras.Sequential([
|
||||||
tf.keras.layers.RandomFlip("horizontal"),
|
tf.keras.layers.RandomFlip("horizontal"),
|
||||||
@@ -260,7 +271,9 @@ x = Dense(512, activation='relu')(x) # Added another Dense layer
|
|||||||
x = Dense(256, activation='relu')(x) # Existing Dense layer
|
x = Dense(256, activation='relu')(x) # Existing Dense layer
|
||||||
predictions = Dense(1, activation='sigmoid')(x) # Output layer for binary classification
|
predictions = Dense(1, activation='sigmoid')(x) # Output layer for binary classification
|
||||||
|
|
||||||
|
with strategy.scope(): # Use all gpus
|
||||||
model = Model(inputs=base_model.input, outputs=predictions)
|
model = Model(inputs=base_model.input, outputs=predictions)
|
||||||
|
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
|
||||||
|
|
||||||
"""## 4. Data Generators
|
"""## 4. Data Generators
|
||||||
|
|
||||||
@@ -308,21 +321,7 @@ val_generator = val_datagen.flow_from_dataframe(
|
|||||||
seed=42
|
seed=42
|
||||||
)
|
)
|
||||||
|
|
||||||
"""## 5. Compile the Model
|
"""## 6. Train the Model"""
|
||||||
|
|
||||||
I will compile the model using the Adam optimizer, binary cross-entropy loss (suitable for binary classification), and track accuracy as a metric.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
from tensorflow.keras.optimizers import Adam
|
|
||||||
|
|
||||||
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
|
|
||||||
|
|
||||||
"""## 6. Train the Model
|
|
||||||
|
|
||||||
I will now train the model using the prepared data generators. I'll also add callbacks for early stopping to prevent overfitting and to save the best model.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
|
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
|
||||||
|
|
||||||
@@ -349,7 +348,7 @@ history = model.fit(
|
|||||||
class_weight=class_weights # Use class weights to handle imbalance
|
class_weight=class_weights # Use class weights to handle imbalance
|
||||||
)
|
)
|
||||||
|
|
||||||
"""## X. Evaluation
|
"""## 7. Evaluation
|
||||||
|
|
||||||
### Load best model
|
### Load best model
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user