This commit is contained in:
2026-05-21 14:23:16 +02:00
parent 65e084556d
commit 4192e5f2be

View File

@@ -29,6 +29,10 @@ Original file is located at
import sys import sys
IN_COLAB = 'google.colab' in sys.modules IN_COLAB = 'google.colab' in sys.modules
# if IN_COLAB:
# !pip install pandas numpy matplotlib seaborn pillow scikit-learn tensorflow
# !pip install --upgrade kagglehub[pandas-datasets,hf-datasets]
import kagglehub import kagglehub
# Download latest version # Download latest version
@@ -180,6 +184,18 @@ df['target'] = df['dx'].map(benign_malignant_dict)
# Preview # Preview
print(df[['dx', 'target']].head()) print(df[['dx', 'target']].head())
from sklearn.model_selection import train_test_split
train_df, val_df = train_test_split(
df,
test_size=0.2,
stratify=df['target'],
random_state=42
)
print("Train size:", len(train_df))
print("Validation size:", len(val_df))
# Convert 'target' column to string type for ImageDataGenerator # Convert 'target' column to string type for ImageDataGenerator
train_df['target'] = train_df['target'].astype(str) train_df['target'] = train_df['target'].astype(str)
val_df['target'] = val_df['target'].astype(str) val_df['target'] = val_df['target'].astype(str)
@@ -198,21 +214,10 @@ plt.show()
print(df['target'].value_counts()) print(df['target'].value_counts())
"""### Train / Validation split""" """### Train / Validation split
from sklearn.model_selection import train_test_split ### Class weight (class imbalance)
"""
train_df, val_df = train_test_split(
df,
test_size=0.2,
stratify=df['target'],
random_state=42
)
print("Train size:", len(train_df))
print("Validation size:", len(val_df))
"""### Class weight (class imbalance)"""
from sklearn.utils.class_weight import compute_class_weight from sklearn.utils.class_weight import compute_class_weight