update 2
This commit is contained in:
@@ -29,6 +29,10 @@ Original file is located at
|
||||
import sys
|
||||
IN_COLAB = 'google.colab' in sys.modules
|
||||
|
||||
# if IN_COLAB:
|
||||
# !pip install pandas numpy matplotlib seaborn pillow scikit-learn tensorflow
|
||||
# !pip install --upgrade kagglehub[pandas-datasets,hf-datasets]
|
||||
|
||||
import kagglehub
|
||||
|
||||
# Download latest version
|
||||
@@ -180,6 +184,18 @@ df['target'] = df['dx'].map(benign_malignant_dict)
|
||||
# Preview
|
||||
print(df[['dx', 'target']].head())
|
||||
|
||||
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation. Stratifying on 'target' asks
# scikit-learn to keep the class proportions the same in both splits, and the
# fixed random_state makes the split reproducible between runs.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['target'],
    random_state=42
)

print("Train size:", len(train_df))
print("Validation size:", len(val_df))

# Convert 'target' column to string type for ImageDataGenerator.
# assign() returns a new DataFrame instead of writing a column into the
# objects handed back by the split, which avoids pandas'
# SettingWithCopyWarning on those derived frames.
train_df = train_df.assign(target=train_df['target'].astype(str))
val_df = val_df.assign(target=val_df['target'].astype(str))
|
||||
@@ -198,21 +214,10 @@ plt.show()
|
||||
|
||||
print(df['target'].value_counts())
|
||||
|
||||
"""### Train / Validation split"""
|
||||
"""### Train / Validation split
|
||||
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
train_df, val_df = train_test_split(
|
||||
df,
|
||||
test_size=0.2,
|
||||
stratify=df['target'],
|
||||
random_state=42
|
||||
)
|
||||
|
||||
print("Train size:", len(train_df))
|
||||
print("Validation size:", len(val_df))
|
||||
|
||||
"""### Class weight (class imbalance)"""
|
||||
### Class weight (class imbalance)
|
||||
"""
|
||||
|
||||
from sklearn.utils.class_weight import compute_class_weight
|
||||
|
||||
|
||||
Reference in New Issue
Block a user