update 2
This commit is contained in:
@@ -29,6 +29,10 @@ Original file is located at
|
|||||||
import sys
|
import sys
|
||||||
IN_COLAB = 'google.colab' in sys.modules
|
IN_COLAB = 'google.colab' in sys.modules
|
||||||
|
|
||||||
|
# if IN_COLAB:
|
||||||
|
# !pip install pandas numpy matplotlib seaborn pillow scikit-learn tensorflow
|
||||||
|
# !pip install --upgrade kagglehub[pandas-datasets,hf-datasets]
|
||||||
|
|
||||||
import kagglehub
|
import kagglehub
|
||||||
|
|
||||||
# Download latest version
|
# Download latest version
|
||||||
@@ -180,6 +184,18 @@ df['target'] = df['dx'].map(benign_malignant_dict)
|
|||||||
# Preview
|
# Preview
|
||||||
print(df[['dx', 'target']].head())
|
print(df[['dx', 'target']].head())
|
||||||
|
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
train_df, val_df = train_test_split(
|
||||||
|
df,
|
||||||
|
test_size=0.2,
|
||||||
|
stratify=df['target'],
|
||||||
|
random_state=42
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Train size:", len(train_df))
|
||||||
|
print("Validation size:", len(val_df))
|
||||||
|
|
||||||
# Convert 'target' column to string type for ImageDataGenerator
|
# Convert 'target' column to string type for ImageDataGenerator
|
||||||
train_df['target'] = train_df['target'].astype(str)
|
train_df['target'] = train_df['target'].astype(str)
|
||||||
val_df['target'] = val_df['target'].astype(str)
|
val_df['target'] = val_df['target'].astype(str)
|
||||||
@@ -198,21 +214,10 @@ plt.show()
|
|||||||
|
|
||||||
print(df['target'].value_counts())
|
print(df['target'].value_counts())
|
||||||
|
|
||||||
"""### Train / Validation split"""
|
"""### Train / Validation split
|
||||||
|
|
||||||
from sklearn.model_selection import train_test_split
|
### Class weight (class imbalance)
|
||||||
|
"""
|
||||||
train_df, val_df = train_test_split(
|
|
||||||
df,
|
|
||||||
test_size=0.2,
|
|
||||||
stratify=df['target'],
|
|
||||||
random_state=42
|
|
||||||
)
|
|
||||||
|
|
||||||
print("Train size:", len(train_df))
|
|
||||||
print("Validation size:", len(val_df))
|
|
||||||
|
|
||||||
"""### Class weight (class imbalance)"""
|
|
||||||
|
|
||||||
from sklearn.utils.class_weight import compute_class_weight
|
from sklearn.utils.class_weight import compute_class_weight
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user