Test

Final final
We are done
2026-06-03 18:08:13 +02:00 · 2026-05-26 16:10:39 +02:00 · 2026-05-26 16:00:50 +02:00 · 2026-05-26 15:09:15 +02:00 · 2026-05-26 15:07:02 +02:00 · 2026-05-26 14:31:57 +02:00
3 changed files with 1909 additions and 414 deletions
--- a/Skin_Cancer_Classification.ipynb
+++ b/Skin_Cancer_Classification.ipynb
--- a/run.sh
+++ b/run.sh
@@ -16,8 +16,10 @@ source "$VENV_DIR/bin/activate"
 # 3. Install dependencies (lightweight, safe to re-run)
 echo "Installing dependencies..."
 pip install --upgrade pip
-pip install pandas numpy matplotlib seaborn pillow scikit-learn tensorflow
+pip install pandas numpy matplotlib seaborn pillow scikit-learn
+pip install "tensorflow[and-cuda]"
 pip install --upgrade kagglehub[pandas-datasets,hf-datasets]

-python3 skin_cancer_classification.py
+jupyter nbconvert --to notebook --execute Skin_Cancer_Classification.ipynb --output Skin_Cancer_Classification_Final.ipynb --log-level=DEBUG
+# python3 skin_cancer_classification.py
 echo "Done."
--- a/skin_cancer_classification.py
+++ b/skin_cancer_classification.py
@@ -1,412 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Skin Cancer Classification.ipynb
-
-Automatically generated by Colab.
-
-Original file is located at
-    https://colab.research.google.com/drive/1Nhk-lK1OCihOAxo36qt2e1_WgdLc-EiR
-
-# Skin Cancer Classification
-**Output**: Benign / Malignant
-
-**Model**: DenseNet121 (transfer learning)
-
-**Dataset**: Skin Cancer: HAM10000 dataset https://www.kaggle.com/datasets/kmader/skin-cancer-mnist-ham10000/data
-
-| Class | Name                                | Value     |
-|-------|-------------------------------------|-----------|
-| nv    | Melanocytic nevi (moles)            | Benign    |
-| bcc   | Basal cell carcinoma                | Malignant |
-| bkl   | Benign keratosis-like lesions       | Benign    |
-| df    | Dermatofibroma                      | Benign    |
-| vasc  | Vascular lesions                    | Benign    |
-| mel   | Melanoma                            | Malignant |
-| akiec | Actinic keratoses / Bowen's disease | Benign    |
-
-## 1. Requirements and dataset download
-"""
-
-import sys
-IN_COLAB = 'google.colab' in sys.modules
-
-# if IN_COLAB:
-#  !pip install pandas numpy matplotlib seaborn pillow scikit-learn tensorflow
-#  !pip install --upgrade kagglehub[pandas-datasets,hf-datasets]
-
-import kagglehub
-
-# Download latest version
-path = kagglehub.dataset_download("kmader/skin-cancer-mnist-ham10000")
-
-print("Path to dataset files:", path)
-
-# !cp -R /kaggle/input/skin-cancer-mnist-ham10000 /content/skin-cancer-mnist-ham10000
-
-"""## 2. Imports and setup"""
-
-import os
-import glob
-import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
-import seaborn as sns
-
-from PIL import Image
-
-# Source - https://stackoverflow.com/a/53586419
-# Posted by korakot, modified by community. See post 'Timeline' for change history
-# Retrieved 2026-05-21, License - CC BY-SA 4.0
-
-"""### Loading dataset"""
-
-# Path to your dataset folder
-dataset_path = path
-
-# Metadata file
-metadata_path = os.path.join(dataset_path, "HAM10000_metadata.csv")
-
-# Load CSV
-df = pd.read_csv(metadata_path)
-
-# Show first rows
-print(df.head())
-
-# Collect all image paths
-image_paths = glob.glob(os.path.join(dataset_path, "**", "*.jpg"), recursive=True)
-
-# Create dictionary:
-# key = image_id
-# value = full image path
-imageid_path_dict = {
-    os.path.splitext(os.path.basename(x))[0]: x
-    for x in image_paths
-}
-
-# Add image path column
-df['path'] = df['image_id'].map(imageid_path_dict.get)
-
-# Check
-print(df[['image_id', 'path']].head())
-
-"""## 3. Dataset analysis
-
-### Missingness
-"""
-
-missing = df['path'].isnull().sum()
-
-print(f"Missing images: {missing}")
-
-"""### Class distribution"""
-
-plt.figure(figsize=(10,5))
-
-sns.countplot(data=df, x='dx', order=df['dx'].value_counts().index)
-
-plt.title("Class Distribution")
-plt.xlabel("Diagnosis")
-plt.ylabel("Count")
-plt.show()
-
-"""### Visualize samples"""
-
-fig, axes = plt.subplots(2, 4, figsize=(10,5))
-
-for i, ax in enumerate(axes.flat):
-    sample = df.sample(1).iloc[0]
-
-    img = Image.open(sample['path'])
-
-    ax.imshow(img)
-    ax.set_title(sample['dx'])
-    ax.axis('off')
-
-plt.tight_layout()
-plt.show()
-
-"""### Data distribution"""
-
-plt.figure(figsize=(8,5))
-
-sns.histplot(df['age'].dropna(), bins=20)
-
-plt.title("Age Distribution")
-plt.show()
-
-sns.countplot(data=df, x='sex')
-
-plt.title("Sex Distribution")
-plt.show()
-
-plt.figure(figsize=(12,5))
-
-sns.countplot(
-    data=df,
-    x='localization',
-    order=df['localization'].value_counts().index
-)
-
-plt.xticks(rotation=45)
-plt.title("Lesion Localization")
-plt.show()
-
-"""### Image sizes"""
-
-sizes = []
-
-for path in df['path'].sample(100):
-    img = Image.open(path)
-    sizes.append(img.size)
-
-print(pd.Series(sizes).value_counts())
-
-"""## 2. Prepare dataset
-
-### Encoding Binary Labels
-"""
-
-# Mapping from dx to benign/malignant
-# 0 --> benign
-# 1 --> malignant
-benign_malignant_dict = {
-    'nv': 0,
-    'bcc': 1,
-    'bkl': 0,
-    'df': 0,
-    'vasc': 0,
-    'mel': 1,
-    'akiec': 0
-}
-
-# Create new column
-df['target'] = df['dx'].map(benign_malignant_dict)
-
-# Preview
-print(df[['dx', 'target']].head())
-
-from sklearn.model_selection import train_test_split
-
-train_df, val_df = train_test_split(
-    df,
-    test_size=0.2,
-    stratify=df['target'],
-    random_state=42
-)
-
-print("Train size:", len(train_df))
-print("Validation size:", len(val_df))
-
-# Convert 'target' column to string type for ImageDataGenerator
-train_df['target'] = train_df['target'].astype(str)
-val_df['target'] = val_df['target'].astype(str)
-
-print("Train target dtype after conversion:", train_df['target'].dtype)
-print("Validation target dtype after conversion:", val_df['target'].dtype)
-
-plt.figure(figsize=(6,3))
-
-sns.countplot(data=df, x='target')
-
-plt.title("Benign vs Malignant Distribution")
-plt.xlabel("Lesion Type")
-plt.ylabel("Count")
-plt.show()
-
-print(df['target'].value_counts())
-
-"""### Train / Validation split
-
-### Class weight (class imbalance)
-"""
-
-from sklearn.utils.class_weight import compute_class_weight
-
-classes = np.unique(train_df['target'])
-
-weights = compute_class_weight(
-    class_weight='balanced',
-    classes=classes,
-    y=train_df['target']
-)
-
-class_weights = dict(enumerate(weights))
-
-print(class_weights)
-
-"""## 3. Build the model"""
-
-import tensorflow as tf
-from tensorflow.keras import layers, models
-from tensorflow.keras.applications import DenseNet121
-from tensorflow.keras.applications.densenet import preprocess_input
-from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
-from tensorflow.keras.models import Model
-from tensorflow.keras.optimizers import Adam
-
-gpus = tf.config.list_physical_devices('GPU')
-print("GPUs:", gpus)
-
-strategy = tf.distribute.MirroredStrategy()
-
-data_augmentation = tf.keras.Sequential([
-    tf.keras.layers.RandomFlip("horizontal"),
-    tf.keras.layers.RandomRotation(0.1),
-    tf.keras.layers.RandomZoom(0.1),
-    tf.keras.layers.RandomContrast(0.1),
-])
-
-base_model = DenseNet121(
-    weights='imagenet',
-    include_top=False,
-    input_shape=(224, 224, 3)
-)
-
-inputs = tf.keras.Input(shape=(224,224,3))
-
-x = data_augmentation(inputs)
-
-x = base_model.output
-x = GlobalAveragePooling2D()(x)
-x = Dense(512, activation='relu')(x) # Added another Dense layer
-x = Dense(256, activation='relu')(x) # Existing Dense layer
-predictions = Dense(1, activation='sigmoid')(x) # Output layer for binary classification
-
-with strategy.scope(): # Use all gpus
-  model = Model(inputs=base_model.input, outputs=predictions)
-  model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
-
-"""## 4. Data Generators
-
-I will prepare data generators for training and validation. The training generator will include data augmentation and preprocessing, while the validation generator will only preprocess the images.
-"""
-
-from tensorflow.keras.preprocessing.image import ImageDataGenerator
-
-# Image dimensions
-IMG_WIDTH = 224
-IMG_HEIGHT = 224
-
-# Data generators
-train_datagen = ImageDataGenerator(
-    preprocessing_function=preprocess_input,
-    rotation_range=20,
-    width_shift_range=0.2,
-    height_shift_range=0.2,
-    shear_range=0.2,
-    zoom_range=0.2,
-    horizontal_flip=True,
-    fill_mode='nearest'
-)
-
-val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
-
-# Flow from dataframe
-train_generator = train_datagen.flow_from_dataframe(
-    dataframe=train_df,
-    x_col='path',
-    y_col='target',
-    target_size=(IMG_WIDTH, IMG_HEIGHT),
-    batch_size=32,
-    class_mode='binary',
-    seed=42
-)
-
-val_generator = val_datagen.flow_from_dataframe(
-    dataframe=val_df,
-    x_col='path',
-    y_col='target',
-    target_size=(IMG_WIDTH, IMG_HEIGHT),
-    batch_size=32,
-    class_mode='binary',
-    seed=42
-)
-
-"""## 6. Train the Model"""
-
-from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
-
-# Callbacks
-early_stopping = EarlyStopping(
-    monitor='val_loss',
-    patience=10,
-    restore_best_weights=True
-)
-
-model_checkpoint = ModelCheckpoint(
-    'best_model.keras',
-    monitor='val_accuracy',
-    save_best_only=True,
-    mode='max'
-)
-
-# Train the model
-history = model.fit(
-    train_generator,
-    epochs=3, # You can adjust the number of epochs
-    validation_data=val_generator,
-    callbacks=[early_stopping, model_checkpoint],
-    class_weight=class_weights # Use class weights to handle imbalance
-)
-
-"""## 7. Evaluation
-
-### Load best model
-"""
-
-from tensorflow.keras.models import load_model
-
-best_model = load_model('best_model.keras')
-
-"""### Evaluate on validation set"""
-
-loss, accuracy = best_model.evaluate(val_generator)
-print(f"Validation Loss: {loss:.4f}")
-print(f"Validation Accuracy: {accuracy:.4f}")
-
-"""### Predictions and Classification Report"""
-
-from sklearn.metrics import classification_report, confusion_matrix
-
-val_generator.reset() # Reset generator to ensure correct order
-y_pred_probs = best_model.predict(val_generator)
-y_pred = (y_pred_probs > 0.5).astype(int)
-
-y_true = val_generator.classes
-
-print("Classification Report:")
-print(classification_report(y_true, y_pred))
-
-"""### Confusion Matrix"""
-
-cm = confusion_matrix(y_true, y_pred)
-plt.figure(figsize=(8, 6))
-sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Benign', 'Malignant'], yticklabels=['Benign', 'Malignant'])
-plt.title('Confusion Matrix')
-plt.xlabel('Predicted Label')
-plt.ylabel('True Label')
-plt.show()
-
-"""### Training History Plots"""
-
-plt.figure(figsize=(12, 5))
-
-# Plot training & validation accuracy values
-plt.subplot(1, 2, 1)
-plt.plot(history.history['accuracy'])
-plt.plot(history.history['val_accuracy'])
-plt.title('Model Accuracy')
-plt.ylabel('Accuracy')
-plt.xlabel('Epoch')
-plt.legend(['Train', 'Validation'], loc='upper left')
-
-# Plot training & validation loss values
-plt.subplot(1, 2, 2)
-plt.plot(history.history['loss'])
-plt.plot(history.history['val_loss'])
-plt.title('Model Loss')
-plt.ylabel('Loss')
-plt.xlabel('Epoch')
-plt.legend(['Train', 'Validation'], loc='upper left')
-
-plt.tight_layout()
-plt.show()
Author	SHA1	Message	Date
BinarySandia04	9478b2427e	Test	2026-06-03 18:08:13 +02:00
BinarySandia04	d616ec2168	Final final	2026-05-26 16:10:39 +02:00
BinarySandia04	637aa2e380	We are done	2026-05-26 16:00:50 +02:00
BinarySandia04	dd3bd507b1	Debug	2026-05-26 15:09:15 +02:00
BinarySandia04	b3d9c2e76e	Si	2026-05-26 15:07:02 +02:00
BinarySandia04	f62842d23e	Whatever	2026-05-26 14:31:57 +02:00
BinarySandia04	1a7deafab7	Test notebook run	2026-05-26 14:14:30 +02:00
BinarySandia04	b930018981	Final2	2026-05-25 20:29:44 +02:00
BinarySandia04	9dc29f19e4	Final?	2026-05-25 20:25:44 +02:00
BinarySandia04	b2c0110d11	ok no	2026-05-21 17:53:27 +02:00
BinarySandia04	4b56e164a1	Parellization ok!	2026-05-21 17:45:28 +02:00
BinarySandia04	2174ab1fb0	Ok now we want to get better accuracy	2026-05-21 17:33:34 +02:00
BinarySandia04	ba3d03b186	now yes	2026-05-21 15:42:07 +02:00
BinarySandia04	e1a8d38cee	typo	2026-05-21 15:10:30 +02:00
BinarySandia04	3162934b99	ok	2026-05-21 15:05:51 +02:00
BinarySandia04	6a25385409	Test	2026-05-21 14:59:29 +02:00