import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForCausalLM

# Checkpoint identifier and the local directory passed as `cache_dir` below.
model_id = "Qwen/Qwen3.5-0.8B"
cache_path = "./.cache"

# Tokenizer and model are both loaded from `model_id` with the same cache dir.
tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_path)

# Load the causal LM with bfloat16 weights and device_map="auto", then switch
# it to evaluation mode (nn.Module.eval() returns the module itself, so the
# call can be chained onto the load).
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    cache_dir=cache_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
).eval()

# Dump the module tree so the layer layout can be inspected.
print(model)

# -----------------------------
# 1. Replace the weights of ALL linear layers
# -----------------------------
def replace_weights_with_random(module):
    """Overwrite every ``nn.Linear`` under ``module`` with standard-normal noise.

    Visits ``module`` itself and all of its descendants. For each ``nn.Linear``
    found, the weight (and the bias, when the layer has one) is overwritten in
    place with values drawn by ``torch.randn_like``, so each parameter keeps
    its shape, dtype and device. Because the write is in place, anything else
    that shares the same ``Parameter`` object sees the new values as well.

    Args:
        module: Root ``nn.Module`` to process. It may itself be an
            ``nn.Linear``, in which case its own parameters are randomized.

    Returns:
        None. ``module`` is modified in place.
    """
    # In-place writes to leaf parameters that require grad are rejected by
    # autograd unless gradient tracking is disabled.
    with torch.no_grad():
        # modules() yields the root first and then every descendant once, so
        # a bare nn.Linear root (which has no children) is handled too.
        for layer in module.modules():
            if not isinstance(layer, nn.Linear):
                continue
            layer.weight.copy_(torch.randn_like(layer.weight))
            if layer.bias is not None:
                layer.bias.copy_(torch.randn_like(layer.bias))

# Randomize the loaded model in place before generating from it.
replace_weights_with_random(model)
print("All linear layer weights replaced with random tensors.")

# -----------------------------
# 2. Run inference
# -----------------------------
prompt = "Explain tensor networks in simple terms."

# Tokenize to PyTorch tensors, then move them to the device the model reports.
inputs = tokenizer(prompt, return_tensors="pt")
inputs = inputs.to(model.device)

# Sample up to 500 new tokens at temperature 1.0 with gradients disabled.
with torch.no_grad():
    output = model.generate(
        **inputs,
        do_sample=True,
        temperature=1.0,
        max_new_tokens=500,
    )

print("\n=== OUTPUT ===\n")
# Decode the first (only) returned sequence, dropping special tokens.
decoded_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(decoded_text)

# Save the randomized model first, then the tokenizer, to the same directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained("./.cache/qwen_random")