"""
NodeMind v4 — verify the size compression yourself.

Doesn't load any NodeMind internals. Reads file sizes from disk, shows the
math behind the compression claims, and prints the recall numbers recorded
in benchmark_results.json.
"""
import os
import json

print("=" * 60)
print("NodeMind v4 — file size comparison")
print("=" * 60)

# (filename, human-readable description) for every artifact whose on-disk
# size is reported below. Paths are relative to the current working directory.
files = [
    ("float32_rag_bgem3.pkl",     "Float32 RAG baseline (BGE-M3, what standard RAG keeps)"),
    ("float32_rag_bgebase.pkl",   "Float32 RAG baseline (BGE-base)"),
    ("nodemind_index_32x.pkl",    "NodeMind 32x  binary index (BGE-M3 1024-bit fingerprint)"),
    ("nodemind_index_96x.pkl",    "NodeMind 96x  binary index (BGE-base 256-bit fingerprint)"),
]


def compression_ratio(size_by_file, baseline, index):
    """Return size(baseline) / size(index), or None when it is undefined.

    Args:
        size_by_file: mapping of filename -> size in bytes.
        baseline: filename whose size is the numerator.
        index: filename whose size is the denominator.

    Returns:
        The ratio as a float, or None when either file has no recorded size
        or the index file is 0 bytes (which would otherwise raise
        ZeroDivisionError).
    """
    numerator = size_by_file.get(baseline)
    denominator = size_by_file.get(index)
    if numerator is None or not denominator:
        return None
    return numerator / denominator


# filename -> size in bytes, only for the files that exist on disk.
sizes = {}
for fname, desc in files:
    if os.path.exists(fname):
        s = os.path.getsize(fname)
        sizes[fname] = s
        # Sizes are shown in decimal megabytes (1 MB = 1e6 bytes).
        print(f"  {fname:30s}  {s/1e6:8.2f} MB   {desc}")
    else:
        print(f"  {fname:30s}  MISSING (download first)")

print()
print("=" * 60)
print("Compression ratios (math)")
print("=" * 60)

# (baseline file, compressed index file, label printed before the ratio).
comparisons = [
    ("float32_rag_bgem3.pkl", "nodemind_index_32x.pkl",
     "Float32 RAG / NodeMind 32x"),
    ("float32_rag_bgebase.pkl", "nodemind_index_96x.pkl",
     "Float32 RAG (bge-base) / NodeMind 96x"),
]
for baseline, index, label in comparisons:
    ratio = compression_ratio(sizes, baseline, index)
    if ratio is not None:
        print(f"  {label} = {ratio:.1f}x smaller")
    elif baseline in sizes and index in sizes:
        # Both files exist but the index is 0 bytes: report it instead of
        # crashing on a division by zero.
        print(f"  {label}: n/a ({index} is empty)")

print()
print("=" * 60)
print("Recall numbers (from benchmark_results.json)")
print("=" * 60)


def load_benchmark_results(path="benchmark_results.json"):
    """Load the benchmark results JSON object from *path*.

    Returns the parsed dict, or None (after printing a one-line notice) when
    the file is missing, unreadable, not valid JSON, or not a JSON object, so
    that the rest of the script keeps running.
    """
    try:
        # JSON is UTF-8 by specification; don't depend on the locale default.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"  {path}  MISSING (download first)")
        return None
    except (OSError, ValueError) as err:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"  {path}  UNREADABLE ({err})")
        return None
    if not isinstance(data, dict):
        print(f"  {path}  UNEXPECTED FORMAT (top level is not a JSON object)")
        return None
    return data


def format_count(value):
    """Format a number with thousands separators, or "?" if it is not a number."""
    return f"{value:,}" if isinstance(value, (int, float)) else "?"


def format_metric_line(method, scores):
    """Return the report line for one retrieval method.

    *scores* is a dict expected to hold "R@10", "NDCG@10" and "MRR@10";
    any entry that is absent or non-numeric is rendered as "n/a".
    """
    def score(key):
        val = scores.get(key)
        return f"{val:.3f}" if isinstance(val, (int, float)) else "n/a"

    return (f"    {method:25s} R@10={score('R@10')}  NDCG@10={score('NDCG@10')}"
            f"  MRR@10={score('MRR@10')}")


r = load_benchmark_results()
if r is not None:
    print(f"  Combined corpus: {format_count(r.get('corpus_size'))} chunks"
          f"  /  {format_count(r.get('n_queries'))} queries")
    for cell, res in r.items():
        # Only dict entries carrying an "encoder" key are per-cell results;
        # other top-level entries (such as the counts above) are skipped.
        if not isinstance(res, dict) or "encoder" not in res:
            continue
        print(f"\n  [{cell}]  encoder={res['encoder']}  fp={res.get('fingerprint_bits', '?')}-bit")
        for m in ["nodemind_a", "nodemind_b", "faiss_fixed_binary", "hnsw_float32", "float32_cosine"]:
            v = res.get(m)
            if isinstance(v, dict):
                print(format_metric_line(m, v))

# Closing section: a blank line, a ruled heading, then a copy-pasteable example
# that is only printed here, never executed by this script.
rule = "=" * 60
print("", rule, "Want to run queries against the float32 baseline yourself?", rule, sep="\n")

# NOTE: the printed example unpickles a file; unpickling is only safe for
# files obtained from a source you trust.
query_example = """
  import pickle, numpy as np
  from sentence_transformers import SentenceTransformer

  # Load the float32 RAG index
  with open("float32_rag_bgem3.pkl", "rb") as f:
      rag = pickle.load(f)
  embs = rag["embeddings"]                    # (75128, 1024) float32

  # Encode your query the same way (BGE-M3, normalized)
  enc = SentenceTransformer("BAAI/bge-m3")
  q   = enc.encode(["your query here"], normalize_embeddings=True)[0]

  # Cosine top-k
  top = np.argsort(-(embs @ q))[:5]
  print(top)
"""
print(query_example)
