[
  {
    "item_id": "txt_000",
    "modality": "text",
    "category": "astronomy",
    "content": "The Milky Way galaxy contains between 100 and 400 billion stars, spread across a disk roughly 100,000 light-years in diameter. Our solar system sits about 26,000 light-years from the galactic centre.",
    "label": "astronomy"
  },
  {
    "item_id": "txt_001",
    "modality": "text",
    "category": "astronomy",
    "content": "Black holes form when massive stars collapse under their own gravity at the end of their life cycle. The event horizon marks the boundary beyond which nothing, not even light, can escape.",
    "label": "astronomy"
  },
  {
    "item_id": "txt_002",
    "modality": "text",
    "category": "astronomy",
    "content": "The James Webb Space Telescope observes the universe in infrared light, allowing it to peer through dust clouds and see some of the earliest galaxies formed after the Big Bang.",
    "label": "astronomy"
  },
  {
    "item_id": "txt_003",
    "modality": "text",
    "category": "astronomy",
    "content": "Neutron stars are the remnants of supernova explosions. They pack more mass than the Sun into a sphere roughly 20 kilometres across, making them the densest objects observable in the universe.",
    "label": "astronomy"
  },
  {
    "item_id": "txt_004",
    "modality": "text",
    "category": "astronomy",
    "content": "Dark matter makes up approximately 27 percent of the universe's total mass-energy content. It does not emit, absorb, or reflect light, making it detectable only through its gravitational effects.",
    "label": "astronomy"
  },
  {
    "item_id": "txt_005",
    "modality": "text",
    "category": "astronomy",
    "content": "The cosmic microwave background radiation is the thermal remnant of the Big Bang, emitted approximately 380,000 years after the universe began when electrons and protons combined to form neutral hydrogen.",
    "label": "astronomy"
  },
  {
    "item_id": "txt_006",
    "modality": "text",
    "category": "astronomy",
    "content": "Jupiter's Great Red Spot is a giant storm that has persisted for over 350 years. It is wide enough to contain two to three Earths and winds within it reach speeds of up to 640 kilometres per hour.",
    "label": "astronomy"
  },
  {
    "item_id": "txt_007",
    "modality": "text",
    "category": "astronomy",
    "content": "The Drake Equation estimates the number of active communicating civilisations in the Milky Way by multiplying factors including stellar formation rates, fraction of stars with planets, and probability of intelligent life.",
    "label": "astronomy"
  },
  {
    "item_id": "txt_008",
    "modality": "text",
    "category": "astronomy",
    "content": "Pulsars are rapidly rotating neutron stars that emit beams of electromagnetic radiation. They are so precise in their rotation that they rival atomic clocks in timekeeping accuracy.",
    "label": "astronomy"
  },
  {
    "item_id": "txt_009",
    "modality": "text",
    "category": "astronomy",
    "content": "The Hubble constant measures the rate of expansion of the universe. Current measurements place it at approximately 70 kilometres per second per megaparsec, though tension between different measurement methods remains unresolved.",
    "label": "astronomy"
  },
  {
    "item_id": "txt_010",
    "modality": "text",
    "category": "biology",
    "content": "DNA replication is a semiconservative process where each strand of the double helix serves as a template for a new complementary strand. DNA polymerase adds nucleotides in the 5 prime to 3 prime direction.",
    "label": "biology"
  },
  {
    "item_id": "txt_011",
    "modality": "text",
    "category": "biology",
    "content": "Mitochondria are membrane-bound organelles that generate most of the cell's supply of ATP through oxidative phosphorylation. They contain their own circular DNA, evidence of their ancient bacterial origin.",
    "label": "biology"
  },
  {
    "item_id": "txt_012",
    "modality": "text",
    "category": "biology",
    "content": "CRISPR-Cas9 is a molecular tool that allows precise editing of DNA sequences. It uses a guide RNA to direct the Cas9 endonuclease to a specific location in the genome where it makes a double-strand break.",
    "label": "biology"
  },
  {
    "item_id": "txt_013",
    "modality": "text",
    "category": "biology",
    "content": "Neurons communicate via electrochemical signals called action potentials. When a neuron fires, sodium ions rush into the cell through voltage-gated channels, reversing the membrane potential from negative to positive.",
    "label": "biology"
  },
  {
    "item_id": "txt_014",
    "modality": "text",
    "category": "biology",
    "content": "Photosynthesis in plants occurs in two stages: the light-dependent reactions in the thylakoid membranes capture solar energy to produce ATP and NADPH, while the Calvin cycle in the stroma fixes carbon dioxide into glucose.",
    "label": "biology"
  },
  {
    "item_id": "txt_015",
    "modality": "text",
    "category": "biology",
    "content": "Viruses are not considered living organisms because they cannot reproduce independently. They hijack the machinery of host cells to replicate, inserting their genetic material and redirecting cellular resources.",
    "label": "biology"
  },
  {
    "item_id": "txt_016",
    "modality": "text",
    "category": "biology",
    "content": "The human immune system has two major branches: the innate immune system provides a rapid non-specific defence, while the adaptive immune system develops specific antibodies against particular pathogens.",
    "label": "biology"
  },
  {
    "item_id": "txt_017",
    "modality": "text",
    "category": "biology",
    "content": "Epigenetics studies heritable changes in gene expression that do not involve changes to the DNA sequence itself. Methylation of cytosine bases and modification of histone proteins are key epigenetic mechanisms.",
    "label": "biology"
  },
  {
    "item_id": "txt_018",
    "modality": "text",
    "category": "biology",
    "content": "Evolution by natural selection requires variation in traits, heritability of those traits, and differential reproductive success. Over many generations this leads to populations better adapted to their environment.",
    "label": "biology"
  },
  {
    "item_id": "txt_019",
    "modality": "text",
    "category": "biology",
    "content": "The human gut microbiome contains trillions of bacteria, fungi, and viruses. It plays a critical role in digestion, immunity, and has been linked to mental health through the gut-brain axis.",
    "label": "biology"
  },
  {
    "item_id": "txt_020",
    "modality": "text",
    "category": "computing",
    "content": "Transformer neural networks use self-attention mechanisms to weigh the importance of different tokens in a sequence when producing representations. This allows them to capture long-range dependencies efficiently.",
    "label": "computing"
  },
  {
    "item_id": "txt_021",
    "modality": "text",
    "category": "computing",
    "content": "Quantum computing leverages quantum mechanical phenomena such as superposition and entanglement to perform certain computations exponentially faster than classical computers.",
    "label": "computing"
  },
  {
    "item_id": "txt_022",
    "modality": "text",
    "category": "computing",
    "content": "The RSA cryptographic algorithm relies on the difficulty of factoring large composite numbers. A public key is used to encrypt messages while only the holder of the private key can decrypt them.",
    "label": "computing"
  },
  {
    "item_id": "txt_023",
    "modality": "text",
    "category": "computing",
    "content": "Gradient descent is an optimisation algorithm used to minimise a loss function by iteratively adjusting parameters in the direction of steepest descent, determined by the negative gradient.",
    "label": "computing"
  },
  {
    "item_id": "txt_024",
    "modality": "text",
    "category": "computing",
    "content": "Database indexing structures like B-trees maintain sorted data in a balanced tree format, enabling search, insertion, and deletion operations in O(log n) time regardless of dataset size.",
    "label": "computing"
  },
  {
    "item_id": "txt_025",
    "modality": "text",
    "category": "computing",
    "content": "Containerisation using Docker packages applications and their dependencies into isolated containers that can run consistently across different computing environments.",
    "label": "computing"
  },
  {
    "item_id": "txt_026",
    "modality": "text",
    "category": "computing",
    "content": "The CAP theorem states that a distributed data store cannot simultaneously guarantee consistency, availability, and partition tolerance. In the presence of a network partition, a trade-off must be made.",
    "label": "computing"
  },
  {
    "item_id": "txt_027",
    "modality": "text",
    "category": "computing",
    "content": "Convolutional neural networks use learnable filters that slide across input images to detect spatial features. Pooling layers reduce spatial dimensions while retaining the most important activations.",
    "label": "computing"
  },
  {
    "item_id": "txt_028",
    "modality": "text",
    "category": "computing",
    "content": "Graph neural networks operate on graph-structured data by iteratively aggregating information from neighbouring nodes. They are used in social network analysis, molecular property prediction, and recommendation systems.",
    "label": "computing"
  },
  {
    "item_id": "txt_029",
    "modality": "text",
    "category": "computing",
    "content": "Zero-knowledge proofs allow one party to prove to another that a statement is true without revealing any information beyond the validity of the statement itself.",
    "label": "computing"
  },
  {
    "item_id": "txt_030",
    "modality": "text",
    "category": "medicine",
    "content": "mRNA vaccines encode instructions for cells to produce a harmless piece of a pathogen, training the immune system to recognise and fight it. Unlike traditional vaccines they do not use live or inactivated virus.",
    "label": "medicine"
  },
  {
    "item_id": "txt_031",
    "modality": "text",
    "category": "medicine",
    "content": "Alzheimer's disease is characterised by the accumulation of amyloid beta plaques and tau protein tangles in the brain, leading to progressive neurodegeneration and cognitive decline.",
    "label": "medicine"
  },
  {
    "item_id": "txt_032",
    "modality": "text",
    "category": "medicine",
    "content": "CRISPR gene therapy has shown promise in treating sickle cell disease by editing the patient's own stem cells to produce functional haemoglobin, effectively curing the condition in clinical trials.",
    "label": "medicine"
  },
  {
    "item_id": "txt_033",
    "modality": "text",
    "category": "medicine",
    "content": "Antibiotic resistance occurs when bacteria evolve mechanisms to survive exposure to drugs designed to kill them. Overuse and misuse of antibiotics accelerates this process, threatening modern medicine.",
    "label": "medicine"
  },
  {
    "item_id": "txt_034",
    "modality": "text",
    "category": "medicine",
    "content": "The blood-brain barrier is a selective semipermeable membrane that separates circulating blood from the brain. It protects the brain from pathogens and toxins but also limits drug delivery to the central nervous system.",
    "label": "medicine"
  },
  {
    "item_id": "txt_035",
    "modality": "text",
    "category": "medicine",
    "content": "CAR-T cell therapy engineers a patient's own T cells to express chimeric antigen receptors targeting cancer cells. It has achieved remarkable remission rates in certain blood cancers.",
    "label": "medicine"
  },
  {
    "item_id": "txt_036",
    "modality": "text",
    "category": "medicine",
    "content": "Type 2 diabetes involves insulin resistance where cells fail to respond effectively to insulin. It is strongly associated with obesity, physical inactivity, and is managed through diet, exercise, and medication.",
    "label": "medicine"
  },
  {
    "item_id": "txt_037",
    "modality": "text",
    "category": "medicine",
    "content": "The placebo effect demonstrates that belief in a treatment can produce measurable physiological changes. It is a key consideration in clinical trial design, necessitating blinded and placebo-controlled studies.",
    "label": "medicine"
  },
  {
    "item_id": "txt_038",
    "modality": "text",
    "category": "medicine",
    "content": "Organ transplantation requires immunosuppressive therapy to prevent rejection. The immune system recognises donor tissue as foreign and attacks it unless suppressed by drugs like tacrolimus and cyclosporine.",
    "label": "medicine"
  },
  {
    "item_id": "txt_039",
    "modality": "text",
    "category": "medicine",
    "content": "Personalised medicine tailors treatment to individual patients based on their genetic profile, lifestyle, and environment. Pharmacogenomics studies how genes affect drug response and guides dosage decisions.",
    "label": "medicine"
  },
  {
    "item_id": "txt_040",
    "modality": "text",
    "category": "climate",
    "content": "The greenhouse effect occurs when atmospheric gases like carbon dioxide, methane, and water vapour trap heat radiating from Earth's surface. Human activity has significantly increased CO2 concentrations since industrialisation.",
    "label": "climate"
  },
  {
    "item_id": "txt_041",
    "modality": "text",
    "category": "climate",
    "content": "Ocean acidification is caused by the absorption of atmospheric carbon dioxide, forming carbonic acid. This lowers ocean pH, threatening marine ecosystems particularly coral reefs and shell-forming organisms.",
    "label": "climate"
  },
  {
    "item_id": "txt_042",
    "modality": "text",
    "category": "climate",
    "content": "The Antarctic ice sheet holds approximately 26.5 million cubic kilometres of ice. Its complete melting would raise global sea levels by around 58 metres, though this would take thousands of years.",
    "label": "climate"
  },
  {
    "item_id": "txt_043",
    "modality": "text",
    "category": "climate",
    "content": "Renewable energy sources including solar, wind, and hydroelectric power produce electricity without direct greenhouse gas emissions. Their falling costs have made them increasingly competitive with fossil fuels.",
    "label": "climate"
  },
  {
    "item_id": "txt_044",
    "modality": "text",
    "category": "climate",
    "content": "The Paris Agreement set a goal of limiting global average temperature increase to 1.5 degrees Celsius above pre-industrial levels. Countries submit nationally determined contributions outlining their emissions reduction plans.",
    "label": "climate"
  },
  {
    "item_id": "txt_045",
    "modality": "text",
    "category": "climate",
    "content": "Permafrost covers approximately 25 percent of the Northern Hemisphere's land surface. As it thaws due to warming temperatures it releases stored carbon dioxide and methane, creating a positive feedback loop.",
    "label": "climate"
  },
  {
    "item_id": "txt_046",
    "modality": "text",
    "category": "climate",
    "content": "Deforestation contributes approximately 10 percent of global greenhouse gas emissions. Tropical forests are particularly critical carbon sinks, with the Amazon alone storing around 150 billion tonnes of carbon.",
    "label": "climate"
  },
  {
    "item_id": "txt_047",
    "modality": "text",
    "category": "climate",
    "content": "Solar geoengineering proposals such as stratospheric aerosol injection aim to reflect sunlight back into space to cool the planet. These approaches carry significant risks and ethical concerns about unilateral deployment.",
    "label": "climate"
  },
  {
    "item_id": "txt_048",
    "modality": "text",
    "category": "climate",
    "content": "Sea level rise is caused by thermal expansion of warming ocean water and melting land ice. Current rates of rise are around 3.7 millimetres per year and are accelerating.",
    "label": "climate"
  },
  {
    "item_id": "txt_049",
    "modality": "text",
    "category": "climate",
    "content": "Electric vehicles reduce tailpipe emissions but their overall environmental impact depends on the electricity source. In regions with high renewable energy penetration they offer significant lifecycle emissions reductions.",
    "label": "climate"
  },
  {
    "item_id": "img_000",
    "modality": "image",
    "category": "cat",
    "content": "/root/multimodal_benchmark/corpus/images/cat_00.jpg",
    "caption": "orange tabby cat with bright green eyes looking at camera",
    "label": "cat",
    "photo_id": "photo-1514888286974-6c03e2ca1dba"
  },
  {
    "item_id": "img_001",
    "modality": "image",
    "category": "cat",
    "content": "/root/multimodal_benchmark/corpus/images/cat_01.jpg",
    "caption": "sleek black cat sitting on a white surface indoors",
    "label": "cat",
    "photo_id": "photo-1533743983669-94fa5c4338ec"
  },
  {
    "item_id": "img_002",
    "modality": "image",
    "category": "cat",
    "content": "/root/multimodal_benchmark/corpus/images/cat_02.jpg",
    "caption": "grey and white cat lying on a wooden floor in sunlight",
    "label": "cat",
    "photo_id": "photo-1574158622682-e40e69881006"
  },
  {
    "item_id": "img_003",
    "modality": "image",
    "category": "cat",
    "content": "/root/multimodal_benchmark/corpus/images/cat_03.jpg",
    "caption": "fluffy white cat curled up sleeping on a soft blanket",
    "label": "cat",
    "photo_id": "photo-1495360010541-f48722b34f7d"
  },
  {
    "item_id": "img_004",
    "modality": "image",
    "category": "cat",
    "content": "/root/multimodal_benchmark/corpus/images/cat_04.jpg",
    "caption": "small kitten with blue eyes looking upward",
    "label": "cat",
    "photo_id": "photo-1543466835-00a7907e9de1"
  },
  {
    "item_id": "img_005",
    "modality": "image",
    "category": "cat",
    "content": "/root/multimodal_benchmark/corpus/images/cat_05.jpg",
    "caption": "ginger tabby cat stretching on a couch",
    "label": "cat",
    "photo_id": "photo-1425082661705-1834bfd09dca"
  },
  {
    "item_id": "img_006",
    "modality": "image",
    "category": "cat",
    "content": "/root/multimodal_benchmark/corpus/images/cat_06.jpg",
    "caption": "brown striped cat sitting outdoors in grass",
    "label": "cat",
    "photo_id": "photo-1592194996308-7b43878e84a6"
  },
  {
    "item_id": "img_007",
    "modality": "image",
    "category": "cat",
    "content": "/root/multimodal_benchmark/corpus/images/cat_07.jpg",
    "caption": "white and grey cat with curious expression on a shelf",
    "label": "cat",
    "photo_id": "photo-1561948955-570b270e7c36"
  },
  {
    "item_id": "img_008",
    "modality": "image",
    "category": "cat",
    "content": "/root/multimodal_benchmark/corpus/images/cat_08.jpg",
    "caption": "calico cat with orange black white markings on a bed",
    "label": "cat",
    "photo_id": "photo-1596854407944-bf87f6fdd49e"
  },
  {
    "item_id": "img_009",
    "modality": "image",
    "category": "cat",
    "content": "/root/multimodal_benchmark/corpus/images/cat_09.jpg",
    "caption": "cat silhouette against window light looking outside",
    "label": "cat",
    "photo_id": "photo-1548247416-ec66f4900b2e"
  },
  {
    "item_id": "img_010",
    "modality": "image",
    "category": "dog",
    "content": "/root/multimodal_benchmark/corpus/images/dog_00.jpg",
    "caption": "yellow labrador retriever dog sitting in a sunny park",
    "label": "dog",
    "photo_id": "photo-1587300003388-59208cc962cb"
  },
  {
    "item_id": "img_011",
    "modality": "image",
    "category": "dog",
    "content": "/root/multimodal_benchmark/corpus/images/dog_01.jpg",
    "caption": "golden retriever dog running through green grass",
    "label": "dog",
    "photo_id": "photo-1552053831-71594a27632d"
  },
  {
    "item_id": "img_012",
    "modality": "image",
    "category": "dog",
    "content": "/root/multimodal_benchmark/corpus/images/dog_02.jpg",
    "caption": "husky dog with blue eyes sitting in snow",
    "label": "dog",
    "photo_id": "photo-1477884213360-7e9d7dcc1e48"
  },
  {
    "item_id": "img_013",
    "modality": "image",
    "category": "dog",
    "content": "/root/multimodal_benchmark/corpus/images/dog_03.jpg",
    "caption": "beagle dog sniffing ground in an outdoor park",
    "label": "dog",
    "photo_id": "photo-1558788353-f76d92427f16"
  },
  {
    "item_id": "img_014",
    "modality": "image",
    "category": "dog",
    "content": "/root/multimodal_benchmark/corpus/images/dog_04.jpg",
    "caption": "white fluffy poodle dog sitting on a red chair",
    "label": "dog",
    "photo_id": "photo-1537151625747-768eb6cf92b2"
  },
  {
    "item_id": "img_015",
    "modality": "image",
    "category": "dog",
    "content": "/root/multimodal_benchmark/corpus/images/dog_05.jpg",
    "caption": "dalmatian dog with black and white spots outdoors",
    "label": "dog",
    "photo_id": "photo-1588943211346-0908a1fb0b01"
  },
  {
    "item_id": "img_016",
    "modality": "image",
    "category": "dog",
    "content": "/root/multimodal_benchmark/corpus/images/dog_06.jpg",
    "caption": "brown and black german shepherd dog looking alert",
    "label": "dog",
    "photo_id": "photo-1601979031925-424e53b6caaa"
  },
  {
    "item_id": "img_017",
    "modality": "image",
    "category": "dog",
    "content": "/root/multimodal_benchmark/corpus/images/dog_07.jpg",
    "caption": "old english bulldog lying on a wooden deck",
    "label": "dog",
    "photo_id": "photo-1518717758536-85ae29035b6d"
  },
  {
    "item_id": "img_018",
    "modality": "image",
    "category": "dog",
    "content": "/root/multimodal_benchmark/corpus/images/dog_08.jpg",
    "caption": "border collie dog jumping to catch a frisbee",
    "label": "dog",
    "photo_id": "photo-1450778869180-41d0601e046e"
  },
  {
    "item_id": "img_019",
    "modality": "image",
    "category": "dog",
    "content": "/root/multimodal_benchmark/corpus/images/dog_09.jpg",
    "caption": "tiny chihuahua dog in a cozy sweater",
    "label": "dog",
    "photo_id": "photo-1583511655857-d19b40a7a54e"
  },
  {
    "item_id": "img_020",
    "modality": "image",
    "category": "car",
    "content": "/root/multimodal_benchmark/corpus/images/car_00.jpg",
    "caption": "silver sports car driving on an open mountain road",
    "label": "car",
    "photo_id": "photo-1494976388531-d1058494cdd8"
  },
  {
    "item_id": "img_021",
    "modality": "image",
    "category": "car",
    "content": "/root/multimodal_benchmark/corpus/images/car_01.jpg",
    "caption": "red luxury sports car parked in front of a building",
    "label": "car",
    "photo_id": "photo-1503376780353-7e6692767b70"
  },
  {
    "item_id": "img_022",
    "modality": "image",
    "category": "car",
    "content": "/root/multimodal_benchmark/corpus/images/car_02.jpg",
    "caption": "black sports car on a racetrack at high speed",
    "label": "car",
    "photo_id": "photo-1555215695-3004980ad54e"
  },
  {
    "item_id": "img_023",
    "modality": "image",
    "category": "car",
    "content": "/root/multimodal_benchmark/corpus/images/car_03.jpg",
    "caption": "white electric car plugged in charging at station",
    "label": "car",
    "photo_id": "photo-1532581291347-9c39cf10a73c"
  },
  {
    "item_id": "img_024",
    "modality": "image",
    "category": "car",
    "content": "/root/multimodal_benchmark/corpus/images/car_04.jpg",
    "caption": "orange vintage classic car at a car show",
    "label": "car",
    "photo_id": "photo-1533473359331-0135ef1b58bf"
  },
  {
    "item_id": "img_025",
    "modality": "image",
    "category": "car",
    "content": "/root/multimodal_benchmark/corpus/images/car_05.jpg",
    "caption": "yellow convertible car on a sunny coastal road",
    "label": "car",
    "photo_id": "photo-1568605117036-5fe5e7bab0b7"
  },
  {
    "item_id": "img_026",
    "modality": "image",
    "category": "car",
    "content": "/root/multimodal_benchmark/corpus/images/car_06.jpg",
    "caption": "dark blue pickup truck on a dirt road in mountains",
    "label": "car",
    "photo_id": "photo-1549317661-bd32c8ce0db2"
  },
  {
    "item_id": "img_027",
    "modality": "image",
    "category": "car",
    "content": "/root/multimodal_benchmark/corpus/images/car_07.jpg",
    "caption": "green hatchback car parked on a city street",
    "label": "car",
    "photo_id": "photo-1606016159991-dfe4f2746ad5"
  },
  {
    "item_id": "img_028",
    "modality": "image",
    "category": "car",
    "content": "/root/multimodal_benchmark/corpus/images/car_08.jpg",
    "caption": "grey sedan at an urban traffic intersection",
    "label": "car",
    "photo_id": "photo-1567818735868-e71b99932e29"
  },
  {
    "item_id": "img_029",
    "modality": "image",
    "category": "car",
    "content": "/root/multimodal_benchmark/corpus/images/car_09.jpg",
    "caption": "white SUV parked in a forest campsite",
    "label": "car",
    "photo_id": "photo-1544636331-e26879cd4d9b"
  },
  {
    "item_id": "img_030",
    "modality": "image",
    "category": "food",
    "content": "/root/multimodal_benchmark/corpus/images/food_00.jpg",
    "caption": "fresh hot pizza with melted cheese and tomato sauce",
    "label": "food",
    "photo_id": "photo-1565299624946-b28f40a0ae38"
  },
  {
    "item_id": "img_031",
    "modality": "image",
    "category": "food",
    "content": "/root/multimodal_benchmark/corpus/images/food_01.jpg",
    "caption": "stack of golden fluffy pancakes with maple syrup",
    "label": "food",
    "photo_id": "photo-1567620905732-2d1ec7ab7445"
  },
  {
    "item_id": "img_032",
    "modality": "image",
    "category": "food",
    "content": "/root/multimodal_benchmark/corpus/images/food_02.jpg",
    "caption": "colorful bowl of fresh fruit salad with strawberries",
    "label": "food",
    "photo_id": "photo-1540189549336-e6e99c3679fe"
  },
  {
    "item_id": "img_033",
    "modality": "image",
    "category": "food",
    "content": "/root/multimodal_benchmark/corpus/images/food_03.jpg",
    "caption": "healthy green salad with cherry tomatoes and avocado",
    "label": "food",
    "photo_id": "photo-1512621776951-a57141f2eefd"
  },
  {
    "item_id": "img_034",
    "modality": "image",
    "category": "food",
    "content": "/root/multimodal_benchmark/corpus/images/food_04.jpg",
    "caption": "grilled salmon fillet with herbs and lemon",
    "label": "food",
    "photo_id": "photo-1569050467447-ce54b3bbc37d"
  },
  {
    "item_id": "img_035",
    "modality": "image",
    "category": "food",
    "content": "/root/multimodal_benchmark/corpus/images/food_05.jpg",
    "caption": "slice of rich chocolate cake with cream frosting",
    "label": "food",
    "photo_id": "photo-1578985545062-69928b1d9587"
  },
  {
    "item_id": "img_036",
    "modality": "image",
    "category": "food",
    "content": "/root/multimodal_benchmark/corpus/images/food_06.jpg",
    "caption": "steaming bowl of ramen noodles with soft boiled egg",
    "label": "food",
    "photo_id": "photo-1569718212165-3a8278d5f624"
  },
  {
    "item_id": "img_037",
    "modality": "image",
    "category": "food",
    "content": "/root/multimodal_benchmark/corpus/images/food_07.jpg",
    "caption": "classic american cheeseburger with lettuce and tomato",
    "label": "food",
    "photo_id": "photo-1550547660-d9450f859349"
  },
  {
    "item_id": "img_038",
    "modality": "image",
    "category": "food",
    "content": "/root/multimodal_benchmark/corpus/images/food_08.jpg",
    "caption": "assorted sushi rolls on a wooden serving board",
    "label": "food",
    "photo_id": "photo-1611270629569-8b357cb88da9"
  },
  {
    "item_id": "img_039",
    "modality": "image",
    "category": "food",
    "content": "/root/multimodal_benchmark/corpus/images/food_09.jpg",
    "caption": "full english breakfast plate with eggs bacon and toast",
    "label": "food",
    "photo_id": "photo-1484723091739-30a097e8f929"
  },
  {
    "item_id": "img_040",
    "modality": "image",
    "category": "nature",
    "content": "/root/multimodal_benchmark/corpus/images/nature_00.jpg",
    "caption": "crystal clear mountain lake reflecting snow capped peaks",
    "label": "nature",
    "photo_id": "photo-1506905925346-21bda4d32df4"
  },
  {
    "item_id": "img_041",
    "modality": "image",
    "category": "nature",
    "content": "/root/multimodal_benchmark/corpus/images/nature_01.jpg",
    "caption": "dense green forest with sunlight streaming through tall trees",
    "label": "nature",
    "photo_id": "photo-1448375240586-882707db888b"
  },
  {
    "item_id": "img_042",
    "modality": "image",
    "category": "nature",
    "content": "/root/multimodal_benchmark/corpus/images/nature_02.jpg",
    "caption": "tropical sandy beach with clear turquoise ocean waves",
    "label": "nature",
    "photo_id": "photo-1507525428034-b723cf961d3e"
  },
  {
    "item_id": "img_043",
    "modality": "image",
    "category": "nature",
    "content": "/root/multimodal_benchmark/corpus/images/nature_03.jpg",
    "caption": "vast field of bright yellow sunflowers under blue sky",
    "label": "nature",
    "photo_id": "photo-1470770841072-f978cf4d019e"
  },
  {
    "item_id": "img_044",
    "modality": "image",
    "category": "nature",
    "content": "/root/multimodal_benchmark/corpus/images/nature_04.jpg",
    "caption": "powerful waterfall cascading over dark mossy rocks",
    "label": "nature",
    "photo_id": "photo-1432405972618-c60b0225b8f9"
  },
  {
    "item_id": "img_045",
    "modality": "image",
    "category": "nature",
    "content": "/root/multimodal_benchmark/corpus/images/nature_05.jpg",
    "caption": "autumn forest trail covered in orange and red fallen leaves",
    "label": "nature",
    "photo_id": "photo-1476231682828-37e571bc172f"
  },
  {
    "item_id": "img_046",
    "modality": "image",
    "category": "nature",
    "content": "/root/multimodal_benchmark/corpus/images/nature_06.jpg",
    "caption": "rolling sand dunes in a vast desert at golden sunset",
    "label": "nature",
    "photo_id": "photo-1509316785289-025f5b846b35"
  },
  {
    "item_id": "img_047",
    "modality": "image",
    "category": "nature",
    "content": "/root/multimodal_benchmark/corpus/images/nature_07.jpg",
    "caption": "green valley with winding river seen from mountain top",
    "label": "nature",
    "photo_id": "photo-1501854140801-50d01698950b"
  },
  {
    "item_id": "img_048",
    "modality": "image",
    "category": "nature",
    "content": "/root/multimodal_benchmark/corpus/images/nature_08.jpg",
    "caption": "dramatic stormy ocean with large crashing waves",
    "label": "nature",
    "photo_id": "photo-1505118380757-91f5f5632de0"
  },
  {
    "item_id": "img_049",
    "modality": "image",
    "category": "nature",
    "content": "/root/multimodal_benchmark/corpus/images/nature_09.jpg",
    "caption": "cherry blossom trees in full pink bloom along a path",
    "label": "nature",
    "photo_id": "photo-1522383225653-ed111181a951"
  },
  {
    "item_id": "aud_000",
    "modality": "audio",
    "category": "dog_bark",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_dog_bark_00.wav",
    "label": "dog_bark",
    "label_text": "dog barking loudly outdoors clip zero",
    "clip_idx": 0
  },
  {
    "item_id": "aud_001",
    "modality": "audio",
    "category": "dog_bark",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_dog_bark_01.wav",
    "label": "dog_bark",
    "label_text": "single dog bark short burst clip one",
    "clip_idx": 1
  },
  {
    "item_id": "aud_002",
    "modality": "audio",
    "category": "dog_bark",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_dog_bark_02.wav",
    "label": "dog_bark",
    "label_text": "dog yelping and barking repeatedly clip two",
    "clip_idx": 2
  },
  {
    "item_id": "aud_003",
    "modality": "audio",
    "category": "dog_bark",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_dog_bark_03.wav",
    "label": "dog_bark",
    "label_text": "aggressive dog barking at stranger clip three",
    "clip_idx": 3
  },
  {
    "item_id": "aud_004",
    "modality": "audio",
    "category": "dog_bark",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_dog_bark_04.wav",
    "label": "dog_bark",
    "label_text": "small dog high-pitched bark clip four",
    "clip_idx": 4
  },
  {
    "item_id": "aud_005",
    "modality": "audio",
    "category": "dog_bark",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_dog_bark_05.wav",
    "label": "dog_bark",
    "label_text": "dog bark echoing in an empty room clip five",
    "clip_idx": 5
  },
  {
    "item_id": "aud_006",
    "modality": "audio",
    "category": "dog_bark",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_dog_bark_06.wav",
    "label": "dog_bark",
    "label_text": "continuous dog barking in distance clip six",
    "clip_idx": 6
  },
  {
    "item_id": "aud_007",
    "modality": "audio",
    "category": "dog_bark",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_dog_bark_07.wav",
    "label": "dog_bark",
    "label_text": "dog bark followed by growl clip seven",
    "clip_idx": 7
  },
  {
    "item_id": "aud_008",
    "modality": "audio",
    "category": "dog_bark",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_dog_bark_08.wav",
    "label": "dog_bark",
    "label_text": "playful dog barking excitedly clip eight",
    "clip_idx": 8
  },
  {
    "item_id": "aud_009",
    "modality": "audio",
    "category": "dog_bark",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_dog_bark_09.wav",
    "label": "dog_bark",
    "label_text": "dog bark warning sound clip nine",
    "clip_idx": 9
  },
  {
    "item_id": "aud_010",
    "modality": "audio",
    "category": "rain",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_rain_00.wav",
    "label": "rain",
    "label_text": "light rain falling on a rooftop clip zero",
    "clip_idx": 0
  },
  {
    "item_id": "aud_011",
    "modality": "audio",
    "category": "rain",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_rain_01.wav",
    "label": "rain",
    "label_text": "heavy rain downpour on pavement clip one",
    "clip_idx": 1
  },
  {
    "item_id": "aud_012",
    "modality": "audio",
    "category": "rain",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_rain_02.wav",
    "label": "rain",
    "label_text": "rain pattering on window glass clip two",
    "clip_idx": 2
  },
  {
    "item_id": "aud_013",
    "modality": "audio",
    "category": "rain",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_rain_03.wav",
    "label": "rain",
    "label_text": "thunder and rain storm outdoors clip three",
    "clip_idx": 3
  },
  {
    "item_id": "aud_014",
    "modality": "audio",
    "category": "rain",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_rain_04.wav",
    "label": "rain",
    "label_text": "gentle steady rain in a forest clip four",
    "clip_idx": 4
  },
  {
    "item_id": "aud_015",
    "modality": "audio",
    "category": "rain",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_rain_05.wav",
    "label": "rain",
    "label_text": "rain falling on leaves in garden clip five",
    "clip_idx": 5
  },
  {
    "item_id": "aud_016",
    "modality": "audio",
    "category": "rain",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_rain_06.wav",
    "label": "rain",
    "label_text": "intense rain on metal roof clip six",
    "clip_idx": 6
  },
  {
    "item_id": "aud_017",
    "modality": "audio",
    "category": "rain",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_rain_07.wav",
    "label": "rain",
    "label_text": "light drizzle and rain clip seven",
    "clip_idx": 7
  },
  {
    "item_id": "aud_018",
    "modality": "audio",
    "category": "rain",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_rain_08.wav",
    "label": "rain",
    "label_text": "rain shower beginning outdoors clip eight",
    "clip_idx": 8
  },
  {
    "item_id": "aud_019",
    "modality": "audio",
    "category": "rain",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_rain_09.wav",
    "label": "rain",
    "label_text": "rain recorded in an open field clip nine",
    "clip_idx": 9
  },
  {
    "item_id": "aud_020",
    "modality": "audio",
    "category": "sea_waves",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_sea_waves_00.wav",
    "label": "sea_waves",
    "label_text": "ocean waves crashing on sandy beach clip zero",
    "clip_idx": 0
  },
  {
    "item_id": "aud_021",
    "modality": "audio",
    "category": "sea_waves",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_sea_waves_01.wav",
    "label": "sea_waves",
    "label_text": "large sea waves breaking on rocks clip one",
    "clip_idx": 1
  },
  {
    "item_id": "aud_022",
    "modality": "audio",
    "category": "sea_waves",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_sea_waves_02.wav",
    "label": "sea_waves",
    "label_text": "gentle sea waves lapping shore clip two",
    "clip_idx": 2
  },
  {
    "item_id": "aud_023",
    "modality": "audio",
    "category": "sea_waves",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_sea_waves_03.wav",
    "label": "sea_waves",
    "label_text": "strong surf and ocean waves clip three",
    "clip_idx": 3
  },
  {
    "item_id": "aud_024",
    "modality": "audio",
    "category": "sea_waves",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_sea_waves_04.wav",
    "label": "sea_waves",
    "label_text": "sea waves with distant seagulls clip four",
    "clip_idx": 4
  },
  {
    "item_id": "aud_025",
    "modality": "audio",
    "category": "sea_waves",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_sea_waves_05.wav",
    "label": "sea_waves",
    "label_text": "rhythmic ocean waves on a calm day clip five",
    "clip_idx": 5
  },
  {
    "item_id": "aud_026",
    "modality": "audio",
    "category": "sea_waves",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_sea_waves_06.wav",
    "label": "sea_waves",
    "label_text": "rough sea waves during storm clip six",
    "clip_idx": 6
  },
  {
    "item_id": "aud_027",
    "modality": "audio",
    "category": "sea_waves",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_sea_waves_07.wav",
    "label": "sea_waves",
    "label_text": "waves washing over pebbles clip seven",
    "clip_idx": 7
  },
  {
    "item_id": "aud_028",
    "modality": "audio",
    "category": "sea_waves",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_sea_waves_08.wav",
    "label": "sea_waves",
    "label_text": "ocean waves at sunrise clip eight",
    "clip_idx": 8
  },
  {
    "item_id": "aud_029",
    "modality": "audio",
    "category": "sea_waves",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_sea_waves_09.wav",
    "label": "sea_waves",
    "label_text": "sea waves retreating from shore clip nine",
    "clip_idx": 9
  },
  {
    "item_id": "aud_030",
    "modality": "audio",
    "category": "crackling_fire",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_crackling_fire_00.wav",
    "label": "crackling_fire",
    "label_text": "wood fire crackling and popping clip zero",
    "clip_idx": 0
  },
  {
    "item_id": "aud_031",
    "modality": "audio",
    "category": "crackling_fire",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_crackling_fire_01.wav",
    "label": "crackling_fire",
    "label_text": "campfire burning with loud crackles clip one",
    "clip_idx": 1
  },
  {
    "item_id": "aud_032",
    "modality": "audio",
    "category": "crackling_fire",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_crackling_fire_02.wav",
    "label": "crackling_fire",
    "label_text": "fireplace crackling warm and steady clip two",
    "clip_idx": 2
  },
  {
    "item_id": "aud_033",
    "modality": "audio",
    "category": "crackling_fire",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_crackling_fire_03.wav",
    "label": "crackling_fire",
    "label_text": "bonfire crackling in the open air clip three",
    "clip_idx": 3
  },
  {
    "item_id": "aud_034",
    "modality": "audio",
    "category": "crackling_fire",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_crackling_fire_04.wav",
    "label": "crackling_fire",
    "label_text": "fire crackling with hissing wood clip four",
    "clip_idx": 4
  },
  {
    "item_id": "aud_035",
    "modality": "audio",
    "category": "crackling_fire",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_crackling_fire_05.wav",
    "label": "crackling_fire",
    "label_text": "small fire crackling quietly clip five",
    "clip_idx": 5
  },
  {
    "item_id": "aud_036",
    "modality": "audio",
    "category": "crackling_fire",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_crackling_fire_06.wav",
    "label": "crackling_fire",
    "label_text": "large fire burning with deep crackles clip six",
    "clip_idx": 6
  },
  {
    "item_id": "aud_037",
    "modality": "audio",
    "category": "crackling_fire",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_crackling_fire_07.wav",
    "label": "crackling_fire",
    "label_text": "fire crackling after adding fresh logs clip seven",
    "clip_idx": 7
  },
  {
    "item_id": "aud_038",
    "modality": "audio",
    "category": "crackling_fire",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_crackling_fire_08.wav",
    "label": "crackling_fire",
    "label_text": "hot coals crackling in fireplace clip eight",
    "clip_idx": 8
  },
  {
    "item_id": "aud_039",
    "modality": "audio",
    "category": "crackling_fire",
    "content": "/root/multimodal_benchmark/corpus/audio/audio_crackling_fire_09.wav",
    "label": "crackling_fire",
    "label_text": "fire crackling and wind sounds clip nine",
    "clip_idx": 9
  },
  {
    "item_id": "tbl_000",
    "modality": "table",
    "category": "sales_data",
    "content": "# Apex Corp Quarterly Sales Report 2020\nQuarter,Product_A,Product_B,Product_C,Total\nQ1,27611,16271,22716,66598\nQ2,25455,72937,55870,154262\nQ3,68915,69898,48702,187515\nQ4,59756,35519,12151,107426",
    "description": "quarterly sales revenue for company variant 0",
    "label": "sales_data"
  },
  {
    "item_id": "tbl_001",
    "modality": "table",
    "category": "sales_data",
    "content": "# BlueStar Ltd Quarterly Sales Report 2021\nQuarter,Product_A,Product_B,Product_C,Total\nQ1,31605,70847,45423,147875\nQ2,79814,70557,18021,168392\nQ3,35154,69312,57049,161515\nQ4,32858,11152,45648,89658",
    "description": "quarterly sales revenue for company variant 1",
    "label": "sales_data"
  },
  {
    "item_id": "tbl_002",
    "modality": "table",
    "category": "sales_data",
    "content": "# Crimson Industries Quarterly Sales Report 2022\nQuarter,Product_A,Product_B,Product_C,Total\nQ1,69566,30925,47259,147750\nQ2,12606,67432,35400,115438\nQ3,56736,46729,30372,133837\nQ4,29038,27052,33100,89190",
    "description": "quarterly sales revenue for company variant 2",
    "label": "sales_data"
  },
  {
    "item_id": "tbl_003",
    "modality": "table",
    "category": "sales_data",
    "content": "# Delta Systems Quarterly Sales Report 2023\nQuarter,Product_A,Product_B,Product_C,Total\nQ1,10461,74368,6459,91288\nQ2,21282,48558,42452,112292\nQ3,43846,21472,37772,103090\nQ4,10589,61365,23753,95707",
    "description": "quarterly sales revenue for company variant 3",
    "label": "sales_data"
  },
  {
    "item_id": "tbl_004",
    "modality": "table",
    "category": "sales_data",
    "content": "# Echo Ventures Quarterly Sales Report 2024\nQuarter,Product_A,Product_B,Product_C,Total\nQ1,57824,35097,12270,105191\nQ2,72144,19942,47523,139609\nQ3,25317,51032,34723,111072\nQ4,82870,62090,59230,204190",
    "description": "quarterly sales revenue for company variant 4",
    "label": "sales_data"
  },
  {
    "item_id": "tbl_005",
    "modality": "table",
    "category": "sales_data",
    "content": "# Falcon Group Quarterly Sales Report 2025\nQuarter,Product_A,Product_B,Product_C,Total\nQ1,68736,55498,52102,176336\nQ2,76951,65175,32894,175020\nQ3,84019,61277,7160,152456\nQ4,44122,28202,27537,99861",
    "description": "quarterly sales revenue for company variant 5",
    "label": "sales_data"
  },
  {
    "item_id": "tbl_006",
    "modality": "table",
    "category": "sales_data",
    "content": "# Genesis Holdings Quarterly Sales Report 2026\nQuarter,Product_A,Product_B,Product_C,Total\nQ1,63524,73800,45992,183316\nQ2,20672,26459,56672,103803\nQ3,63599,14124,8182,85905\nQ4,80481,14909,18449,113839",
    "description": "quarterly sales revenue for company variant 6",
    "label": "sales_data"
  },
  {
    "item_id": "tbl_007",
    "modality": "table",
    "category": "sales_data",
    "content": "# Horizon Tech Quarterly Sales Report 2027\nQuarter,Product_A,Product_B,Product_C,Total\nQ1,69719,8335,33122,111176\nQ2,35436,24907,41898,102241\nQ3,79643,59926,13807,153376\nQ4,67772,58614,6516,132902",
    "description": "quarterly sales revenue for company variant 7",
    "label": "sales_data"
  },
  {
    "item_id": "tbl_008",
    "modality": "table",
    "category": "sales_data",
    "content": "# Ironclad Inc Quarterly Sales Report 2028\nQuarter,Product_A,Product_B,Product_C,Total\nQ1,29986,55898,48566,134450\nQ2,84946,16816,42647,144409\nQ3,10236,66150,18004,94390\nQ4,57557,69576,40861,167994",
    "description": "quarterly sales revenue for company variant 8",
    "label": "sales_data"
  },
  {
    "item_id": "tbl_009",
    "modality": "table",
    "category": "sales_data",
    "content": "# Jupiter Solutions Quarterly Sales Report 2029\nQuarter,Product_A,Product_B,Product_C,Total\nQ1,20443,63351,42078,125872\nQ2,23065,13017,6789,42871\nQ3,44396,52995,32189,129580\nQ4,39749,49033,36333,125115",
    "description": "quarterly sales revenue for company variant 9",
    "label": "sales_data"
  },
  {
    "item_id": "tbl_010",
    "modality": "table",
    "category": "temperature_data",
    "content": "# Sydney Monthly Climate Data 2014\nMonth,Min_C,Max_C,Avg_C,Rainfall_mm\nJan,5,22,13,111\nFeb,-4,20,8,147\nMar,-2,29,13,159\nApr,-4,34,15,64\nMay,-4,20,8,121\nJun,8,20,14,71\nJul,-3,35,16,118\nAug,-4,36,16,41\nSep,2,38,20,170\nOct,-4,36,16,159\nNov,7,19,13,66\nDec,-4,35,15,44",
    "description": "monthly climate temperature data for city variant 0",
    "label": "temperature_data"
  },
  {
    "item_id": "tbl_011",
    "modality": "table",
    "category": "temperature_data",
    "content": "# Melbourne Monthly Climate Data 2015\nMonth,Min_C,Max_C,Avg_C,Rainfall_mm\nJan,10,36,23,56\nFeb,-1,38,18,72\nMar,-2,33,15,53\nApr,5,36,20,135\nMay,2,33,17,51\nJun,10,32,21,82\nJul,-2,19,8,60\nAug,-1,24,11,65\nSep,-5,26,10,33\nOct,-2,24,11,173\nNov,-1,20,9,154\nDec,4,33,18,116",
    "description": "monthly climate temperature data for city variant 1",
    "label": "temperature_data"
  },
  {
    "item_id": "tbl_012",
    "modality": "table",
    "category": "temperature_data",
    "content": "# Brisbane Monthly Climate Data 2016\nMonth,Min_C,Max_C,Avg_C,Rainfall_mm\nJan,-4,33,14,147\nFeb,9,33,21,163\nMar,-1,19,9,171\nApr,-5,24,9,120\nMay,1,34,17,73\nJun,-5,19,7,90\nJul,10,21,15,173\nAug,3,24,13,52\nSep,2,25,13,18\nOct,-5,20,7,145\nNov,4,32,18,22\nDec,7,23,15,140",
    "description": "monthly climate temperature data for city variant 2",
    "label": "temperature_data"
  },
  {
    "item_id": "tbl_013",
    "modality": "table",
    "category": "temperature_data",
    "content": "# Perth Monthly Climate Data 2017\nMonth,Min_C,Max_C,Avg_C,Rainfall_mm\nJan,-5,29,12,116\nFeb,2,31,16,12\nMar,6,28,17,13\nApr,2,29,15,53\nMay,-5,23,9,26\nJun,9,30,19,117\nJul,5,29,17,123\nAug,6,26,16,23\nSep,-1,37,18,20\nOct,-1,38,18,138\nNov,10,37,23,166\nDec,-3,28,12,62",
    "description": "monthly climate temperature data for city variant 3",
    "label": "temperature_data"
  },
  {
    "item_id": "tbl_014",
    "modality": "table",
    "category": "temperature_data",
    "content": "# Adelaide Monthly Climate Data 2018\nMonth,Min_C,Max_C,Avg_C,Rainfall_mm\nJan,-5,32,13,20\nFeb,-2,21,9,77\nMar,-1,26,12,27\nApr,4,30,17,162\nMay,-4,35,15,35\nJun,9,32,20,131\nJul,-3,20,8,10\nAug,10,27,18,10\nSep,-1,33,16,85\nOct,1,26,13,16\nNov,3,18,10,171\nDec,0,24,12,167",
    "description": "monthly climate temperature data for city variant 4",
    "label": "temperature_data"
  },
  {
    "item_id": "tbl_015",
    "modality": "table",
    "category": "temperature_data",
    "content": "# Auckland Monthly Climate Data 2019\nMonth,Min_C,Max_C,Avg_C,Rainfall_mm\nJan,8,29,18,81\nFeb,-1,18,8,38\nMar,3,25,14,44\nApr,5,34,19,83\nMay,-3,21,9,68\nJun,8,28,18,26\nJul,-1,18,8,25\nAug,6,22,14,100\nSep,9,21,15,55\nOct,10,34,22,15\nNov,4,21,12,111\nDec,2,37,19,23",
    "description": "monthly climate temperature data for city variant 5",
    "label": "temperature_data"
  },
  {
    "item_id": "tbl_016",
    "modality": "table",
    "category": "temperature_data",
    "content": "# Wellington Monthly Climate Data 2020\nMonth,Min_C,Max_C,Avg_C,Rainfall_mm\nJan,2,23,12,167\nFeb,-4,30,13,106\nMar,8,33,20,21\nApr,4,37,20,60\nMay,9,27,18,125\nJun,1,24,12,78\nJul,9,33,21,47\nAug,-4,21,8,13\nSep,4,30,17,156\nOct,1,36,18,21\nNov,-1,20,9,90\nDec,2,36,19,101",
    "description": "monthly climate temperature data for city variant 6",
    "label": "temperature_data"
  },
  {
    "item_id": "tbl_017",
    "modality": "table",
    "category": "temperature_data",
    "content": "# Christchurch Monthly Climate Data 2021\nMonth,Min_C,Max_C,Avg_C,Rainfall_mm\nJan,5,38,21,119\nFeb,-3,26,11,135\nMar,0,20,10,111\nApr,7,20,13,127\nMay,3,25,14,84\nJun,7,35,21,19\nJul,7,23,15,126\nAug,-3,32,14,177\nSep,4,27,15,104\nOct,-1,25,12,57\nNov,-4,24,10,167\nDec,0,27,13,102",
    "description": "monthly climate temperature data for city variant 7",
    "label": "temperature_data"
  },
  {
    "item_id": "tbl_018",
    "modality": "table",
    "category": "temperature_data",
    "content": "# Darwin Monthly Climate Data 2022\nMonth,Min_C,Max_C,Avg_C,Rainfall_mm\nJan,-4,23,9,86\nFeb,1,30,15,52\nMar,-3,26,11,161\nApr,0,28,14,38\nMay,6,29,17,179\nJun,-5,32,13,173\nJul,9,32,20,155\nAug,-5,37,16,13\nSep,7,35,21,67\nOct,-2,38,18,109\nNov,-2,32,15,83\nDec,-4,33,14,77",
    "description": "monthly climate temperature data for city variant 8",
    "label": "temperature_data"
  },
  {
    "item_id": "tbl_019",
    "modality": "table",
    "category": "temperature_data",
    "content": "# Hobart Monthly Climate Data 2023\nMonth,Min_C,Max_C,Avg_C,Rainfall_mm\nJan,6,37,21,59\nFeb,-1,22,10,75\nMar,2,24,13,160\nApr,8,34,21,91\nMay,4,33,18,23\nJun,3,25,14,140\nJul,10,22,16,158\nAug,5,32,18,164\nSep,-4,19,7,37\nOct,9,26,17,108\nNov,0,19,9,166\nDec,6,27,16,148",
    "description": "monthly climate temperature data for city variant 9",
    "label": "temperature_data"
  },
  {
    "item_id": "tbl_020",
    "modality": "table",
    "category": "stock_data",
    "content": "# Stock Market OHLCV Data 2024-01-15 Portfolio-1\nTicker,Open,High,Low,Close,Volume\nAPEX,293,332,284,307,41028387\nAPXC,535,544,511,511,31989649\nAPLX,315,355,303,315,48626990",
    "description": "stock market OHLCV data portfolio variant 0",
    "label": "stock_data"
  },
  {
    "item_id": "tbl_021",
    "modality": "table",
    "category": "stock_data",
    "content": "# Stock Market OHLCV Data 2024-02-15 Portfolio-2\nTicker,Open,High,Low,Close,Volume\nBLUE,272,291,243,278,3335228\nBLSR,659,673,646,663,33349819\nBLST,479,491,462,481,14927585",
    "description": "stock market OHLCV data portfolio variant 1",
    "label": "stock_data"
  },
  {
    "item_id": "tbl_022",
    "modality": "table",
    "category": "stock_data",
    "content": "# Stock Market OHLCV Data 2024-03-15 Portfolio-3\nTicker,Open,High,Low,Close,Volume\nCRMS,683,691,660,689,3181542\nCRIM,98,138,89,121,39569828\nCRMD,189,194,179,190,5541068",
    "description": "stock market OHLCV data portfolio variant 2",
    "label": "stock_data"
  },
  {
    "item_id": "tbl_023",
    "modality": "table",
    "category": "stock_data",
    "content": "# Stock Market OHLCV Data 2024-04-15 Portfolio-4\nTicker,Open,High,Low,Close,Volume\nDLTS,366,399,345,398,39192425\nDELT,382,392,359,371,37716456\nDLTA,401,437,390,432,34886152",
    "description": "stock market OHLCV data portfolio variant 3",
    "label": "stock_data"
  },
  {
    "item_id": "tbl_024",
    "modality": "table",
    "category": "stock_data",
    "content": "# Stock Market OHLCV Data 2024-05-15 Portfolio-5\nTicker,Open,High,Low,Close,Volume\nECHO,384,411,358,404,24200715\nECHV,551,586,536,570,1632972\nECVC,549,574,534,560,20863497",
    "description": "stock market OHLCV data portfolio variant 4",
    "label": "stock_data"
  },
  {
    "item_id": "tbl_025",
    "modality": "table",
    "category": "stock_data",
    "content": "# Stock Market OHLCV Data 2024-06-15 Portfolio-6\nTicker,Open,High,Low,Close,Volume\nFALC,695,735,688,705,21112933\nFLCN,65,102,40,94,37196675\nFLGP,758,775,728,728,4544502",
    "description": "stock market OHLCV data portfolio variant 5",
    "label": "stock_data"
  },
  {
    "item_id": "tbl_026",
    "modality": "table",
    "category": "stock_data",
    "content": "# Stock Market OHLCV Data 2024-07-15 Portfolio-7\nTicker,Open,High,Low,Close,Volume\nGNSS,104,143,77,85,9617294\nGNES,687,693,661,683,46859383\nGNSH,571,599,556,556,28888431",
    "description": "stock market OHLCV data portfolio variant 6",
    "label": "stock_data"
  },
  {
    "item_id": "tbl_027",
    "modality": "table",
    "category": "stock_data",
    "content": "# Stock Market OHLCV Data 2024-08-15 Portfolio-8\nTicker,Open,High,Low,Close,Volume\nHRZN,430,462,421,436,24828364\nHRZT,794,811,776,805,34322463\nHOTZ,290,307,285,305,22096156",
    "description": "stock market OHLCV data portfolio variant 7",
    "label": "stock_data"
  },
  {
    "item_id": "tbl_028",
    "modality": "table",
    "category": "stock_data",
    "content": "# Stock Market OHLCV Data 2024-09-15 Portfolio-9\nTicker,Open,High,Low,Close,Volume\nIRND,306,326,287,299,14806934\nIRCL,542,557,532,550,42039551\nIRNC,532,548,507,522,42579962",
    "description": "stock market OHLCV data portfolio variant 8",
    "label": "stock_data"
  },
  {
    "item_id": "tbl_029",
    "modality": "table",
    "category": "stock_data",
    "content": "# Stock Market OHLCV Data 2024-01-15 Portfolio-10\nTicker,Open,High,Low,Close,Volume\nJPSS,770,801,756,758,16796118\nJUPX,734,773,710,718,17268011\nJPTS,781,815,758,762,5780717",
    "description": "stock market OHLCV data portfolio variant 9",
    "label": "stock_data"
  },
  {
    "item_id": "code_000",
    "modality": "code",
    "category": "python_algorithms",
    "content": "def binary_search(arr, target):\n    left, right = 0, len(arr) - 1\n    while left <= right:\n        mid = (left + right) // 2\n        if arr[mid] == target: return mid\n        elif arr[mid] < target: left = mid + 1\n        else: right = mid - 1\n    return -1",
    "label": "python_algorithms"
  },
  {
    "item_id": "code_001",
    "modality": "code",
    "category": "python_algorithms",
    "content": "def quick_sort(arr):\n    if len(arr) <= 1: return arr\n    pivot = arr[len(arr) // 2]\n    left  = [x for x in arr if x < pivot]\n    mid   = [x for x in arr if x == pivot]\n    right = [x for x in arr if x > pivot]\n    return quick_sort(left) + mid + quick_sort(right)",
    "label": "python_algorithms"
  },
  {
    "item_id": "code_002",
    "modality": "code",
    "category": "python_algorithms",
    "content": "class Node:\n    def __init__(self, data): self.data = data; self.next = None\nclass LinkedList:\n    def __init__(self): self.head = None\n    def append(self, data):\n        n = Node(data)\n        if not self.head: self.head = n; return\n        c = self.head\n        while c.next: c = c.next\n        c.next = n",
    "label": "python_algorithms"
  },
  {
    "item_id": "code_003",
    "modality": "code",
    "category": "python_algorithms",
    "content": "def fibonacci(n):\n    \"\"\"Return nth Fibonacci number using iterative approach.\"\"\"\n    if n <= 0: return 0\n    if n == 1: return 1\n    a, b = 0, 1\n    for _ in range(2, n + 1):\n        a, b = b, a + b\n    return b",
    "label": "python_algorithms"
  },
  {
    "item_id": "code_004",
    "modality": "code",
    "category": "python_algorithms",
    "content": "class HashMap:\n    def __init__(self, size=256):\n        self.buckets = [[] for _ in range(size)]\n        self.size = size\n    def _hash(self, k): return hash(k) % self.size\n    def put(self, k, v):\n        b = self.buckets[self._hash(k)]\n        for i, (ek, ev) in enumerate(b):\n            if ek == k: b[i] = (k, v); return\n        b.append((k, v))\n    def get(self, k):\n        for ek, ev in self.buckets[self._hash(k)]:\n            if ek == k: return ev",
    "label": "python_algorithms"
  },
  {
    "item_id": "code_005",
    "modality": "code",
    "category": "python_algorithms",
    "content": "def merge_sort(arr):\n    if len(arr) <= 1: return arr\n    mid   = len(arr) // 2\n    left  = merge_sort(arr[:mid])\n    right = merge_sort(arr[mid:])\n    result, i, j = [], 0, 0\n    while i < len(left) and j < len(right):\n        if left[i] <= right[j]: result.append(left[i]); i += 1\n        else: result.append(right[j]); j += 1\n    return result + left[i:] + right[j:]",
    "label": "python_algorithms"
  },
  {
    "item_id": "code_006",
    "modality": "code",
    "category": "python_algorithms",
    "content": "def dfs(graph, start, visited=None):\n    \"\"\"Depth-first search traversal of a graph.\"\"\"\n    if visited is None: visited = set()\n    visited.add(start)\n    for neighbor in graph.get(start, []):\n        if neighbor not in visited:\n            dfs(graph, neighbor, visited)\n    return visited",
    "label": "python_algorithms"
  },
  {
    "item_id": "code_007",
    "modality": "code",
    "category": "python_algorithms",
    "content": "from collections import OrderedDict\nclass LRUCache:\n    def __init__(self, capacity):\n        self.cache = OrderedDict()\n        self.cap = capacity\n    def get(self, key):\n        if key not in self.cache: return -1\n        self.cache.move_to_end(key)\n        return self.cache[key]\n    def put(self, key, value):\n        self.cache[key] = value\n        self.cache.move_to_end(key)\n        if len(self.cache) > self.cap:\n            self.cache.popitem(last=False)",
    "label": "python_algorithms"
  },
  {
    "item_id": "code_008",
    "modality": "code",
    "category": "python_algorithms",
    "content": "class TrieNode:\n    def __init__(self): self.children = {}; self.is_end = False\nclass Trie:\n    def __init__(self): self.root = TrieNode()\n    def insert(self, word):\n        node = self.root\n        for ch in word:\n            if ch not in node.children: node.children[ch] = TrieNode()\n            node = node.children[ch]\n        node.is_end = True\n    def search(self, word):\n        node = self.root\n        for ch in word: \n            if ch not in node.children: return False\n            node = node.children[ch]\n        return node.is_end",
    "label": "python_algorithms"
  },
  {
    "item_id": "code_009",
    "modality": "code",
    "category": "python_algorithms",
    "content": "import heapq\ndef dijkstra(graph, start):\n    \"\"\"Dijkstra shortest path algorithm.\"\"\"\n    dist = {node: float('inf') for node in graph}\n    dist[start] = 0\n    pq = [(0, start)]\n    while pq:\n        d, u = heapq.heappop(pq)\n        if d > dist[u]: continue\n        for v, w in graph[u]:\n            if dist[u] + w < dist[v]:\n                dist[v] = dist[u] + w\n                heapq.heappush(pq, (dist[v], v))\n    return dist",
    "label": "python_algorithms"
  },
  {
    "item_id": "code_010",
    "modality": "code",
    "category": "sql_queries",
    "content": "-- customer_total_spending: JOIN customers and orders, aggregate total spend\nSELECT c.customer_name, SUM(o.total_amount) AS total_spent\nFROM customers c JOIN orders o ON c.customer_id = o.customer_id\nWHERE o.status = 'completed'\nGROUP BY c.customer_name HAVING SUM(o.total_amount) > 500\nORDER BY total_spent DESC LIMIT 20;",
    "label": "sql_queries"
  },
  {
    "item_id": "code_011",
    "modality": "code",
    "category": "sql_queries",
    "content": "-- employee_salary_window_rank: window function AVG and RANK per department\nSELECT employee_id, department, salary,\n    AVG(salary) OVER (PARTITION BY department) AS dept_avg,\n    RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS salary_rank\nFROM employees ORDER BY department, salary_rank;",
    "label": "sql_queries"
  },
  {
    "item_id": "code_012",
    "modality": "code",
    "category": "sql_queries",
    "content": "-- recursive_org_chart: recursive CTE to traverse employee hierarchy\nWITH RECURSIVE org AS (\n    SELECT id, name, manager_id, 1 AS level FROM employees WHERE manager_id IS NULL\n    UNION ALL\n    SELECT e.id, e.name, e.manager_id, o.level+1 FROM employees e JOIN org o ON e.manager_id=o.id\n)\nSELECT id, name, level FROM org ORDER BY level;",
    "label": "sql_queries"
  },
  {
    "item_id": "code_013",
    "modality": "code",
    "category": "sql_queries",
    "content": "-- inventory_transaction: atomic inventory deduction with audit log\nBEGIN;\nUPDATE inventory SET quantity = quantity - 10\n    WHERE product_id = 42 AND quantity >= 10;\nINSERT INTO order_log(product_id, quantity, transaction_date)\n    VALUES (42, 10, CURRENT_DATE);\nCOMMIT;",
    "label": "sql_queries"
  },
  {
    "item_id": "code_014",
    "modality": "code",
    "category": "sql_queries",
    "content": "-- create_documents_table: DDL with full-text search index\nCREATE TABLE documents(\n    id SERIAL PRIMARY KEY,\n    title VARCHAR(500),\n    content TEXT,\n    created_at TIMESTAMP DEFAULT NOW()\n);\nCREATE INDEX idx_doc_title ON documents(title);\nCREATE INDEX idx_doc_created ON documents(created_at DESC);",
    "label": "sql_queries"
  },
  {
    "item_id": "code_015",
    "modality": "code",
    "category": "sql_queries",
    "content": "-- product_revenue_pivot: pivot monthly revenue with CASE WHEN\nSELECT product_id,\n    SUM(CASE WHEN MONTH(sale_date)=1 THEN amount ELSE 0 END) AS jan,\n    SUM(CASE WHEN MONTH(sale_date)=2 THEN amount ELSE 0 END) AS feb,\n    SUM(CASE WHEN MONTH(sale_date)=3 THEN amount ELSE 0 END) AS mar,\n    SUM(amount) AS total\nFROM sales GROUP BY product_id ORDER BY total DESC;",
    "label": "sql_queries"
  },
  {
    "item_id": "code_016",
    "modality": "code",
    "category": "sql_queries",
    "content": "-- running_total_cumulative: cumulative SUM window function over time\nSELECT order_date, daily_revenue,\n    SUM(daily_revenue) OVER (ORDER BY order_date ROWS UNBOUNDED PRECEDING) AS running_total,\n    AVG(daily_revenue) OVER (ORDER BY order_date ROWS 6 PRECEDING) AS rolling_7day_avg\nFROM daily_sales ORDER BY order_date;",
    "label": "sql_queries"
  },
  {
    "item_id": "code_017",
    "modality": "code",
    "category": "sql_queries",
    "content": "-- find_duplicate_emails: detect duplicates with GROUP BY HAVING COUNT\nSELECT email, COUNT(*) AS occurrences\nFROM users\nGROUP BY email\nHAVING COUNT(*) > 1\nORDER BY occurrences DESC;",
    "label": "sql_queries"
  },
  {
    "item_id": "code_018",
    "modality": "code",
    "category": "sql_queries",
    "content": "-- upsert_user_profile: INSERT with ON CONFLICT DO UPDATE (upsert)\nINSERT INTO user_profiles (user_id, display_name, bio, updated_at)\nVALUES (42, 'Alice', 'Software engineer', NOW())\nON CONFLICT (user_id)\nDO UPDATE SET\n    display_name = EXCLUDED.display_name,\n    bio = EXCLUDED.bio,\n    updated_at = EXCLUDED.updated_at;",
    "label": "sql_queries"
  },
  {
    "item_id": "code_019",
    "modality": "code",
    "category": "sql_queries",
    "content": "-- delete_old_audit_logs: delete logs older than 90 days using partitioned DELETE\nDELETE FROM audit_logs\nWHERE created_at < NOW() - INTERVAL '90 days'\n    AND log_level IN ('DEBUG', 'INFO');\nVACUUM ANALYZE audit_logs;",
    "label": "sql_queries"
  },
  {
    "item_id": "code_020",
    "modality": "code",
    "category": "bash_scripts",
    "content": "#!/bin/bash\n# batch_file_word_count: count lines in each .txt file in ./input/\nfor f in ./input/*.txt; do\n    base=$(basename \"$f\" .txt)\n    wc -l \"$f\" > \"./output/${base}_count.txt\"\ndone",
    "label": "bash_scripts"
  },
  {
    "item_id": "code_021",
    "modality": "code",
    "category": "bash_scripts",
    "content": "#!/bin/bash\n# cpu_usage_monitor: log CPU usage every 60 seconds indefinitely\nwhile true; do\n    CPU=$(top -bn1 | grep 'Cpu(s)' | awk '{print $2}')\n    echo \"$(date '+%Y-%m-%d %H:%M:%S') CPU=${CPU}%\" >> /var/log/cpu_monitor.log\n    sleep 60\ndone",
    "label": "bash_scripts"
  },
  {
    "item_id": "code_022",
    "modality": "code",
    "category": "bash_scripts",
    "content": "#!/bin/bash\n# daily_backup_tar: create dated tarball backup and clean up old backups\nDEST=\"/backup/$(date +%Y%m%d)\"\nmkdir -p \"$DEST\"\ntar -czf \"$DEST/data_backup.tar.gz\" /home/user/data\nfind /backup -maxdepth 1 -mtime +30 -type d -exec rm -rf {} +\necho \"Backup complete: $DEST\"",
    "label": "bash_scripts"
  },
  {
    "item_id": "code_023",
    "modality": "code",
    "category": "bash_scripts",
    "content": "#!/bin/bash\n# deploy_application: pull latest code, install deps, migrate, restart\nset -e\ncd /opt/app\ngit pull origin main\npip install -r requirements.txt -q\npython manage.py migrate --no-input\nsystemctl restart app\necho \"Deployment complete\"",
    "label": "bash_scripts"
  },
  {
    "item_id": "code_024",
    "modality": "code",
    "category": "bash_scripts",
    "content": "#!/bin/bash\n# csv_dedup_sort: remove header, sort by column 2, deduplicate, take top 100\ntail -n +2 \"$1\" | sort -t',' -k2 -rn | awk -F',' '!seen[$1]++' | head -100 > top100.csv",
    "label": "bash_scripts"
  },
  {
    "item_id": "code_025",
    "modality": "code",
    "category": "bash_scripts",
    "content": "#!/bin/bash\n# disk_usage_alert: send email alert when disk usage exceeds 80 percent\nTHRESHOLD=80\nUSAGE=$(df / | tail -1 | awk '{print $5}' | tr -d '%')\nif [ \"$USAGE\" -gt \"$THRESHOLD\" ]; then\n    echo \"Disk usage at ${USAGE}%\" | mail -s \"DISK ALERT $(hostname)\" admin@example.com\nfi",
    "label": "bash_scripts"
  },
  {
    "item_id": "code_026",
    "modality": "code",
    "category": "bash_scripts",
    "content": "#!/bin/bash\n# log_rotation_compress: compress logs older than 7 days in /var/log/app\nfind /var/log/app -name '*.log' -mtime +7 | while read f; do\n    gzip -9 \"$f\"\n    echo \"Compressed: $f\"\ndone",
    "label": "bash_scripts"
  },
  {
    "item_id": "code_027",
    "modality": "code",
    "category": "bash_scripts",
    "content": "#!/bin/bash\n# ssh_tunnel_setup: create persistent reverse SSH tunnel with autossh\nautossh -M 0 -f -N \\\n    -o 'ServerAliveInterval 30' \\\n    -o 'ServerAliveCountMax 3' \\\n    -R 2222:localhost:22 \\\n    user@remote.example.com",
    "label": "bash_scripts"
  },
  {
    "item_id": "code_028",
    "modality": "code",
    "category": "bash_scripts",
    "content": "#!/bin/bash\n# docker_cleanup: remove stopped containers, dangling images, unused volumes\ndocker container prune -f\ndocker image prune -f\ndocker volume prune -f\ndocker network prune -f\necho \"Docker cleanup complete. Disk freed:\" && docker system df",
    "label": "bash_scripts"
  },
  {
    "item_id": "code_029",
    "modality": "code",
    "category": "bash_scripts",
    "content": "#!/bin/bash\n# git_branch_cleanup: delete local branches already merged into main\ngit fetch --prune\ngit branch --merged main | grep -v '\\* main' | grep -v 'main' | xargs -r git branch -d\necho \"Merged branches cleaned up\"",
    "label": "bash_scripts"
  }
]