Source code for stable_datasets.images.cifar100_c

import tarfile
from io import BytesIO

import datasets
import numpy as np
from PIL import Image

from stable_datasets.utils import BaseDatasetBuilder


[docs] class CIFAR100C(BaseDatasetBuilder): """CIFAR-100-C dataset with corrupted CIFAR-100 images.""" VERSION = datasets.Version("1.0.0") # Single source-of-truth for dataset provenance + download locations. SOURCE = { "homepage": "https://zenodo.org/records/3555552", "assets": { "test": "https://zenodo.org/records/3555552/files/CIFAR-100-C.tar?download=1", }, "citation": """@article{hendrycks2019robustness, title={Benchmarking Neural Network Robustness to Common Corruptions and Perturbations}, author={Dan Hendrycks and Thomas Dietterich}, journal={Proceedings of the International Conference on Learning Representations}, year={2019}}""", } def _info(self): return datasets.DatasetInfo( description="""CIFAR-100-C is a corrupted version of the CIFAR-100 dataset, with 19 different types of corruptions applied to the images. The dataset consists of 100 classes and 5 levels of severity per corruption type.""", features=datasets.Features( { "image": datasets.Image(), "label": datasets.ClassLabel(names=self._fine_labels()), "corruption_name": datasets.Value("string"), "corruption_level": datasets.Value("int32"), } ), supervised_keys=("image", "label"), homepage=self.SOURCE["homepage"], license="CC BY 4.0", citation=self.SOURCE["citation"], ) def _generate_examples(self, data_path, split, corruptions=None): """Generate examples from the tar archive. Note: split parameter is unused as CIFAR-100-C only contains a test split. """ with tarfile.open(data_path, "r") as tar: array_file = BytesIO() array_file.write(tar.extractfile("CIFAR-100-C/labels.npy").read()) array_file.seek(0) labels = np.load(array_file) # Determine corruptions to load if isinstance(corruptions, str): corruptions = [corruptions] elif corruptions is None: corruptions = self._corruptions() images, labels_list, corruption_names, corruption_levels = [], [], [], [] for corruption in corruptions: assert corruption in self._corruptions(), f"Unknown corruption type: {corruption}" print(f"Loading corruption: {corruption}") array_file = BytesIO() array_file.write(tar.extractfile(f"CIFAR-100-C/{corruption}.npy").read()) array_file.seek(0) corrupted_images = np.load(array_file) for level in range(1, 6): start_idx, end_idx = (level - 1) * 10000, level * 10000 images.extend(corrupted_images[start_idx:end_idx]) labels_list.extend(labels) corruption_names.extend([corruption] * 10000) corruption_levels.extend([level] * 10000) for idx, (image, label, corruption_name, corruption_level) in enumerate( zip(images, labels_list, corruption_names, corruption_levels) ): yield ( idx, { "image": Image.fromarray(image), "label": label, "corruption_name": corruption_name, "corruption_level": corruption_level, }, ) @staticmethod def _corruptions(): return [ "zoom_blur", "speckle_noise", "spatter", "snow", "shot_noise", "saturate", "pixelate", "motion_blur", "jpeg_compression", "impulse_noise", "glass_blur", "gaussian_noise", "gaussian_blur", "frost", "fog", "elastic_transform", "defocus_blur", "contrast", "brightness", ] @staticmethod def _fine_labels(): """Returns the list of CIFAR-100 fine labels (100 classes).""" return [ "apple", "aquarium_fish", "baby", "bear", "beaver", "bed", "bee", "beetle", "bicycle", "bottle", "bowl", "boy", "bridge", "bus", "butterfly", "camel", "can", "castle", "caterpillar", "cattle", "chair", "chimpanzee", "clock", "cloud", "cockroach", "couch", "crab", "crocodile", "cup", "dinosaur", "dolphin", "elephant", "flatfish", "forest", "fox", "girl", "hamster", "house", "kangaroo", "keyboard", "lamp", "lawn_mower", "leopard", "lion", "lizard", "lobster", "man", "maple_tree", "motorcycle", "mountain", "mouse", "mushroom", "oak_tree", "orange", "orchid", "otter", "palm_tree", "pear", "pickup_truck", "pine_tree", "plain", "plate", "poppy", "porcupine", "possum", "rabbit", "raccoon", "ray", "road", "rocket", "rose", "sea", "seal", "shark", "shrew", "skunk", "skyscraper", "snail", "snake", "spider", "squirrel", "streetcar", "sunflower", "sweet_pepper", "table", "tank", "telephone", "television", "tiger", "tractor", "train", "trout", "tulip", "turtle", "wardrobe", "whale", "willow_tree", "wolf", "woman", "worm", ]