# Source code for stable_datasets.images.beans

import zipfile

import datasets
from PIL import Image


class Beans(datasets.GeneratorBasedBuilder):
    """Bean disease dataset for classification of three classes: Angular Leaf Spot, Bean Rust, and Healthy leaves.

    The builder downloads one zip archive per split (train, test,
    validation) and yields every ``.jpg`` member as an ``image`` / ``label``
    example.
    """

    VERSION = datasets.Version("1.0.0")

    def _info(self):
        """Return the dataset metadata: description, features, license, citation.

        The ``label`` feature is a ``ClassLabel`` whose integer ids follow the
        order of ``names`` below (``healthy`` -> 0, ``angular_leaf_spot`` -> 1,
        ``bean_rust`` -> 2).
        """
        return datasets.DatasetInfo(
            description="""The IBeans dataset contains leaf images representing three classes: 1) Healthy leaves, 2) Angular Leaf Spot, and 3) Bean Rust. Images are collected in Uganda for disease classification in the field.""",
            features=datasets.Features(
                {
                    "image": datasets.Image(),
                    "label": datasets.ClassLabel(names=["healthy", "angular_leaf_spot", "bean_rust"]),
                }
            ),
            supervised_keys=("image", "label"),
            license="MIT License",
            citation="""@misc{makerere2020beans, author = "{Makerere AI Lab}", title = "{Bean Disease Dataset}", year = "2020", month = "January", url = "https://github.com/AI-Lab-Makerere/ibean/"}""",
        )

    def _split_generators(self, dl_manager):
        """Download the three split archives and describe one generator per split.

        Args:
            dl_manager: Download manager supplied by ``datasets``; only its
                ``download`` method is used, so the archives stay zipped and
                are read member-by-member in ``_generate_examples``.

        Returns:
            A list of ``datasets.SplitGenerator`` for the train, test and
            validation splits, each carrying the local path of its archive as
            the ``zip_path`` keyword argument.
        """
        urls = {
            "train": "https://storage.googleapis.com/ibeans/train.zip",
            "test": "https://storage.googleapis.com/ibeans/test.zip",
            "validation": "https://storage.googleapis.com/ibeans/validation.zip",
        }
        downloaded_files = dl_manager.download(urls)

        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={"zip_path": downloaded_files["train"]},
            ),
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={"zip_path": downloaded_files["test"]},
            ),
            datasets.SplitGenerator(
                name=datasets.Split.VALIDATION,
                gen_kwargs={"zip_path": downloaded_files["validation"]},
            ),
        ]

    def _generate_examples(self, zip_path):
        """Yield ``(key, example)`` pairs for every ``.jpg`` member of an archive.

        Args:
            zip_path: Path of the zip archive for one split.

        Yields:
            Tuples of the archive member name (used as the example key) and a
            dict with the decoded PIL image under ``"image"`` and the integer
            class id under ``"label"``.
        """
        with zipfile.ZipFile(zip_path, "r") as archive:
            for file_name in archive.namelist():
                if not file_name.endswith(".jpg"):
                    continue

                with archive.open(file_name) as file:
                    image_data = Image.open(file)
                    # Image.open is lazy: it only identifies the file. Read the
                    # pixel data now, while the archive member is still open,
                    # so the yielded image stays usable even if the consumer
                    # touches it after this generator has moved on and the
                    # member handle has been closed.
                    image_data.load()

                    # The class name is taken from the second path component.
                    # NOTE(review): assumes members are laid out as
                    # "<split>/<label>/<file>.jpg" -- confirm against the archives.
                    label_name = file_name.split("/")[1]
                    label = self.info.features["label"].str2int(label_name)

                    yield file_name, {"image": image_data, "label": label}