Python

from pycocotools.coco import COCO
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import os
from os.path import join
from tqdm import tqdm
import json

class coco_category_filter:
 """
 Downloads images of one category & filters jsons 
 to only keep annotations of this category
 """
 def __init__(self, json_path, imgs_dir, categ='person'):
     self.coco = COCO(json_path) # instanciate coco class
     self.json_path = json_path
     self.imgs_dir = imgs_dir
     self.categ = categ
     self.images = self.get_imgs_from_json()        
     
 def get_imgs_from_json(self):
     """returns image names of the desired category"""
     # instantiate COCO specifying the annotations json path
     # Specify a list of category names of interest
     catIds = self.coco.getCatIds(catNms=[self.categ])
     print("catIds: ", catIds)
     # Get the corresponding image ids and images using loadImgs
     imgIds = self.coco.getImgIds(catIds=catIds)
     images = self.coco.loadImgs(imgIds)
     print(f"{len(images)} images in '{self.json_path}' with '{self.categ}' instances")
     self.catIds = catIds # list
     return images
 
 def save_imgs(self):
     """saves the images of this category"""
     print("Saving the images with required categories ...")
     os.makedirs(self.imgs_dir, exist_ok=True)
     # Save the images into a local folder
     ################################################# Modified lines
     session = requests.Session()
     retry = Retry(connect=3, backoff_factor=0.5)
     adapter = HTTPAdapter(max_retries=retry)
     session.mount('http://', adapter)
     session.mount('https://', adapter)
     #################################################
     for im in tqdm(self.images):
         img_data = session.get(im['coco_url']).content
         with open(os.path.join(self.imgs_dir, im['file_name']), 'wb') as handler:
             handler.write(img_data)
 
 def filter_json_by_category(self, new_json_path):
     """creates a new json with the desired category"""
     # {'supercategory': 'person', 'id': 1, 'name': 'person'}
     ### Filter images:
     print("Filtering the annotations ... ")
     json_parent = os.path.split(new_json_path)[0]
     os.makedirs(json_parent, exist_ok=True)
     imgs_ids = [x['id'] for x in self.images] # get img_ids of imgs with the category
     new_imgs = [x for x in self.coco.dataset['images'] if x['id'] in imgs_ids]
     catIds = self.catIds
     ### Filter annotations
     new_annots = [x for x in self.coco.dataset['annotations'] if x['category_id'] in catIds]
     ### Reorganize the ids
     new_imgs, annotations = self.modify_ids(new_imgs, new_annots)
     ### Filter categories
     new_categories = [x for x in self.coco.dataset['categories'] if x['id'] in catIds]
     print("new_categories: ", new_categories)
     data = {
         "info": self.coco.dataset['info'],
         "licenses": self.coco.dataset['licenses'],
         "images": new_imgs, 
         "annotations": new_annots,
         "categories": new_categories 
         }
     print("saving json: ")
     with open(new_json_path, 'w') as f:
         json.dump(data, f)

 def modify_ids(self, images, annotations):
     """
     creates new ids for the images. I.e., reorganizes the ids and returns the dictionaries back
     images: list of images dictionaries
     imId_counter: image id starting from one (each dicto will start with id of last json +1)
     """
     print("Reinitialicing images and annotation IDs ...")
     ### Images
     old_new_imgs_ids = {}  # necessary for the annotations!
     for n,im in enumerate(images):
         old_new_imgs_ids[images[n]['id']] = n+1  # dicto with old im_ids and new im_ids
         images[n]['id'] = n+1 # reorganize the ids
     ### Annotations
     for n,ann in enumerate(annotations):
         annotations[n]['id'] = n+1
         old_image_id = annotations[n]['image_id']
         annotations[n]['image_id'] = old_new_imgs_ids[old_image_id]  # replace im_ids in the annotations as well
     return images, annotations


def main(subset, year, root_dir, category='person'):
 json_file = join(os.path.split(root_dir)[0], 'instances_'+subset+year+'.json')   # local path
 imgs_dir = join(root_dir, category + '_' + subset)
 new_json_file = join(root_dir, 'annotations', subset+".json")
 coco_filter = coco_category_filter(json_file, imgs_dir, categ=category) # instanciate class
 coco_filter.save_imgs()
 coco_filter.filter_json_by_category(new_json_file)


if __name__ == '__main__':
 subset, year='train', '2017'
 root_dir = './datasets/COCO/annotations'
 main(subset, year, root_dir, category='person')
Search

delet categories from coco dataset

ADD CONTENT