"""
Load bboxes and poses from existing .json file, generated by AP.
By default, AlphaPose saves the results for all images in one json file,
which is similar to the output format used by COCO.
For the basic AlphaPose output format see https://github.com/MVIG-SJTU/AlphaPose/blob/master/docs/output.md.
Within the .json files, AlphaPose uses the following overall structure:
image_id
The name of the image as string. No additional path information is given.
keypoints
The body part locations and detection confidence formatted as array ``[x1, y1, c1, x2, y2, c2, ...]``.
Where c is the confidence score in the range [0,1] for the MPII dataset and range [0,6] for the COCO dataset.
There can be an arbitrary number of key points, it is just known, that the length has to be divisible by three.
score
The confidence score for the whole person, computed by AlphaPose's parametric pose NMS.
box
The detected bounding box as an array of floats in XYWH format.
idx
The integer index of the detected person.
"""
import imagesize
import torch as t
from torchvision import tv_tensors as tvte
from dgs.models.dataset.dataset import BBoxDataset
from dgs.utils.files import read_json
from dgs.utils.state import State
from dgs.utils.types import Config, ImgShape, NodePath, Validations
ap_load_validations: Validations = {"data_path": [str, "file exists in project", ("endswith", ".json")]}
class AlphaPoseLoader(BBoxDataset):
    """Load precomputed AlphaPose detections from a single JSON file.

    The file is expected to contain one list of per-detection dicts in the
    default AlphaPose output format (see module docstring): ``image_id``,
    ``keypoints``, ``score``, ``box``, and ``idx``.
    All images referenced by the file must share one canvas size.
    """

    def __init__(self, config: Config, path: NodePath) -> None:
        """Read the JSON file given by ``params["data_path"]`` and resolve image paths.

        Raises:
            NotImplementedError: If the JSON file does not contain a list of detections.
            ValueError: If the referenced images do not all have the same shape.
        """
        super().__init__(config, path)
        self.validate_params(ap_load_validations)

        # `json_data` (not `json`) to avoid shadowing the stdlib module name.
        json_data = read_json(self.params["data_path"])
        if isinstance(json_data, list):
            self.data: list[dict] = json_data
        else:
            raise NotImplementedError(f"JSON file {self.params['data_path']} does not contain known instances.")

        canvas_sizes: set[ImgShape] = set()
        for detection in self.data:
            # `img_path` to avoid shadowing the `path` (NodePath) parameter.
            img_path = self.get_path_in_dataset(detection["image_id"])
            detection["full_img_path"] = tuple([img_path])
            # imagesize.get() returns (w, h) while our own format is (h, w).
            canvas_sizes.add(imagesize.get(img_path)[::-1])
        if len(canvas_sizes) > 1:
            raise ValueError(f"Expected all images to have the same shape, but found {canvas_sizes}")
        self.canvas_size: ImgShape = canvas_sizes.pop()

    def arbitrary_to_ds(self, a, idx: int) -> State:
        """Here `a` is one dict of the AP-JSON containing image_id, category_id, keypoints, score, box, and idx."""
        # Flat [x1, y1, c1, x2, y2, c2, ...] -> (1, J, 3), then split the last
        # dim into coordinates (1, J, 2) and per-joint confidence (1, J, 1).
        keypoints, visibility = (
            t.tensor(a["keypoints"], dtype=t.float32, device=self.device).reshape((1, -1, 3)).split([2, 1], dim=-1)
        )
        return State(
            filepath=a["full_img_path"],
            # NOTE(review): the documented AlphaPose key is "box", not "bboxes" —
            # verify which key the consumed JSON actually uses.
            bbox=tvte.BoundingBoxes(a["bboxes"], format="XYWH", canvas_size=self.canvas_size),
            keypoints=keypoints,
            person_id=a["idx"],
            # additional values which are not required
            image_id=a["image_id"],
            joint_weight=visibility,
            person_score=a["score"],  # fixme divide by 6 for COCO, by 1 for MPII...?
        )

    def __getitems__(self, indices: list[int]) -> State:
        """Build one batched :class:`State` for the detections at ``indices``."""

        def stack_key(key: str) -> t.Tensor:
            # Stack the value of ``key`` from every requested detection along dim 0.
            return t.stack([t.tensor(self.data[i][key], device=self.device) for i in indices])

        # Bugfix: the stacked tensor was previously re-wrapped in ``t.tensor(...)``,
        # which copies the data and raises a UserWarning; stack once, then move/cast.
        keypoints, visibility = (
            t.stack([t.tensor(self.data[i]["keypoints"]).reshape((-1, 3)) for i in indices])
            .to(device=self.device, dtype=t.float32)
            .split([2, 1], dim=-1)
        )
        ds = State(
            validate=False,
            filepath=tuple(self.data[i]["full_img_path"] for i in indices),
            # NOTE(review): documented AlphaPose key is "box" — confirm "bboxes".
            bbox=tvte.BoundingBoxes(stack_key("bboxes"), format="XYWH", canvas_size=self.canvas_size),
            keypoints=keypoints,
            person_id=stack_key("idx").int(),
            # additional values which are not required
            joint_weight=visibility,
            # NOTE(review): the module docstring says image_id is a string; a string
            # cannot be turned into an int tensor — confirm the data is numeric here.
            image_id=stack_key("image_id").int(),
        )
        # make sure to get image crop for batch
        self.get_image_crops(ds)
        return ds