Weighted Boxes Fusion (WBF) - BBox ensemble 방식 with code

728x90

WBF란 object detection task에서 여러 모델로 추론된 결과 bbox들을 ensemble 하는 방법입니다.
서로 다른 network 혹은 같은 network이지만 다른 epoch에서 학습된 model을 사용하여 test data를 추론하게 되면 서로 다른 bbox들을 예측하게 됩니다.
이때 이 예측 값들을 효과적으로 ensemble을 한다면 더 좋은 성능을 낼 수 있게 됩니다.
주로 kaggle과 같은 경진 대회에서 성능을 더 높이기 위한 방법으로 사용이 됩니다.

중복되거나 불필요한 bbox를 제거하는 방법으로는 기존에 NMS(Non-Maximum Suppression)와 같은 방법들이 존재하지만, WBF는 이와는 조금 다른 방식으로 bbox를 정리합니다.

NMS의 경우엔 단순히 더 나은 bbox를 찾기 위해서 다른 후보군의 bbox를 제거하는 방식으로 bbox의 개수를 줄여나가게 됩니다.
하지만 WBF의 경우 예측된 bbox를 모두 사용해서 더 나은 bbox를 만들어 나가는 과정을 거치게 됩니다.

실제 활용 코드 (공식 GitHub 저장소 https://github.com/ZFTurbo/Weighted-Boxes-Fusion 를 참고하였습니다.)

대회에서 사용하기 위해 작성한 코드라서 대회용 data 형식(각 예측이 image_id, bbox([x, y, w, h]), score, category_id 키를 갖는 json)에 맞춰져 있습니다.
만약 사용하신다면 본인의 data 형식에 맞게 수정해서 사용하시기 바랍니다.

import numpy as np
import json
import time
import os
from glob import glob
from multiprocessing import Pool, Process, Manager, cpu_count
from ensemble_boxes import *

# Image size in pixels. Boxes are divided by these values before fusion and
# multiplied by them again afterwards.
img_h = 1080.0
img_w = 1920.0

def process_single_id(id, res_boxes, weights, params):
    """Fuse the boxes predicted for a single image with Weighted Boxes Fusion.

    Args:
        id: Image identifier, used as a key into ``res_boxes``.
        res_boxes: Mapping ``image id -> list`` holding one entry per model.
            Each entry is a list of ``[bbox, score, label]`` predictions,
            where ``bbox`` is ``[x, y, width, height]`` in pixels.
        weights: Per-model weights forwarded to ``weighted_boxes_fusion``.
        params: Dict providing ``iou_thr``, ``skip_box_thr`` and ``conf_type``.

    Returns:
        A 4-tuple ``(boxes, scores, labels, ids)``. ``boxes`` and ``scores``
        are the arrays returned by ``weighted_boxes_fusion`` (boxes are in the
        same normalised ``[x1, y1, x2, y2]`` form that is passed in),
        ``labels`` is a string array holding the original label values and
        ``ids`` is a list repeating ``id`` once per fused box. When there is
        nothing to fuse, three empty arrays and an empty list are returned so
        that callers can always unpack four values.
    """
    boxes_list = []
    scores_list = []
    labels_list = []
    # Labels are remapped to consecutive integers for the fusion step and
    # mapped back to their original values afterwards.
    labels_to_use_forward = dict()
    labels_to_use_backward = dict()

    for model_preds in res_boxes[id]:
        boxes = []
        scores = []
        labels = []

        # Convert [x, y, w, h] pixel boxes to corner form normalised to 0~1.
        for pred in model_preds:
            bbox = pred[0]
            scr = float(pred[1])
            lbl = pred[2]
            box_x1 = float(bbox[0] / img_w)
            box_y1 = float(bbox[1] / img_h)
            box_x2 = float((bbox[0] + bbox[2]) / img_w)
            box_y2 = float((bbox[1] + bbox[3]) / img_h)

            if scr <= 0:
                print(f'Problem with box score: {scr}. skip it')
                # Actually skip the box, as the message announces.
                continue

            boxes.append([box_x1, box_y1, box_x2, box_y2])
            scores.append(scr)
            if lbl not in labels_to_use_forward:
                cur_point = len(labels_to_use_forward)
                labels_to_use_forward[lbl] = cur_point
                labels_to_use_backward[cur_point] = lbl
            labels.append(labels_to_use_forward[lbl])

        boxes_list.append(np.array(boxes, dtype=np.float32))
        scores_list.append(np.array(scores, dtype=np.float32))
        labels_list.append(np.array(labels, dtype=np.int32))

    if not boxes_list:
        # Same arity as the normal return path.
        return np.array([]), np.array([]), np.array([]), []

    merged_boxes, merged_scores, merged_labels = weighted_boxes_fusion(
        boxes_list,
        scores_list,
        labels_list,
        weights=weights,
        iou_thr=params['iou_thr'],
        skip_box_thr=params['skip_box_thr'],
        conf_type=params['conf_type'],
    )

    # `np.str` was removed in NumPy 1.24; the builtin `str` is the same dtype.
    merged_labels = np.array(
        [labels_to_use_backward[int(m)] for m in merged_labels], dtype=str
    )

    ids_list = [id] * len(merged_labels)

    return merged_boxes.copy(), merged_scores.copy(), merged_labels.copy(), ids_list.copy()

# The WBF step runs in multiple processes; this is the per-process worker.
def process_part_of_data(proc_number, return_dict, ids_to_use, res_boxes, weights, params):
    """Fuse the boxes of every image in ``ids_to_use`` and publish the result.

    Args:
        proc_number: Index of this worker; used as the key in ``return_dict``.
        return_dict: Dict-like object that receives the list of results.
        ids_to_use: Image ids this worker is responsible for.
        res_boxes: Per-image predictions, passed through to
            ``process_single_id``.
        weights: Per-model weights, passed through to ``process_single_id``.
        params: Fusion parameters, passed through to ``process_single_id``.

    The stored value is a list with one ``(boxes, scores, labels, ids)`` tuple
    per image, in the order of ``ids_to_use``.
    """
    print(f'Start {proc_number} IDs to proc: {len(ids_to_use)}')
    fused = []
    for image_id in ids_to_use:
        boxes, scores, labels, image_ids = process_single_id(image_id, res_boxes, weights, params)
        fused.append((boxes, scores, labels, image_ids))
    return_dict[proc_number] = list(fused)
    
def ensemble_predictions(pred_filenames, weights, params):
    """Read several detection-result json files and fuse them with WBF.

    Each file must contain a list of dicts with the keys ``image_id``,
    ``bbox`` (``[x, y, width, height]`` in pixels), ``score`` and
    ``category_id``. Files whose weight is 0 are not read. The image ids of
    the first file that is read act as the reference: predictions of later
    files for other image ids are dropped.

    Args:
        pred_filenames: Paths of the json files, one per model.
        weights: One weight per file, in the same order as ``pred_filenames``.
        params: Fusion parameters handed to the worker processes.

    Returns:
        A list of dicts with the keys ``bbox``, ``category_id``, ``score``,
        ``image_id`` and a placeholder ``segmentation`` entry. The list is
        empty when there is nothing to fuse.
    """
    start_time = time.time()
    procs_to_use = max(cpu_count() // 2, 1)
    print(f'Use processes: {procs_to_use}')
    weights = np.array(weights)

    # One dict per model that is actually used:
    # image_id -> list of [bbox, score, category_id].
    per_model = []
    ref_ids = None
    for j, filename in enumerate(pred_filenames):
        if weights[j] == 0:
            continue
        print(f'Read {filename}')

        with open(filename, 'r') as f:
            json_data = json.load(f)

        unique_ids = {x.get('image_id') for x in json_data}
        if ref_ids is None:
            ref_ids = unique_ids
        elif ref_ids != unique_ids:
            print(f'Differenct IDs in ensembled jsons! {len(ref_ids)} != {len(unique_ids)}')
            # ref_ids is a set, so this membership test is O(1) per element.
            json_data = [x for x in json_data if x.get('image_id') in ref_ids]

        single_res = dict()
        for x in json_data:
            pred = [x.get('bbox'), x.get('score'), x.get('category_id')]
            single_res.setdefault(x.get('image_id'), []).append(pred)
        per_model.append(single_res)

    weights = weights[weights != 0]

    ids_to_use = sorted(set().union(*per_model))
    if not ids_to_use:
        print('Run time: {:.2f}'.format(time.time() - start_time))
        return []

    # Keep exactly one (possibly empty) list per model for every image so the
    # i-th list always belongs to the i-th weight, even when a model produced
    # no prediction for that image.
    res_boxes = {
        image_id: [model.get(image_id, []) for model in per_model]
        for image_id in ids_to_use
    }

    # Split the ids into at most `procs_to_use` contiguous chunks.
    chunk_size = -(-len(ids_to_use) // procs_to_use)  # ceiling division
    chunks = [
        ids_to_use[begin:begin + chunk_size]
        for begin in range(0, len(ids_to_use), chunk_size)
    ]

    results = []
    # The context manager shuts the manager process down once results are read.
    with Manager() as manager:
        return_dict = manager.dict()
        jobs = []
        for i, chunk in enumerate(chunks):
            # Hand each worker only the predictions it needs.
            chunk_boxes = {image_id: res_boxes[image_id] for image_id in chunk}
            p = Process(
                target=process_part_of_data,
                args=(i, return_dict, chunk, chunk_boxes, weights, params),
            )
            jobs.append(p)
            p.start()

        for p in jobs:
            p.join()

        for i in range(len(jobs)):
            results += return_dict[i]

    all_ids = []
    all_boxes = []
    all_scores = []
    all_labels = []

    for boxes, scores, labels, ids_list in results:
        # Images without any fused box contribute nothing.
        if len(ids_list) == 0:
            continue
        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)
        all_ids.append(ids_list)

    if not all_ids:
        print('Run time: {:.2f}'.format(time.time() - start_time))
        return []

    all_ids = np.concatenate(all_ids)
    all_boxes = np.concatenate(all_boxes)
    all_scores = np.concatenate(all_scores)
    all_labels = np.concatenate(all_labels)
    print(len(all_ids), len(all_boxes), len(all_scores), len(all_labels))

    res = list()
    for image_id, box, score, label in zip(all_ids, all_boxes, all_scores, all_labels):
        # Convert to builtin floats first: NumPy float32 scalars cannot be
        # serialised by json.dump.
        x_min = float(box[0]) * img_w
        y_min = float(box[1]) * img_h
        x_max = float(box[2]) * img_w
        y_max = float(box[3]) * img_h
        res.append({
            'bbox': [x_min, y_min, x_max - x_min, y_max - y_min],
            'category_id': int(label),
            'score': float(score),
            'image_id': int(image_id),
            'segmentation': [[0, 0]],
        })
    print('Run time: {:.2f}'.format(time.time() - start_time))
    return res
    

def ensemble(json_files, weights, params):
    """Fuse the given prediction files and write the result as json.

    Args:
        json_files: Paths of the prediction files to fuse.
        weights: One weight per file.
        params: Fusion parameters; ``params['result_name']`` is the path of
            the output file.
    """
    fused_predictions = ensemble_predictions(json_files, weights, params)
    output_path = format(params['result_name'])
    with open(output_path, 'w', encoding='utf-8') as out_file:
        json.dump(fused_predictions, out_file, ensure_ascii=False)

if __name__ == '__main__':
    # Tune these parameters as needed. 'result_name' sets both the directory
    # and the file name of the saved result.
    params = {
        'result_name': '../output/submit.json',
        'conf_type': 'avg',
        'iou_thr': 0.4,
        'skip_box_thr': 0.0001,
    }
    # Collect the bbox files to fuse; they are expected to be json files.
    json_files = glob('../output/*/*.json')
    # Every model gets the same weight.
    weights = [1] * len(json_files)
    ensemble(json_files, weights, params)

728x90

티스토리툴바