Spaces:
Running
Running
| import cv2 | |
| import numpy as np | |
def padding_image(img, size=(640, 640)):
    """Pad an image with black pixels on the bottom and right up to ``size``.

    The original content stays anchored at the top-left corner, so pixel
    coordinates of the input remain valid in the output.

    :param img: Image array whose first two axes are (height, width). Any
        trailing axes (e.g. channels) are left unpadded.
    :param size: Target ``(width, height)`` in pixels (default 640x640).
    :return: A new array with the same dtype as ``img``. An axis that
        already meets or exceeds its target keeps its current length
        instead of producing a negative border width.
    """
    img_height, img_width = img.shape[:2]
    target_width, target_height = size

    # Clamp at zero: a negative border width is not a valid padding request.
    pad_bottom = max(0, target_height - img_height)
    pad_right = max(0, target_width - img_width)

    # Zero-fill only below and to the right; trailing axes get no padding.
    pad_widths = [(0, pad_bottom), (0, pad_right)]
    pad_widths += [(0, 0)] * (img.ndim - 2)
    return np.pad(img, pad_widths, mode='constant', constant_values=0)
def is_poly_outside_rect(poly, x, y, w, h):
    """Tell whether a polygon's bounding box misses a rectangle entirely.

    :param poly: Sequence of ``(x, y)`` vertices.
    :param x: Left edge of the rectangle.
    :param y: Top edge of the rectangle.
    :param w: Rectangle width.
    :param h: Rectangle height.
    :return: ``True`` when all vertices lie strictly to the left of, right
        of, above or below the rectangle; ``False`` otherwise (a polygon
        that merely touches an edge is not considered outside).
    """
    vertices = np.array(poly)

    xs = vertices[:, 0]
    misses_horizontally = xs.max() < x or xs.min() > x + w
    if misses_horizontally:
        return True

    ys = vertices[:, 1]
    misses_vertically = ys.max() < y or ys.min() > y + h
    return bool(misses_vertically)
def split_regions(axis):
    """Split an index array into runs of consecutive values.

    A new run starts wherever an element is not exactly one greater than
    its predecessor. Every run is returned, including the final one.

    :param axis: 1-D integer array of indices.
    :return: List of sub-arrays of ``axis``, one per consecutive run, in
        order. An empty input yields an empty list.
    """
    if axis.shape[0] == 0:
        return []
    # Position i + 1 opens a new run when axis[i + 1] != axis[i] + 1.
    run_starts = np.flatnonzero(np.diff(axis) != 1) + 1
    return np.split(axis, run_starts)
def random_select(axis, max_size):
    """Draw two values from ``axis`` and return them ordered and clipped.

    :param axis: Candidate values to sample from. Both draws come from a
        single ``np.random.choice`` call, so they may coincide.
    :param max_size: Results are clipped into ``[0, max_size - 1]``.
    :return: ``(lower, upper)`` with ``lower <= upper``.
    """
    picks = np.random.choice(axis, size=2)
    upper_bound = max_size - 1
    lower = np.clip(picks.min(), 0, upper_bound)
    upper = np.clip(picks.max(), 0, upper_bound)
    return lower, upper
def region_wise_random_select(regions, max_size):
    """Pick one value from each of two randomly chosen regions.

    Two region indices are drawn in a single ``np.random.choice`` call (so
    the same region may be chosen twice), then one value is drawn from each
    chosen region.

    :param regions: Sequence of non-empty 1-D arrays of candidate values.
    :param max_size: Not used by this function; retained so existing
        callers that pass it keep working.
    :return: ``(smaller, larger)`` of the two drawn values, as Python ints.
    """
    chosen_regions = np.random.choice(len(regions), 2)
    # Take element 0 of the size-1 draw before int(): converting a 1-element
    # array straight to a scalar is deprecated in recent NumPy releases.
    drawn = [
        int(np.random.choice(regions[idx], size=1)[0])
        for idx in chosen_regions
    ]
    return min(drawn), max(drawn)
def crop_area(im, text_polys, min_crop_side_ratio, max_tries):
    """Randomly choose a crop window whose borders do not cut through text.

    Rows and columns spanned by any polygon's (rounded) bounding box are
    marked as occupied; crop borders are then sampled only from the
    unoccupied rows and columns.

    :param im: Image array with three axes (height, width, channels).
    :param text_polys: Iterable of polygons, each a sequence of ``(x, y)``
        vertices.
    :param min_crop_side_ratio: Minimum crop width/height expressed as a
        fraction of the image width/height.
    :param max_tries: Number of random candidates to try.
    :return: ``(x, y, w, h)`` of the first candidate that is large enough
        and is not entirely outside at least one polygon's bounding box.
        Falls back to the full image ``(0, 0, width, height)`` when no row
        or no column is free of text, or when every try is rejected.
    """
    img_h, img_w, _ = im.shape
    row_has_text = np.zeros(img_h, dtype=bool)
    col_has_text = np.zeros(img_w, dtype=bool)
    for polygon in text_polys:
        pts = np.round(polygon, decimals=0).astype(np.int32)
        xs, ys = pts[:, 0], pts[:, 1]
        col_has_text[xs.min():xs.max()] = True
        row_has_text[ys.min():ys.max()] = True

    # Only text-free rows/columns may serve as crop borders.
    free_rows = np.flatnonzero(~row_has_text)
    free_cols = np.flatnonzero(~col_has_text)
    whole_image = (0, 0, img_w, img_h)
    if free_rows.size == 0 or free_cols.size == 0:
        return whole_image

    row_regions = split_regions(free_rows)
    col_regions = split_regions(free_cols)
    min_w = min_crop_side_ratio * img_w
    min_h = min_crop_side_ratio * img_h

    for _ in range(max_tries):
        # The horizontal extent is sampled before the vertical one.
        if len(col_regions) > 1:
            xmin, xmax = region_wise_random_select(col_regions, img_w)
        else:
            xmin, xmax = random_select(free_cols, img_w)
        if len(row_regions) > 1:
            ymin, ymax = region_wise_random_select(row_regions, img_h)
        else:
            ymin, ymax = random_select(free_rows, img_h)

        crop_w = xmax - xmin
        crop_h = ymax - ymin
        if crop_w < min_w or crop_h < min_h:
            # Candidate is too small; try again.
            continue

        touches_text = any(
            not is_poly_outside_rect(polygon, xmin, ymin, crop_w, crop_h)
            for polygon in text_polys
        )
        if touches_text:
            return xmin, ymin, crop_w, crop_h

    return whole_image
class EastRandomCropData(object):
    """Random text-aware crop followed by a resize to a fixed output size.

    The crop window comes from ``crop_area``. The cropped image is then
    either resized with its aspect ratio preserved and placed at the
    top-left of a zero-filled canvas (``keep_ratio=True``), or stretched to
    fill the output size (``keep_ratio=False``). Polygons are mapped into
    the output image's coordinates, and polygons that fall completely
    outside the visible content are dropped together with their tags and
    texts.
    """

    def __init__(
        self,
        size=(640, 640),
        max_tries=10,
        min_crop_side_ratio=0.1,
        keep_ratio=True,
        **kwargs,
    ):
        """
        :param size: Output ``(width, height)``.
        :param max_tries: Number of crop candidates passed to ``crop_area``.
        :param min_crop_side_ratio: Minimum crop side as a fraction of the
            image side, passed to ``crop_area``.
        :param keep_ratio: Preserve the crop's aspect ratio and pad, rather
            than stretch to ``size``.
        :param kwargs: Accepted and ignored.
        """
        self.size = size
        self.max_tries = max_tries
        self.min_crop_side_ratio = min_crop_side_ratio
        self.keep_ratio = keep_ratio

    def __call__(self, data):
        """Crop and resize ``data['image']`` and update its annotations.

        :param data: Dict with keys ``'image'`` (3-axis array), ``'polys'``
            (polygons of ``(x, y)`` vertices supporting array arithmetic),
            ``'ignore_tags'`` and ``'texts'`` (one entry per polygon).
        :return: The same dict, with those four keys replaced by the
            cropped image and the surviving annotations.
        """
        img = data['image']
        text_polys = data['polys']
        ignore_tags = data['ignore_tags']
        texts = data['texts']
        all_care_polys = [
            text_polys[i] for i, tag in enumerate(ignore_tags) if not tag
        ]
        # Compute the crop region from the polygons that are not ignored.
        crop_x, crop_y, crop_w, crop_h = crop_area(img, all_care_polys,
                                                   self.min_crop_side_ratio,
                                                   self.max_tries)
        scale_w = self.size[0] / crop_w
        scale_h = self.size[1] / crop_h
        cropped = img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w]

        if self.keep_ratio:
            # Uniform scale; the resized crop sits at the top-left of a
            # zero-filled canvas of the output size.
            scale = min(scale_w, scale_h)
            h = int(crop_h * scale)
            w = int(crop_w * scale)
            padimg = np.zeros((self.size[1], self.size[0], img.shape[2]),
                              img.dtype)
            padimg[:h, :w] = cv2.resize(cropped, (w, h))
            img = padimg
            poly_scale = scale
        else:
            # The image is stretched independently along each axis, so the
            # polygons must use the same per-axis factors and the visible
            # content covers the whole output.
            img = cv2.resize(cropped, tuple(self.size))
            w, h = self.size[0], self.size[1]
            poly_scale = (scale_w, scale_h)

        # Map polygons into output coordinates; keep those still visible.
        text_polys_crop = []
        ignore_tags_crop = []
        texts_crop = []
        for poly, text, tag in zip(text_polys, texts, ignore_tags):
            poly = ((poly - (crop_x, crop_y)) * poly_scale).tolist()
            if not is_poly_outside_rect(poly, 0, 0, w, h):
                text_polys_crop.append(poly)
                ignore_tags_crop.append(tag)
                texts_crop.append(text)
        data['image'] = img
        data['polys'] = np.array(text_polys_crop)
        data['ignore_tags'] = ignore_tags_crop
        data['texts'] = texts_crop
        return data
class CropResize(object):
    """Turn one image into one or more tiles of exactly ``size``.

    ``interpolation`` is stored on the instance but is not used by
    ``__call__``.
    """

    def __init__(self, size=(640, 640), interpolation=cv2.INTER_LINEAR):
        """
        :param size: Tile ``(width, height)``.
        :param interpolation: OpenCV interpolation flag (stored only).
        """
        self.size = size
        self.interpolation = interpolation

    def __call__(self, data):
        """Pad or tile ``data['image']`` to the configured size.

        - If the image fits within ``size`` on both axes, it is returned as
          a single tile: unchanged when it matches exactly, otherwise
          padded via ``padding_image``.
        - Otherwise any axis shorter than the target is first zero-padded
          (bottom/right) up to the target, and the image is cut into
          overlapping tiles with a stride of half the tile size. Tiles
          truncated by the image border are padded via ``padding_image``.

        :param data: Dict holding the image array under ``'image'``.
        :return: ``(tiles, positions)`` where ``tiles`` is a list of image
            arrays and ``positions`` is a parallel list of
            ``[left, top, right, bottom]``. In the single-tile case the
            position is the original image extent; in the tiled case it is
            the tile's unpadded extent within the (possibly padded) image.
        """
        img = data['image']
        img_height, img_width = img.shape[:2]
        target_width, target_height = self.size

        # Image fits inside a single tile.
        if img_width <= target_width and img_height <= target_height:
            if img_width == target_width and img_height == target_height:
                return [img], [[0, 0, img_width, img_height]]
            padded_img = padding_image(img, self.size)
            return [padded_img], [[0, 0, img_width, img_height]]

        # One axis may still be shorter than the tile: extend it so each
        # axis is at least one tile long before tiling.
        pad_right = max(0, target_width - img_width)
        pad_bottom = max(0, target_height - img_height)
        if pad_right or pad_bottom:
            img = cv2.copyMakeBorder(img,
                                     0,
                                     pad_bottom,
                                     0,
                                     pad_right,
                                     cv2.BORDER_CONSTANT,
                                     value=[0, 0, 0])
        img_height, img_width = img.shape[:2]

        # Slide a window with 50% overlap along each axis; each axis uses
        # its own half-tile stride and stop bound.
        step_y = target_height // 2
        step_x = target_width // 2
        crop_positions = []
        cropped_img_list = []
        for top in range(0, img_height - step_y, step_y):
            for left in range(0, img_width - step_x, step_x):
                right = min(left + target_width, img_width)
                bottom = min(top + target_height, img_height)
                cropped_img = img[top:bottom, left:right]
                # Border tiles are truncated by the image edge; pad them
                # back to full size while recording the unpadded extent.
                if bottom - top < target_height or right - left < target_width:
                    cropped_img = padding_image(cropped_img, self.size)
                cropped_img_list.append(cropped_img)
                crop_positions.append([left, top, right, bottom])
        return cropped_img_list, crop_positions