"""OpenCV preprocessing pipeline for raster-to-vector conversion. Supports three conversion modes: - **bw** (default): Full pipeline → binary image for potrace/vtracer. - **grayscale**: Decode → grayscale → denoise → contrast → 8-bit output for vtracer. - **color**: Decode → denoise (bilateral on BGR) → full-color output for vtracer. """ import cv2 import numpy as np # Valid conversion modes VALID_MODES = {"bw", "grayscale", "color"} def decode_image(raw_bytes: bytes) -> np.ndarray: """Decode raw image bytes into a BGR numpy array.""" buf = np.frombuffer(raw_bytes, dtype=np.uint8) img = cv2.imdecode(buf, cv2.IMREAD_COLOR) if img is None: raise ValueError("Failed to decode image from provided bytes") return img def to_grayscale(img: np.ndarray) -> np.ndarray: """Convert BGR image to single-channel grayscale.""" if len(img.shape) == 2: return img return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) def denoise( img: np.ndarray, d: int = 9, sigma_color: float = 75.0, sigma_space: float = 75.0, ) -> np.ndarray: """Apply bilateral filter for edge-preserving denoising.""" return cv2.bilateralFilter(img, d, sigma_color, sigma_space) def enhance_contrast( img: np.ndarray, clip_limit: float = 2.0, tile_grid_size: tuple[int, int] = (8, 8), ) -> np.ndarray: """Apply CLAHE contrast enhancement (grayscale only).""" clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size) return clahe.apply(img) def threshold( img: np.ndarray, manual_thresh: int | None = None, ) -> np.ndarray: """Apply thresholding — Otsu auto by default, manual override if provided.""" if manual_thresh is not None: _, result = cv2.threshold(img, manual_thresh, 255, cv2.THRESH_BINARY) else: _, result = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) return result def edge_detect( img: np.ndarray, low: int = 50, high: int = 150, ) -> np.ndarray: """Apply Canny edge detection.""" return cv2.Canny(img, low, high) def morphological_ops( img: np.ndarray, kernel_size: int = 3, dilate_iterations: int = 1, erode_iterations: int = 1, ) -> np.ndarray: """Apply dilation then erosion (closing-style) to clean up binary image.""" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size)) result = cv2.dilate(img, kernel, iterations=dilate_iterations) result = cv2.erode(result, kernel, iterations=erode_iterations) return result def apply_mask(img: np.ndarray, mask_regions: list[dict]) -> np.ndarray: """Zero out rectangular regions of the image. Each region is a dict with keys: x, y, width, height (in pixel coordinates). Masked areas become white (255) for grayscale/bw or white (255,255,255) for color, effectively removing them from vectorization. """ for region in mask_regions: x = int(region.get("x", 0)) y = int(region.get("y", 0)) w = int(region.get("width", 0)) h = int(region.get("height", 0)) if w <= 0 or h <= 0: continue if img.ndim == 3: img[y:y + h, x:x + w] = 255 else: img[y:y + h, x:x + w] = 255 return img def preprocess( raw_bytes: bytes, params: dict | None = None, ) -> np.ndarray: """Run the preprocessing pipeline on raw image bytes. The pipeline varies by ``conversion_mode``: **bw** (default): decode → grayscale → denoise → contrast → threshold → [invert] → [edge detect] → morphological ops → binary output **grayscale**: decode → grayscale → denoise → contrast → 8-bit grayscale output **color**: decode → denoise (bilateral on BGR) → full-color BGR output Mask regions (if provided) are applied after decoding, before any processing — masked pixels are set to white. Params dict keys: conversion_mode: 'bw' | 'grayscale' | 'color' invert: bool (B&W mode only) mask_regions: list of {x, y, width, height} dicts denoise_d, denoise_sigma_color, denoise_sigma_space, clahe_clip_limit, clahe_tile_grid_size, threshold_manual, edge_detect (bool), edge_low, edge_high, morph_kernel_size, morph_dilate_iterations, morph_erode_iterations """ p = params or {} mode = p.get("conversion_mode", "bw") if mode not in VALID_MODES: raise ValueError( f"Invalid conversion_mode '{mode}'. Must be one of: {', '.join(sorted(VALID_MODES))}" ) img = decode_image(raw_bytes) # Apply mask regions early — before any color conversion mask_regions = p.get("mask_regions") if mask_regions: img = apply_mask(img, mask_regions) # ── Color mode: denoise BGR, return as-is ── if mode == "color": img = denoise( img, d=p.get("denoise_d", 9), sigma_color=p.get("denoise_sigma_color", 75.0), sigma_space=p.get("denoise_sigma_space", 75.0), ) return img # ── Grayscale and B&W both start with grayscale conversion ── img = to_grayscale(img) img = denoise( img, d=p.get("denoise_d", 9), sigma_color=p.get("denoise_sigma_color", 75.0), sigma_space=p.get("denoise_sigma_space", 75.0), ) img = enhance_contrast( img, clip_limit=p.get("clahe_clip_limit", 2.0), tile_grid_size=p.get("clahe_tile_grid_size", (8, 8)), ) # ── Grayscale mode: return 8-bit grayscale (no threshold) ── if mode == "grayscale": return img # ── B&W mode: threshold → invert → edge detect → morphological ── img = threshold( img, manual_thresh=p.get("threshold_manual"), ) if p.get("invert", False): img = cv2.bitwise_not(img) if p.get("edge_detect", False): img = edge_detect( img, low=p.get("edge_low", 50), high=p.get("edge_high", 150), ) img = morphological_ops( img, kernel_size=p.get("morph_kernel_size", 3), dilate_iterations=p.get("morph_dilate_iterations", 1), erode_iterations=p.get("morph_erode_iterations", 1), ) return img