Spaces:

NghiTran1009
/

CO-SPY

Sleeping

App Files Files Community

CO-SPY / Detectors /semantic_detector.py

NghiTran1009

Upload clean CO-SPY project

cab012d 16 days ago

raw

history blame contribute delete

2.96 kB

	import torch
	import open_clip
	from torchvision import transforms
	from utils import data_augment


	# Semantic Detector (Extract semantic features using CLIP)
	class SemanticDetector(torch.nn.Module):
	def __init__(self, dim_clip=1152, num_classes=1):
	super(SemanticDetector, self).__init__()

	# Get the pre-trained CLIP
	model_name = "ViT-SO400M-14-SigLIP-384"
	version = "webli"
	self.clip, _, _ = open_clip.create_model_and_transforms(model_name, pretrained=version)
	# Freeze the CLIP visual encoder
	self.clip.requires_grad_(False)

	# Classifier
	self.fc = torch.nn.Linear(dim_clip, num_classes)

	# Normalization
	self.mean = [0.5, 0.5, 0.5]
	self.std = [0.5, 0.5, 0.5]

	# Resolution
	self.loadSize = 384
	self.cropSize = 384

	# Data augmentation
	self.blur_prob = 0.5
	self.blur_sig = [0.0, 3.0]
	self.jpg_prob = 0.5
	self.jpg_method = ['cv2', 'pil']
	self.jpg_qual = list(range(30, 101))

	# Define the augmentation configuration
	self.aug_config = {
	"blur_prob": self.blur_prob,
	"blur_sig": self.blur_sig,
	"jpg_prob": self.jpg_prob,
	"jpg_method": self.jpg_method,
	"jpg_qual": self.jpg_qual,
	}

	# Pre-processing
	crop_func = transforms.RandomCrop(self.cropSize)
	flip_func = transforms.RandomHorizontalFlip()
	rz_func = transforms.Resize(self.loadSize)
	aug_func = transforms.Lambda(lambda x: data_augment(x, self.aug_config))

	self.train_transform = transforms.Compose([
	rz_func,
	aug_func,
	crop_func,
	flip_func,
	transforms.ToTensor(),
	transforms.Normalize(mean=self.mean, std=self.std),
	])

	self.test_transform = transforms.Compose([
	rz_func,
	crop_func,
	transforms.ToTensor(),
	transforms.Normalize(mean=self.mean, std=self.std),
	])

	def forward(self, x, return_feat=False):
	device = next(self.fc.parameters()).device # lấy device của fc
	x = x.to(device) # đảm bảo input cùng device
	feat = self.clip.encode_image(x)
	feat = feat.to(device) # đảm bảo feat cùng device với fc
	out = self.fc(feat)
	if return_feat:
	return feat, out
	return out

	def save_weights(self, weights_path):
	save_params = {"fc.weight": self.fc.weight.cpu(), "fc.bias": self.fc.bias.cpu()}
	torch.save(save_params, weights_path)

	def load_weights(self, weights_path):
	device = next(self.fc.parameters()).device # lấy device hiện tại của model
	weights = torch.load(weights_path, map_location=device)
	self.fc.weight.data = weights["fc.weight"].to(device)
	self.fc.bias.data = weights["fc.bias"].to(device)