Commit cab012d
1 Parent(s): f1aeecb

Upload clean CO-SPY project

Changed files:
- .gitignore +0 -0
- Datasets/__init__.py +11 -0
- Datasets/dataset.py +105 -0
- Datasets/flickr.py +30 -0
- Datasets/mscoco.py +34 -0
- Detectors/__init__.py +6 -0
- Detectors/artifact_detector.py +82 -0
- Detectors/artifact_extractor.py +162 -0
- Detectors/cospy_calibrate_detector.py +96 -0
- Detectors/cospy_detector.py +124 -0
- Detectors/semantic_detector.py +86 -0
- LICENSE +21 -0
- ProGANDetectors/__init__.py +5 -0
- ProGANDetectors/artifact_detector.py +196 -0
- ProGANDetectors/cospy_calibrate_detector.py +96 -0
- ProGANDetectors/semantic_detector.py +82 -0
- __pycache__/utils.cpython-311.pyc +0 -0
- calibrate_combine.py +297 -0
- data/in_the_wild/README.md +14 -0
- data/in_the_wild/urls/flux.txt +0 -0
- data/in_the_wild/urls/lexica.txt +0 -0
- data/test/README.md +13 -0
- data/train/download.sh +16 -0
- environment.yml +105 -0
- evaluate.py +227 -0
- pretrained/classifer_weights.pth +3 -0
- pretrained/classifier_weights.pth +3 -0
- pretrained/semantic_weights.pth +3 -0
- requirements.txt +8 -0
- train.py +271 -0
- train_single.py +293 -0
- utils.py +162 -0
.gitignore
ADDED
Binary file (16 Bytes).
Datasets/__init__.py
ADDED
@@ -0,0 +1,11 @@
from .dataset import TrainDataset, TestDataset

# List of evaluated real datasets
EVAL_DATASET_LIST = [
    "real"
]
# List of evaluated generative models
EVAL_MODEL_LIST = [
    "stable_diffusion"
]
__all__ = ["TrainDataset", "TestDataset", "EVAL_DATASET_LIST", "EVAL_MODEL_LIST"]
Datasets/dataset.py
ADDED
@@ -0,0 +1,105 @@
import os
import numpy as np
from PIL import Image
from tqdm import tqdm
from torch.utils.data import Dataset

from utils import get_list, png_to_jpeg
from .mscoco import MSCOCO2017
from .flickr import Flickr30k


class TrainDataset(Dataset):
    def __init__(self, data_path, split="train", transform=None, add_jpeg=True):
        assert split in ["train", "val"]

        # Load the dataset for training
        real_list = get_list(os.path.join(data_path, "mscoco2017", f"{split}2017"))
        fake_list = get_list(os.path.join(data_path, "stable-diffusion-v1-4", f"{split}2017"))

        # Setting the labels for the dataset
        self.labels_dict = {}
        for i in real_list:
            self.labels_dict[i] = 0
        for i in fake_list:
            self.labels_dict[i] = 1

        # Construct the entire dataset
        self.total_list = real_list + fake_list
        np.random.shuffle(self.total_list)

        # JPEG compression
        self.add_jpeg = add_jpeg

        # Transformations
        self.transform = transform

    def __len__(self):
        return len(self.total_list)

    def __getitem__(self, idx):
        img_path = self.total_list[idx]
        label = self.labels_dict[img_path]
        image = Image.open(img_path).convert("RGB")

        # Add JPEG compression
        if self.add_jpeg:
            image = png_to_jpeg(image, quality=95)

        # Apply the transformation
        if self.transform is not None:
            image = self.transform(image)
        return image, label


class TestDataset(Dataset):
    def __init__(self, dataset, model, root_path, transform=None, add_jpeg=True):
        fake_dir = os.path.join(root_path, dataset, model)
        self.fake = sorted([
            os.path.join(fake_dir, i)
            for i in os.listdir(fake_dir)
            if i.lower().endswith((".png", ".jpg", ".jpeg"))
        ])

        real_dir = os.path.join(root_path, dataset, "real")
        if not os.path.exists(real_dir):
            raise ValueError(f"Real images directory not found: {real_dir}")

        self.real = sorted([
            os.path.join(real_dir, i)
            for i in os.listdir(real_dir)
            if i.lower().endswith((".png", ".jpg", ".jpeg"))
        ])

        self.image_idx = list(range(len(self.real) + len(self.fake)))
        self.labels = [0] * len(self.real) + [1] * len(self.fake)
        self.image_paths = self.real + self.fake

        self.add_jpeg = add_jpeg
        self.transform = transform

    def __len__(self):
        return len(self.image_idx)

    def __getitem__(self, idx):
        if idx < len(self.real):
            img_path = self.real[idx]
        else:
            img_path = self.fake[idx - len(self.real)]

        # ---- FIX: Skip corrupted / unreadable images ----
        try:
            image = Image.open(img_path).convert("RGB")
        except Exception:
            print("Corrupted image:", img_path)
            # fall back to the next image instead
            return self.__getitem__((idx + 1) % len(self))

        if self.add_jpeg:
            image = png_to_jpeg(image, quality=95)

        if self.transform is not None:
            image = self.transform(image)

        label = self.labels[idx]
        return image, label, img_path
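A minimal usage sketch for the two dataset classes above, not part of the commit. The directory layout, the transform pipeline, and the dataset/model names are assumptions (the names mirror EVAL_DATASET_LIST and EVAL_MODEL_LIST from Datasets/__init__.py; train.py and evaluate.py are the authoritative versions):

# Sketch: wiring TrainDataset / TestDataset into DataLoaders (paths and transform are assumptions)
from torch.utils.data import DataLoader
from torchvision import transforms
from Datasets import TrainDataset, TestDataset

transform = transforms.Compose([
    transforms.Resize(384),
    transforms.CenterCrop(384),
    transforms.ToTensor(),
])

train_set = TrainDataset(data_path="data/train", split="train", transform=transform)
test_set = TestDataset(dataset="real", model="stable_diffusion", root_path="data/test", transform=transform)

train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=4)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False, num_workers=4)

images, labels = next(iter(train_loader))        # (B, 3, 384, 384), (B,)
images, labels, paths = next(iter(test_loader))  # TestDataset also returns the image paths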
Datasets/flickr.py
ADDED
@@ -0,0 +1,30 @@
import os
import json
import torch
import numpy as np
from PIL import Image
import datasets as ds


class Flickr30k(torch.utils.data.Dataset):
    def __init__(self, split='test', transform=None):
        # Split [test: 31014]
        self.dataset = ds.load_dataset("nlphuji/flickr30k")[split]

        # Preprocess the images
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        example = self.dataset[idx]
        # PIL RGB image
        image = example['image']
        if self.transform:
            image = self.transform(image)
        # A list of valid captions
        caption_list = example['caption']
        # Randomly select a caption
        caption = np.random.choice(caption_list)
        return image, caption
Datasets/mscoco.py
ADDED
@@ -0,0 +1,34 @@
import os
import json
import torch
import numpy as np
from PIL import Image
import datasets as ds


class MSCOCO2017(torch.utils.data.Dataset):
    def __init__(self, split='train', transform=None):
        # Split [train: 118287, val: 5000]
        self.dataset = ds.load_dataset(
            "shunk031/MSCOCO",
            year=2017,
            coco_task="captions"
        )[split]

        # Preprocess the images
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        example = self.dataset[idx]
        # PIL RGB image
        image = example['image'].convert('RGB')
        if self.transform:
            image = self.transform(image)
        # A list of valid captions
        caption_list = example['annotations']['caption']
        # Randomly select a caption
        caption = np.random.choice(caption_list)
        return image, caption
Detectors/__init__.py
ADDED
@@ -0,0 +1,6 @@
from .artifact_detector import ArtifactDetector
from .semantic_detector import SemanticDetector
from .cospy_calibrate_detector import CospyCalibrateDetector
from .cospy_detector import CospyDetector, LabelSmoothingBCEWithLogits

__all__ = ["ArtifactDetector", "SemanticDetector", "CospyCalibrateDetector", "CospyDetector", "LabelSmoothingBCEWithLogits"]
Detectors/artifact_detector.py
ADDED
@@ -0,0 +1,82 @@
import torch
from diffusers import StableDiffusionPipeline
from .artifact_extractor import VAEReconEncoder
from torchvision import transforms
from utils import data_augment


# Artifact Detector (Extract artifact features using VAE)
class ArtifactDetector(torch.nn.Module):
    def __init__(self, dim_artifact=512, num_classes=1):
        super(ArtifactDetector, self).__init__()
        # Load the pre-trained VAE
        model_id = "CompVis/stable-diffusion-v1-4"
        vae = StableDiffusionPipeline.from_pretrained(model_id).vae
        # Freeze the VAE visual encoder
        vae.requires_grad_(False)
        self.artifact_encoder = VAEReconEncoder(vae)

        # Classifier
        self.fc = torch.nn.Linear(dim_artifact, num_classes)

        # Normalization
        self.mean = [0.0, 0.0, 0.0]
        self.std = [1.0, 1.0, 1.0]

        # Resolution
        self.loadSize = 256
        self.cropSize = 224

        # Data augmentation
        self.blur_prob = 0.0
        self.blur_sig = [0.0, 3.0]
        self.jpg_prob = 0.5
        self.jpg_method = ['cv2', 'pil']
        self.jpg_qual = list(range(70, 96))

        # Define the augmentation configuration
        self.aug_config = {
            "blur_prob": self.blur_prob,
            "blur_sig": self.blur_sig,
            "jpg_prob": self.jpg_prob,
            "jpg_method": self.jpg_method,
            "jpg_qual": self.jpg_qual,
        }

        # Pre-processing
        crop_func = transforms.RandomCrop(self.cropSize)
        flip_func = transforms.RandomHorizontalFlip()
        rz_func = transforms.Resize(self.loadSize)
        aug_func = transforms.Lambda(lambda x: data_augment(x, self.aug_config))

        self.train_transform = transforms.Compose([
            aug_func,
            rz_func,
            crop_func,
            flip_func,
            transforms.ToTensor(),
            transforms.Normalize(mean=self.mean, std=self.std),
        ])

        self.test_transform = transforms.Compose([
            rz_func,
            crop_func,
            transforms.ToTensor(),
            transforms.Normalize(mean=self.mean, std=self.std),
        ])

    def forward(self, x, return_feat=False):
        feat = self.artifact_encoder(x)
        out = self.fc(feat)
        if return_feat:
            return feat, out
        return out

    def save_weights(self, weights_path):
        save_params = {k: v.cpu() for k, v in self.state_dict().items()}
        torch.save(save_params, weights_path)

    def load_weights(self, weights_path):
        weights = torch.load(weights_path)
        self.load_state_dict(weights)
Detectors/artifact_extractor.py
ADDED
@@ -0,0 +1,162 @@
import torch
import torch.nn as nn
from torch.nn import functional as F


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, planes * self.expansion)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class VAEReconEncoder(nn.Module):
    def __init__(self, vae, block=Bottleneck):
        super(VAEReconEncoder, self).__init__()

        # Define the ResNet model
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # ResNet-50 is [3, 4, 6, 3]
        self.layer1 = self._make_layer(block, 64, 3)
        self.layer2 = self._make_layer(block, 128, 4, stride=2)
        # self.layer3 = self._make_layer(block, 256, 6, stride=2)
        # self.layer4 = self._make_layer(block, 512, 3, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # Kaiming initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Load the VAE model
        self.vae = vae

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def reconstruct(self, x):
        with torch.no_grad():
            # `.sample()` means to sample a latent vector from the distribution
            # `.mean` means to use the mean of the distribution
            latent = self.vae.encode(x).latent_dist.mean
            decoded = self.vae.decode(latent).sample
        return decoded

    def forward(self, x):
        # Reconstruct
        x_recon = self.reconstruct(x)
        # Compute the artifacts
        x = x - x_recon

        # Scale the artifacts
        x = x / 7. * 100.

        # Forward pass
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)

        return x
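To make the artifact signal explicit: the encoder above classifies not the image itself but the difference between the image and its VAE reconstruction, rescaled by the hard-coded factor 100/7. A standalone sketch of that residual computation, not part of the commit (the model id matches artifact_detector.py; the dummy input range is an assumption):

# Sketch: the reconstruction residual used as the "artifact" signal (illustrative)
import torch
from diffusers import StableDiffusionPipeline

vae = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4").vae
vae.eval()

x = torch.rand(1, 3, 224, 224)  # dummy image batch

with torch.no_grad():
    latent = vae.encode(x).latent_dist.mean   # deterministic: posterior mean, not a sample
    x_recon = vae.decode(latent).sample       # decode back to image space
    artifact = (x - x_recon) / 7.0 * 100.0    # same scaling as VAEReconEncoder.forward

print(artifact.shape, artifact.abs().mean())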
Detectors/cospy_calibrate_detector.py
ADDED
@@ -0,0 +1,96 @@
import torch
from torchvision import transforms
from utils import data_augment
from .semantic_detector import SemanticDetector
from .artifact_detector import ArtifactDetector


# CO-SPY Calibrate Detector (Calibrate the integration of semantic and artifact detectors)
class CospyCalibrateDetector(torch.nn.Module):
    def __init__(self, semantic_weights_path, artifact_weights_path, num_classes=1):
        super(CospyCalibrateDetector, self).__init__()

        # Load the semantic detector
        self.sem = SemanticDetector()
        self.sem.load_weights(semantic_weights_path)

        # Load the artifact detector
        self.art = ArtifactDetector()
        self.art.load_weights(artifact_weights_path)

        # Freeze the two pre-trained models
        for param in self.sem.parameters():
            param.requires_grad = False
        for param in self.art.parameters():
            param.requires_grad = False

        # Classifier
        self.fc = torch.nn.Linear(2, num_classes)

        # Transformations inside the forward function
        # Including the normalization and resizing (only for the artifact detector)
        self.sem_transform = transforms.Compose([
            transforms.Normalize(self.sem.mean, self.sem.std)
        ])
        self.art_transform = transforms.Compose([
            transforms.Resize(self.art.cropSize, antialias=False),
            transforms.Normalize(self.art.mean, self.art.std)
        ])

        # Resolution
        self.loadSize = 384
        self.cropSize = 384

        # Data augmentation
        self.blur_prob = 0.0
        self.blur_sig = [0.0, 3.0]
        self.jpg_prob = 0.5
        self.jpg_method = ['cv2', 'pil']
        self.jpg_qual = list(range(70, 96))

        # Define the augmentation configuration
        self.aug_config = {
            "blur_prob": self.blur_prob,
            "blur_sig": self.blur_sig,
            "jpg_prob": self.jpg_prob,
            "jpg_method": self.jpg_method,
            "jpg_qual": self.jpg_qual,
        }

        # Pre-processing
        crop_func = transforms.RandomCrop(self.cropSize)
        flip_func = transforms.RandomHorizontalFlip()
        rz_func = transforms.Resize(self.loadSize)
        aug_func = transforms.Lambda(lambda x: data_augment(x, self.aug_config))

        self.train_transform = transforms.Compose([
            flip_func,
            aug_func,
            rz_func,
            crop_func,
            transforms.ToTensor(),
        ])

        self.test_transform = transforms.Compose([
            rz_func,
            crop_func,
            transforms.ToTensor(),
        ])

    def forward(self, x):
        x_sem = self.sem_transform(x)
        x_art = self.art_transform(x)
        pred_sem = self.sem(x_sem)
        pred_art = self.art(x_art)
        x = torch.cat([pred_sem, pred_art], dim=1)
        x = self.fc(x)
        return x

    def save_weights(self, weights_path):
        save_params = {"fc.weight": self.fc.weight.cpu(), "fc.bias": self.fc.bias.cpu()}
        torch.save(save_params, weights_path)

    def load_weights(self, weights_path):
        weights = torch.load(weights_path)
        self.fc.weight.data = weights["fc.weight"]
        self.fc.bias.data = weights["fc.bias"]
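A rough inference sketch for the calibrated detector, not part of the commit. Which file under pretrained/ corresponds to which branch is an assumption, as is the calibration checkpoint name and the 0.5 decision threshold:

# Sketch: scoring a single image with CospyCalibrateDetector (checkpoint names are assumptions)
import torch
from PIL import Image
from Detectors import CospyCalibrateDetector

model = CospyCalibrateDetector(
    semantic_weights_path="pretrained/semantic_weights.pth",
    artifact_weights_path="pretrained/artifact_weights.pth",   # hypothetical filename
)
model.load_weights("pretrained/calibrate_weights.pth")          # hypothetical calibration checkpoint
model.eval()

image = Image.open("example.png").convert("RGB")
x = model.test_transform(image).unsqueeze(0)   # (1, 3, 384, 384)

with torch.no_grad():
    prob_fake = model(x).sigmoid().item()
print("fake" if prob_fake > 0.5 else "real", prob_fake)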
Detectors/cospy_detector.py
ADDED
@@ -0,0 +1,124 @@
import torch
import random
from torchvision import transforms
from utils import data_augment, weights2cpu
from .semantic_detector import SemanticDetector
from .artifact_detector import ArtifactDetector


# CO-SPY Detector
class CospyDetector(torch.nn.Module):
    def __init__(self, num_classes=1):
        super(CospyDetector, self).__init__()

        # Load the semantic detector
        self.sem = SemanticDetector()
        self.sem_dim = self.sem.fc.in_features

        # Load the artifact detector
        self.art = ArtifactDetector()
        self.art_dim = self.art.fc.in_features

        # Classifier
        self.fc = torch.nn.Linear(self.sem_dim + self.art_dim, num_classes)

        # Transformations inside the forward function
        # Including the normalization and resizing (only for the artifact detector)
        self.sem_transform = transforms.Compose([
            transforms.Normalize(self.sem.mean, self.sem.std)
        ])
        self.art_transform = transforms.Compose([
            transforms.Resize(self.art.cropSize, antialias=False),
            transforms.Normalize(self.art.mean, self.art.std)
        ])

        # Resolution
        self.loadSize = 384
        self.cropSize = 384

        # Data augmentation
        self.blur_prob = 0.0
        self.blur_sig = [0.0, 3.0]
        self.jpg_prob = 0.5
        self.jpg_method = ['cv2', 'pil']
        self.jpg_qual = list(range(70, 96))

        # Define the augmentation configuration
        self.aug_config = {
            "blur_prob": self.blur_prob,
            "blur_sig": self.blur_sig,
            "jpg_prob": self.jpg_prob,
            "jpg_method": self.jpg_method,
            "jpg_qual": self.jpg_qual,
        }

        # Pre-processing
        crop_func = transforms.RandomCrop(self.cropSize)
        flip_func = transforms.RandomHorizontalFlip()
        rz_func = transforms.Resize(self.loadSize)
        aug_func = transforms.Lambda(lambda x: data_augment(x, self.aug_config))

        self.train_transform = transforms.Compose([
            flip_func,
            aug_func,
            rz_func,
            crop_func,
            transforms.ToTensor(),
        ])

        self.test_transform = transforms.Compose([
            rz_func,
            crop_func,
            transforms.ToTensor(),
        ])

    def forward(self, x, dropout_rate=0.3):
        x_sem = self.sem_transform(x)
        x_art = self.art_transform(x)

        # Forward pass
        sem_feat, sem_coeff = self.sem(x_sem, return_feat=True)
        art_feat, art_coeff = self.art(x_art, return_feat=True)

        # Dropout (only while the module is in training mode)
        if self.training:
            # Random dropout
            if random.random() < dropout_rate:
                # Randomly select a feature to drop
                idx_drop = random.randint(0, 1)
                if idx_drop == 0:
                    sem_coeff = torch.zeros_like(sem_coeff)
                else:
                    art_coeff = torch.zeros_like(art_coeff)

        # Concatenate the features
        x = torch.cat([sem_coeff * sem_feat, art_coeff * art_feat], dim=1)
        x = self.fc(x)

        return x

    def save_weights(self, weights_path):
        save_params = {
            "sem_fc": weights2cpu(self.sem.fc.state_dict()),
            "art_fc": weights2cpu(self.art.fc.state_dict()),
            "art_encoder": weights2cpu(self.art.artifact_encoder.state_dict()),
            "classifier": weights2cpu(self.fc.state_dict()),
        }
        torch.save(save_params, weights_path)

    def load_weights(self, weights_path):
        weights = torch.load(weights_path)
        self.sem.fc.load_state_dict(weights["sem_fc"])
        self.art.fc.load_state_dict(weights["art_fc"])
        self.art.artifact_encoder.load_state_dict(weights["art_encoder"])
        self.fc.load_state_dict(weights["classifier"])


# Define the label smoothing loss
class LabelSmoothingBCEWithLogits(torch.nn.Module):
    def __init__(self, smoothing=0.1):
        super(LabelSmoothingBCEWithLogits, self).__init__()
        self.smoothing = smoothing

    def forward(self, pred, target):
        target = target.float() * (1.0 - self.smoothing) + 0.5 * self.smoothing
        loss = torch.nn.functional.binary_cross_entropy_with_logits(pred, target, reduction='mean')
        return loss
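A compact sketch of one optimization step with CospyDetector and the label-smoothing loss defined above, not part of the commit; the optimizer settings and dummy batch are assumptions, and train.py in this commit is the authoritative training loop:

# Sketch: one training step for CospyDetector (illustrative only)
import torch
from Detectors import CospyDetector, LabelSmoothingBCEWithLogits

device = "cuda" if torch.cuda.is_available() else "cpu"
model = CospyDetector().to(device)
criterion = LabelSmoothingBCEWithLogits(smoothing=0.1)
optimizer = torch.optim.AdamW([p for p in model.parameters() if p.requires_grad], lr=1e-4)

model.train()
images = torch.rand(4, 3, 384, 384, device=device)    # dummy batch in [0, 1]
labels = torch.randint(0, 2, (4, 1), device=device)   # 0 = real, 1 = fake

logits = model(images)            # (4, 1)
loss = criterion(logits, labels)  # smoothed BCE-with-logits
optimizer.zero_grad()
loss.backward()
optimizer.step()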
Detectors/semantic_detector.py
ADDED
@@ -0,0 +1,86 @@
import torch
import open_clip
from torchvision import transforms
from utils import data_augment


# Semantic Detector (Extract semantic features using CLIP)
class SemanticDetector(torch.nn.Module):
    def __init__(self, dim_clip=1152, num_classes=1):
        super(SemanticDetector, self).__init__()

        # Get the pre-trained CLIP
        model_name = "ViT-SO400M-14-SigLIP-384"
        version = "webli"
        self.clip, _, _ = open_clip.create_model_and_transforms(model_name, pretrained=version)
        # Freeze the CLIP visual encoder
        self.clip.requires_grad_(False)

        # Classifier
        self.fc = torch.nn.Linear(dim_clip, num_classes)

        # Normalization
        self.mean = [0.5, 0.5, 0.5]
        self.std = [0.5, 0.5, 0.5]

        # Resolution
        self.loadSize = 384
        self.cropSize = 384

        # Data augmentation
        self.blur_prob = 0.5
        self.blur_sig = [0.0, 3.0]
        self.jpg_prob = 0.5
        self.jpg_method = ['cv2', 'pil']
        self.jpg_qual = list(range(30, 101))

        # Define the augmentation configuration
        self.aug_config = {
            "blur_prob": self.blur_prob,
            "blur_sig": self.blur_sig,
            "jpg_prob": self.jpg_prob,
            "jpg_method": self.jpg_method,
            "jpg_qual": self.jpg_qual,
        }

        # Pre-processing
        crop_func = transforms.RandomCrop(self.cropSize)
        flip_func = transforms.RandomHorizontalFlip()
        rz_func = transforms.Resize(self.loadSize)
        aug_func = transforms.Lambda(lambda x: data_augment(x, self.aug_config))

        self.train_transform = transforms.Compose([
            rz_func,
            aug_func,
            crop_func,
            flip_func,
            transforms.ToTensor(),
            transforms.Normalize(mean=self.mean, std=self.std),
        ])

        self.test_transform = transforms.Compose([
            rz_func,
            crop_func,
            transforms.ToTensor(),
            transforms.Normalize(mean=self.mean, std=self.std),
        ])

    def forward(self, x, return_feat=False):
        device = next(self.fc.parameters()).device  # device of the fc layer
        x = x.to(device)                            # make sure the input is on the same device
        feat = self.clip.encode_image(x)
        feat = feat.to(device)                      # make sure feat is on the same device as fc
        out = self.fc(feat)
        if return_feat:
            return feat, out
        return out

    def save_weights(self, weights_path):
        save_params = {"fc.weight": self.fc.weight.cpu(), "fc.bias": self.fc.bias.cpu()}
        torch.save(save_params, weights_path)

    def load_weights(self, weights_path):
        device = next(self.fc.parameters()).device  # current device of the model
        weights = torch.load(weights_path, map_location=device)
        self.fc.weight.data = weights["fc.weight"].to(device)
        self.fc.bias.data = weights["fc.bias"].to(device)
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Siyuan Cheng

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
ProGANDetectors/__init__.py
ADDED
@@ -0,0 +1,5 @@
from .artifact_detector import ArtifactDetectorProGAN
from .semantic_detector import SemanticDetectorProGAN
from .cospy_calibrate_detector import CospyCalibrateDetectorProGAN

__all__ = ["ArtifactDetectorProGAN", "SemanticDetectorProGAN", "CospyCalibrateDetectorProGAN"]
ProGANDetectors/artifact_detector.py
ADDED
@@ -0,0 +1,196 @@
import torch
import torch.nn as nn
from torch.nn import functional as F
from torchvision import transforms
from utils import data_augment


# Artifact Detector (Extract artifact features using an NPR-style residual)
class ArtifactDetectorProGAN(torch.nn.Module):
    def __init__(self, dim_artifact=512, num_classes=1):
        super(ArtifactDetectorProGAN, self).__init__()
        # Load the artifact encoder based on NPR
        self.artifact_encoder = ResNet(Bottleneck, [3, 4, 6, 3])

        # Classifier
        self.fc = torch.nn.Linear(dim_artifact, num_classes)

        # Normalization
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]

        # Resolution
        self.loadSize = 256
        self.cropSize = 224

        # Data augmentation
        self.blur_prob = 0.0
        self.blur_sig = [0.0, 3.0]
        self.jpg_prob = 0.0
        self.jpg_method = ['cv2', 'pil']
        self.jpg_qual = list(range(70, 96))

        # Define the augmentation configuration
        self.aug_config = {
            "blur_prob": self.blur_prob,
            "blur_sig": self.blur_sig,
            "jpg_prob": self.jpg_prob,
            "jpg_method": self.jpg_method,
            "jpg_qual": self.jpg_qual,
        }

        # Pre-processing
        crop_func = transforms.RandomCrop(self.cropSize)
        flip_func = transforms.RandomHorizontalFlip()
        rz_func = transforms.Resize(self.loadSize)
        aug_func = transforms.Lambda(lambda x: data_augment(x, self.aug_config))

        self.train_transform = transforms.Compose([
            aug_func,
            rz_func,
            crop_func,
            flip_func,
            transforms.ToTensor(),
            transforms.Normalize(mean=self.mean, std=self.std),
        ])

        self.test_transform = transforms.Compose([
            rz_func,
            crop_func,
            transforms.ToTensor(),
            transforms.Normalize(mean=self.mean, std=self.std),
        ])

    def forward(self, x, return_feat=False):
        feat = self.artifact_encoder(x)
        out = self.fc(feat)
        if return_feat:
            return feat, out
        return out

    def save_weights(self, weights_path):
        save_params = {k: v.cpu() for k, v in self.state_dict().items()}
        torch.save(save_params, weights_path)

    def load_weights(self, weights_path):
        weights = torch.load(weights_path)
        self.load_state_dict(weights)


# Define the artifact encoder (based on NPR)
def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, planes * self.expansion)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1):
        super(ResNet, self).__init__()

        self.unfoldSize = 2
        self.unfoldIndex = 0
        assert self.unfoldSize > 1
        assert -1 < self.unfoldIndex and self.unfoldIndex < self.unfoldSize*self.unfoldSize
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc1 = nn.Linear(512, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def interpolate(self, img, factor):
        return F.interpolate(
            F.interpolate(img,
                          scale_factor=factor,
                          mode='nearest',
                          recompute_scale_factor=True),
            scale_factor=1 / factor,
            mode='nearest',
            recompute_scale_factor=True)

    def forward(self, x):
        artifact = x - self.interpolate(x, 0.5)

        x = self.conv1(artifact * 2.0 / 3.0)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)

        return x
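The NPR-style artifact above is simply the difference between the image and a nearest-neighbour down/up-sampled copy of itself. A standalone sketch of that residual, not part of the commit (shapes are illustrative):

# Sketch: the NPR-style residual computed in ResNet.forward above (illustrative)
import torch
import torch.nn.functional as F

def npr_residual(img, factor=0.5):
    # down-sample then up-sample with nearest neighbour, as in ResNet.interpolate
    down = F.interpolate(img, scale_factor=factor, mode='nearest', recompute_scale_factor=True)
    up = F.interpolate(down, scale_factor=1 / factor, mode='nearest', recompute_scale_factor=True)
    return img - up

x = torch.rand(1, 3, 224, 224)
artifact = npr_residual(x)   # what the encoder actually sees (scaled by 2/3 in forward)
print(artifact.shape)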
ProGANDetectors/cospy_calibrate_detector.py
ADDED
@@ -0,0 +1,96 @@
import torch
from torchvision import transforms
from utils import data_augment
from .semantic_detector import SemanticDetectorProGAN
from .artifact_detector import ArtifactDetectorProGAN


# CO-SPY Calibrate Detector (Calibrate the integration of semantic and artifact detectors)
class CospyCalibrateDetectorProGAN(torch.nn.Module):
    def __init__(self, semantic_weights_path, artifact_weights_path, num_classes=1):
        super(CospyCalibrateDetectorProGAN, self).__init__()

        # Load the semantic detector
        self.sem = SemanticDetectorProGAN()
        self.sem.load_weights(semantic_weights_path)

        # Load the artifact detector
        self.art = ArtifactDetectorProGAN()
        self.art.load_weights(artifact_weights_path)

        # Freeze the two pre-trained models
        for param in self.sem.parameters():
            param.requires_grad = False
        for param in self.art.parameters():
            param.requires_grad = False

        # Classifier
        self.fc = torch.nn.Linear(2, num_classes)

        # Transformations inside the forward function
        # Including the normalization and resizing (only for the artifact detector)
        self.sem_transform = transforms.Compose([
            transforms.Normalize(self.sem.mean, self.sem.std)
        ])
        self.art_transform = transforms.Compose([
            transforms.Resize(self.art.cropSize, antialias=False),
            transforms.Normalize(self.art.mean, self.art.std)
        ])

        # Resolution
        self.loadSize = 256
        self.cropSize = 224

        # Data augmentation
        self.blur_prob = 0.0
        self.blur_sig = [0.0, 3.0]
        self.jpg_prob = 0.0
        self.jpg_method = ['cv2', 'pil']
        self.jpg_qual = list(range(70, 96))

        # Define the augmentation configuration
        self.aug_config = {
            "blur_prob": self.blur_prob,
            "blur_sig": self.blur_sig,
            "jpg_prob": self.jpg_prob,
            "jpg_method": self.jpg_method,
            "jpg_qual": self.jpg_qual,
        }

        # Pre-processing
        crop_func = transforms.RandomCrop(self.cropSize)
        flip_func = transforms.RandomHorizontalFlip()
        rz_func = transforms.Resize(self.loadSize)
        aug_func = transforms.Lambda(lambda x: data_augment(x, self.aug_config))

        self.train_transform = transforms.Compose([
            flip_func,
            aug_func,
            rz_func,
            crop_func,
            transforms.ToTensor(),
        ])

        self.test_transform = transforms.Compose([
            rz_func,
            crop_func,
            transforms.ToTensor(),
        ])

    def forward(self, x):
        x_sem = self.sem_transform(x)
        x_art = self.art_transform(x)
        pred_sem = self.sem(x_sem)
        pred_art = self.art(x_art)
        x = torch.cat([pred_sem, pred_art], dim=1)
        x = self.fc(x)
        return x

    def save_weights(self, weights_path):
        save_params = {"fc.weight": self.fc.weight.cpu(), "fc.bias": self.fc.bias.cpu()}
        torch.save(save_params, weights_path)

    def load_weights(self, weights_path):
        weights = torch.load(weights_path)
        self.fc.weight.data = weights["fc.weight"]
        self.fc.bias.data = weights["fc.bias"]
ProGANDetectors/semantic_detector.py
ADDED
@@ -0,0 +1,82 @@
import torch
from transformers import CLIPModel
from torchvision import transforms
from utils import data_augment


# Semantic Detector (Extract semantic features using CLIP)
class SemanticDetectorProGAN(torch.nn.Module):
    def __init__(self, dim_clip=768, num_classes=1):
        super(SemanticDetectorProGAN, self).__init__()

        # Get the pre-trained CLIP
        model_name = "openai/clip-vit-large-patch14"
        self.clip = CLIPModel.from_pretrained(model_name)

        # Freeze the CLIP visual encoder
        self.clip.requires_grad_(False)

        # Classifier
        self.fc = torch.nn.Linear(dim_clip, num_classes)

        # Normalization
        self.mean = [0.48145466, 0.4578275, 0.40821073]
        self.std = [0.26862954, 0.26130258, 0.27577711]

        # Resolution
        self.loadSize = 256
        self.cropSize = 224

        # Data augmentation
        self.blur_prob = 0.5
        self.blur_sig = [0.0, 3.0]
        self.jpg_prob = 0.5
        self.jpg_method = ['cv2', 'pil']
        self.jpg_qual = list(range(30, 101))

        # Define the augmentation configuration
        self.aug_config = {
            "blur_prob": self.blur_prob,
            "blur_sig": self.blur_sig,
            "jpg_prob": self.jpg_prob,
            "jpg_method": self.jpg_method,
            "jpg_qual": self.jpg_qual,
        }

        # Pre-processing
        crop_func = transforms.RandomCrop(self.cropSize)
        flip_func = transforms.RandomHorizontalFlip()
        rz_func = transforms.Resize(self.loadSize)
        aug_func = transforms.Lambda(lambda x: data_augment(x, self.aug_config))

        self.train_transform = transforms.Compose([
            rz_func,
            aug_func,
            crop_func,
            flip_func,
            transforms.ToTensor(),
            transforms.Normalize(mean=self.mean, std=self.std),
        ])

        self.test_transform = transforms.Compose([
            rz_func,
            crop_func,
            transforms.ToTensor(),
            transforms.Normalize(mean=self.mean, std=self.std),
        ])

    def forward(self, x, return_feat=False):
        feat = self.clip.get_image_features(x)
        out = self.fc(feat)
        if return_feat:
            return feat, out
        return out

    def save_weights(self, weights_path):
        save_params = {"fc.weight": self.fc.weight.cpu(), "fc.bias": self.fc.bias.cpu()}
        torch.save(save_params, weights_path)

    def load_weights(self, weights_path):
        weights = torch.load(weights_path)
        self.fc.weight.data = weights["fc.weight"]
        self.fc.bias.data = weights["fc.bias"]
__pycache__/utils.cpython-311.pyc
ADDED
Binary file (9.5 kB).
calibrate_combine.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import time
|
| 3 |
+
import json
|
| 4 |
+
import torch
|
| 5 |
+
import numpy as np
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
from loguru import logger
|
| 8 |
+
from sklearn.metrics import average_precision_score
|
| 9 |
+
|
| 10 |
+
from Detectors import CospyCalibrateDetector
|
| 11 |
+
from Datasets import TrainDataset, TestDataset, EVAL_DATASET_LIST, EVAL_MODEL_LIST
|
| 12 |
+
from utils import seed_torch
|
| 13 |
+
|
| 14 |
+
import warnings
|
| 15 |
+
warnings.filterwarnings("ignore")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class Detector():
|
| 19 |
+
def __init__(self, args):
|
| 20 |
+
super(Detector, self).__init__()
|
| 21 |
+
|
| 22 |
+
# Device
|
| 23 |
+
self.device = args.device
|
| 24 |
+
|
| 25 |
+
# ===== Khởi tạo model =====
|
| 26 |
+
self.model = CospyCalibrateDetector(
|
| 27 |
+
semantic_weights_path=args.semantic_weights_path,
|
| 28 |
+
artifact_weights_path=args.artifact_weights_path
|
| 29 |
+
)
|
| 30 |
+
self.model.to(self.device)
|
| 31 |
+
|
| 32 |
+
# Khởi tạo fc layer nếu muốn
|
| 33 |
+
torch.nn.init.normal_(self.model.fc.weight.data, 0.0, 0.02)
|
| 34 |
+
|
| 35 |
+
# ===== Optimizer =====
|
| 36 |
+
_lr = 1e-1
|
| 37 |
+
_beta1 = 0.9
|
| 38 |
+
_weight_decay = 0.0
|
| 39 |
+
params = [p for p in self.model.parameters() if p.requires_grad]
|
| 40 |
+
print(f'Trainable parameters: {len(params)}')
|
| 41 |
+
self.optimizer = torch.optim.AdamW(params, lr=_lr, betas=(_beta1, 0.999), weight_decay=_weight_decay)
|
| 42 |
+
|
| 43 |
+
# ===== Loss =====
|
| 44 |
+
self.criterion = torch.nn.BCEWithLogitsLoss()
|
| 45 |
+
|
| 46 |
+
# Scheduler
|
| 47 |
+
self.delr_freq = 10
|
| 48 |
+
|
| 49 |
+
# ===== Load checkpoint nếu có =====
|
| 50 |
+
if args.resume is not None:
|
| 51 |
+
print(f"Loading checkpoint from {args.resume}")
|
| 52 |
+
state = torch.load(args.resume, map_location=self.device)
|
| 53 |
+
|
| 54 |
+
# hỗ trợ cả 2 dạng: {'model': state_dict} hoặc state_dict trực tiếp
|
| 55 |
+
if isinstance(state, dict) and "model" in state:
|
| 56 |
+
state = state["model"]
|
| 57 |
+
|
| 58 |
+
self.model.load_state_dict(state, strict=False)
|
| 59 |
+
print("Checkpoint loaded. Continue training...")
|
| 60 |
+
|
| 61 |
+
self.model.to(self.device)
|
| 62 |
+
self.model.train()
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# Training function for the detector
|
| 67 |
+
def train_step(self, batch_data):
|
| 68 |
+
# Decompose the batch data
|
| 69 |
+
inputs, labels = batch_data
|
| 70 |
+
inputs, labels = inputs.to(self.device), labels.to(self.device)
|
| 71 |
+
|
| 72 |
+
self.optimizer.zero_grad()
|
| 73 |
+
outputs = self.model(inputs)
|
| 74 |
+
loss = self.criterion(outputs, labels.unsqueeze(1).float())
|
| 75 |
+
loss.backward()
|
| 76 |
+
self.optimizer.step()
|
| 77 |
+
|
| 78 |
+
eval_loss = loss.item()
|
| 79 |
+
y_pred = outputs.sigmoid().flatten().tolist()
|
| 80 |
+
y_true = labels.tolist()
|
| 81 |
+
return eval_loss, y_pred, y_true
|
| 82 |
+
|
| 83 |
+
# Schedule the training
|
| 84 |
+
# Early stopping / learning rate adjustment
|
| 85 |
+
def scheduler(self, status_dict):
|
| 86 |
+
epoch = status_dict['epoch']
|
| 87 |
+
if epoch % self.delr_freq == 0 and epoch != 0:
|
| 88 |
+
for param_group in self.optimizer.param_groups:
|
| 89 |
+
param_group['lr'] *= 0.9
|
| 90 |
+
self.lr = param_group['lr']
|
| 91 |
+
return True
|
| 92 |
+
|
| 93 |
+
# Prediction function
|
| 94 |
+
def predict(self, inputs):
|
| 95 |
+
inputs = inputs.to(self.device)
|
| 96 |
+
outputs = self.model(inputs)
|
| 97 |
+
prediction = outputs.sigmoid().flatten().tolist()
|
| 98 |
+
return prediction
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def evaluate(y_pred, y_true):
|
| 102 |
+
ap = average_precision_score(y_true, y_pred)
|
| 103 |
+
accuracy = ((np.array(y_pred) > 0.5) == y_true).mean()
|
| 104 |
+
return ap, accuracy
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def train(args):
|
| 108 |
+
# Set the saving directory first
|
| 109 |
+
model_dir = os.path.join(args.ckpt, "cospy_calibrate")
|
| 110 |
+
if not os.path.exists(model_dir):
|
| 111 |
+
os.makedirs(model_dir)
|
| 112 |
+
|
| 113 |
+
log_path = f"{model_dir}/training.log"
|
| 114 |
+
if os.path.exists(log_path):
|
| 115 |
+
os.remove(log_path)
|
| 116 |
+
|
| 117 |
+
logger_id = logger.add(
|
| 118 |
+
log_path,
|
| 119 |
+
format="{time:MM-DD at HH:mm:ss} | {level} | {module}:{line} | {message}",
|
| 120 |
+
level="DEBUG",
|
| 121 |
+
)
|
| 122 |
+
# Get the detector
|
| 123 |
+
detector = Detector(args)
|
| 124 |
+
# --- Resume checkpoint ---
|
| 125 |
+
start_epoch = 0
|
| 126 |
+
best_acc = 0
|
| 127 |
+
|
| 128 |
+
if args.resume:
|
| 129 |
+
resume_path = os.path.join(model_dir, "best_model.pth")
|
| 130 |
+
if os.path.exists(resume_path):
|
| 131 |
+
print(f"Resuming from {resume_path} ...")
|
| 132 |
+
detector.model.load_weights(resume_path)
|
| 133 |
+
detector.model.to(args.device)
|
| 134 |
+
|
| 135 |
+
# Load the calibration dataset using the "val" split
|
| 136 |
+
train_dataset = TrainDataset(data_path=args.calibration_dirpath,
|
| 137 |
+
split="val",
|
| 138 |
+
transform=detector.model.test_transform)
|
| 139 |
+
|
| 140 |
+
train_loader = torch.utils.data.DataLoader(train_dataset,
|
| 141 |
+
batch_size=args.batch_size,
|
| 142 |
+
shuffle=True,
|
| 143 |
+
num_workers=4,
|
| 144 |
+
pin_memory=True)
|
| 145 |
+
|
| 146 |
+
logger.info(f"Train size {len(train_dataset)}")
|
| 147 |
+
|
| 148 |
+
# Set the saving directory
|
| 149 |
+
model_dir = os.path.join(args.ckpt, "cospy_calibrate")
|
| 150 |
+
if not os.path.exists(model_dir):
|
| 151 |
+
os.makedirs(model_dir)
|
| 152 |
+
log_path = f"{model_dir}/training.log"
|
| 153 |
+
if os.path.exists(log_path):
|
| 154 |
+
os.remove(log_path)
|
| 155 |
+
|
| 156 |
+
logger_id = logger.add(
|
| 157 |
+
log_path,
|
| 158 |
+
format="{time:MM-DD at HH:mm:ss} | {level} | {module}:{line} | {message}",
|
| 159 |
+
level="DEBUG",
|
| 160 |
+
)
|
| 161 |
+
|
| 162 |
+
# Train the detector
|
| 163 |
+
best_acc = 0
|
| 164 |
+
for epoch in range(start_epoch, args.epochs):
|
| 165 |
+
# Set the model to training mode
|
| 166 |
+
detector.model.train()
|
| 167 |
+
time_start = time.time()
|
| 168 |
+
for step_id, batch_data in enumerate(train_loader):
|
| 169 |
+
eval_loss, y_pred, y_true = detector.train_step(batch_data)
|
| 170 |
+
ap, accuracy = evaluate(y_pred, y_true)
|
| 171 |
+
|
| 172 |
+
# Log the training information
|
| 173 |
+
if (step_id + 1) % 100 == 0:
|
| 174 |
+
time_end = time.time()
|
| 175 |
+
logger.info(f"Epoch {epoch} | Batch {step_id + 1}/{len(train_loader)} | Loss {eval_loss:.4f} | AP {ap*100:.2f}% | Accuracy {accuracy*100:.2f}% | Time {time_end-time_start:.2f}s")
|
| 176 |
+
time_start = time.time()
|
| 177 |
+
|
| 178 |
+
# Evaluate the model
|
| 179 |
+
detector.model.eval()
|
| 180 |
+
y_pred, y_true = [], []
|
| 181 |
+
for inputs in train_loader:
|
| 182 |
+
inputs, labels = inputs
|
| 183 |
+
y_pred.extend(detector.predict(inputs))
|
| 184 |
+
y_true.extend(labels.tolist())
|
| 185 |
+
|
| 186 |
+
ap, accuracy = evaluate(y_pred, y_true)
|
| 187 |
+
logger.info(f"Epoch {epoch} | Total AP {ap*100:.2f}% | Total Accuracy {accuracy*100:.2f}%")
|
| 188 |
+
|
| 189 |
+
# Schedule the training
|
| 190 |
+
status_dict = {'epoch': epoch, 'AP': ap, 'Accuracy': accuracy}
|
| 191 |
+
proceed = detector.scheduler(status_dict)
|
| 192 |
+
if not proceed:
|
| 193 |
+
logger.info("Early stopping")
|
| 194 |
+
break
|
| 195 |
+
|
| 196 |
+
# Save the model
|
| 197 |
+
if accuracy >= best_acc:
|
| 198 |
+
best_acc = accuracy
|
| 199 |
+
detector.model.save_weights(f"{model_dir}/best_model.pth")
|
| 200 |
+
logger.info(f"Best model saved with accuracy {best_acc.mean()*100:.2f}%")
|
| 201 |
+
|
| 202 |
+
if epoch % 5 == 0:
|
| 203 |
+
detector.model.save_weights(f"{model_dir}/epoch_{epoch}.pth")
|
| 204 |
+
logger.info(f"Model saved at epoch {epoch}")
|
| 205 |
+
|
| 206 |
+
# Save the final model
|
| 207 |
+
detector.model.save_weights(f"{model_dir}/final_model.pth")
|
| 208 |
+
logger.info("Final model saved")
|
| 209 |
+
|
| 210 |
+
# Remove the logger
|
| 211 |
+
logger.remove(logger_id)
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
def test(args):
|
| 215 |
+
# Initialize the detector
|
| 216 |
+
detector = Detector(args)
|
| 217 |
+
|
| 218 |
+
# Load the [best/final] model
|
| 219 |
+
weights_path = os.path.join(args.ckpt, "cospy_calibrate", "best_model.pth")
|
| 220 |
+
|
| 221 |
+
detector.model.load_weights(weights_path)
|
| 222 |
+
detector.model.to(args.device)
|
| 223 |
+
detector.model.eval()
|
| 224 |
+
|
| 225 |
+
# Set the pre-processing function
|
| 226 |
+
test_transform = detector.model.test_transform
|
| 227 |
+
|
| 228 |
+
# Set the saving directory
|
| 229 |
+
save_dir = os.path.join(args.ckpt, "cospy_calibrate")
|
| 230 |
+
save_result_path = os.path.join(save_dir, "result.json")
|
| 231 |
+
save_output_path = os.path.join(save_dir, "output.json")
|
| 232 |
+
|
| 233 |
+
# Begin the evaluation
|
| 234 |
+
result_all = {}
|
| 235 |
+
output_all = {}
|
| 236 |
+
for dataset_name in EVAL_DATASET_LIST:
|
| 237 |
+
result_all[dataset_name] = {}
|
| 238 |
+
output_all[dataset_name] = {}
|
| 239 |
+
for model_name in EVAL_MODEL_LIST:
|
| 240 |
+
test_dataset = TestDataset(dataset=dataset_name, model=model_name, root_path=args.testset_dirpath, transform=test_transform)
|
| 241 |
+
test_loader = torch.utils.data.DataLoader(test_dataset,
|
| 242 |
+
batch_size=args.batch_size,
|
| 243 |
+
shuffle=False,
|
| 244 |
+
num_workers=4,
|
| 245 |
+
pin_memory=True)
|
| 246 |
+
|
| 247 |
+
# Evaluate the model
|
| 248 |
+
y_pred, y_true = [], []
|
| 249 |
+
for (images, labels) in tqdm(test_loader, desc=f"Evaluating {dataset_name} {model_name}"):
|
| 250 |
+
y_pred.extend(detector.predict(images))
|
| 251 |
+
y_true.extend(labels.tolist())
|
| 252 |
+
|
| 253 |
+
ap, accuracy = evaluate(y_pred, y_true)
|
| 254 |
+
print(f"Evaluate on {dataset_name} {model_name} | Size {len(y_true)} | AP {ap*100:.2f}% | Accuracy {accuracy*100:.2f}%")
|
| 255 |
+
|
| 256 |
+
result_all[dataset_name][model_name] = {"size": len(y_true), "AP": ap, "Accuracy": accuracy}
|
| 257 |
+
output_all[dataset_name][model_name] = {"y_pred": y_pred, "y_true": y_true}
|
| 258 |
+
|
| 259 |
+
# Save the results
|
| 260 |
+
with open(save_result_path, "w") as f:
|
| 261 |
+
json.dump(result_all, f, indent=4)
|
| 262 |
+
|
| 263 |
+
with open(save_output_path, "w") as f:
|
| 264 |
+
json.dump(output_all, f, indent=4)
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
if __name__ == "__main__":
|
| 268 |
+
import argparse
|
| 269 |
+
|
| 270 |
+
parser = argparse.ArgumentParser("Deep Fake Detection")
|
| 271 |
+
parser.add_argument("--gpu", type=int, default=0, help="GPU ID")
|
| 272 |
+
parser.add_argument("--phase", type=str, default="test", choices=["train", "test"], help="Phase of the experiment")
|
| 273 |
+
parser.add_argument("--semantic_weights_path", type=str, default="ckpt/semantic/best_model.pth", help="Semantic weights path")
|
| 274 |
+
parser.add_argument("--artifact_weights_path", type=str, default="ckpt/artifact/best_model.pth", help="Artifact weights path")
|
| 275 |
+
parser.add_argument("--calibration_dirpath", type=str, default="data/train", help="Calibration directory")
|
| 276 |
+
parser.add_argument("--testset_dirpath", type=str, default="data/test", help="Testset directory")
|
| 277 |
+
parser.add_argument("--ckpt", type=str, default="ckpt", help="Checkpoint directory")
|
| 278 |
+
parser.add_argument("--epochs", type=int, default=10, help="Number of epochs")
|
| 279 |
+
parser.add_argument("--batch_size", type=int, default=32, help="Batch size")
|
| 280 |
+
parser.add_argument("--seed", type=int, default=1024, help="Random seed")
|
| 281 |
+
parser.add_argument("--resume", type=str, default=None, help="Path to checkpoint to resume training")
|
| 282 |
+
|
| 283 |
+
args = parser.parse_args()
|
| 284 |
+
|
| 285 |
+
# Set the random seed
|
| 286 |
+
seed_torch(args.seed)
|
| 287 |
+
|
| 288 |
+
# Set the GPU ID
|
| 289 |
+
args.device = f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu"
|
| 290 |
+
|
| 291 |
+
# Begin the experiment
|
| 292 |
+
if args.phase == "train":
|
| 293 |
+
train(args)
|
| 294 |
+
elif args.phase == "test":
|
| 295 |
+
test(args)
|
| 296 |
+
else:
|
| 297 |
+
raise ValueError("Unknown phase")
|
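For reference, the `--resume` loader above accepts either a plain state_dict or a dict wrapped under a `"model"` key. A minimal sketch of writing both layouts is shown below; it uses a tiny stand-in module, since constructing the real `CospyCalibrateDetector` needs the semantic/artifact weight files.

```python
# Minimal sketch of the two checkpoint layouts accepted by --resume in calibrate_combine.py.
# Assumption: the real detector is a standard torch.nn.Module (its load_state_dict call
# suggests so); a small stand-in module is used here so the snippet runs on its own.
import torch

model = torch.nn.Linear(2, 1)  # stand-in for the calibration head

torch.save(model.state_dict(), "resume_plain.pth")               # plain state_dict
torch.save({"model": model.state_dict()}, "resume_wrapped.pth")  # {'model': state_dict} wrapper

# Either file can then be passed via --resume; the loader unwraps the "model" key when present.
```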
data/in_the_wild/README.md
ADDED
|
@@ -0,0 +1,14 @@
|
| 1 |
+
Our in-the-wild evaluation dataset is constructed from five sources:
|
| 2 |
+
|
| 3 |
+
(1) Civitai [https://civitai.com/]
|
| 4 |
+
|
| 5 |
+
(2) DALL-E 3 [https://huggingface.co/datasets/ProGamerGov/synthetic-dataset-1m-dalle3-high-quality-captions]
|
| 6 |
+
|
| 7 |
+
(3) instavibe.ai [https://www.instavibe.ai/discover]
|
| 8 |
+
|
| 9 |
+
(4) Lexica [https://lexica.art/]
|
| 10 |
+
|
| 11 |
+
(5) Midjourney-v6 [https://huggingface.co/datasets/terminusresearch/midjourney-v6-520k-raw]
|
| 12 |
+
|
| 13 |
+
Data from sources (1), (2), and (5) can be accessed and downloaded directly.
|
| 14 |
+
For sources (3) and (4), we provide the image URLs used in our dataset under the `./urls` directory for your convenience.
|
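A minimal download sketch for the URL lists under `./urls` could look like the following; it assumes one image URL per line in `flux.txt` / `lexica.txt` and uses an illustrative `downloads/` output folder, so adjust both to the actual file layout.

```python
# Minimal sketch: fetch the images listed in data/in_the_wild/urls/*.txt.
# Assumptions: one URL per line; the output directory name is illustrative only.
import os
import requests

def download_url_list(list_path, out_dir):
    os.makedirs(out_dir, exist_ok=True)
    with open(list_path) as f:
        urls = [line.strip() for line in f if line.strip()]
    for i, url in enumerate(urls):
        resp = requests.get(url, timeout=30)
        if resp.ok:
            with open(os.path.join(out_dir, f"{i:06d}.jpg"), "wb") as out:
                out.write(resp.content)

download_url_list("data/in_the_wild/urls/lexica.txt", "downloads/lexica")
```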
data/in_the_wild/urls/flux.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
data/in_the_wild/urls/lexica.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
data/test/README.md
ADDED
|
@@ -0,0 +1,13 @@
|
| 1 |
+
Please download the test samples from [Co-Spy-Bench](https://huggingface.co/datasets/ruojiruoli/Co-Spy-Bench) and place them in this directory.
|
| 2 |
+
|
| 3 |
+
For real images:
|
| 4 |
+
|
| 5 |
+
* **CC3M**, **MSCOCO**, **TextCaps**, **Flickr**, and **SBU** are used.
|
| 6 |
+
* For **MSCOCO** and **Flickr**, refer to `Datasets/mscoco.py` and `Datasets/flickr.py` for instructions on downloading via HuggingFace Datasets.
|
| 7 |
+
* For the remaining datasets, download from their original sources:
|
| 8 |
+
|
| 9 |
+
* [CC3M](https://ai.google.com/research/ConceptualCaptions/download)
|
| 10 |
+
* [TextCaps](https://textvqa.org/textcaps/dataset/)
|
| 11 |
+
* [SBU](https://huggingface.co/datasets/vicenteor/sbu_captions)
|
| 12 |
+
|
| 13 |
+
Example test samples are also available on [Google Drive](https://drive.google.com/file/d/1JaaIGItyDYprr4_k0C_90MGIIRVQpmIP/view?usp=sharing). Please ensure their use complies with the original licenses.
|
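One way to pull the benchmark into this directory is `huggingface_hub` (already pinned in `environment.yml`); the sketch below assumes the dataset repo is publicly accessible and that its folder layout matches what the evaluation scripts expect.

```python
# Minimal sketch: download Co-Spy-Bench into data/test/.
# Assumption: no gating/token is required and the repo layout needs no re-arranging.
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="ruojiruoli/Co-Spy-Bench",
    repo_type="dataset",
    local_dir="data/test",
)
```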
data/train/download.sh
ADDED
|
@@ -0,0 +1,16 @@
|
| 1 |
+
# Download and unzip the synthetic training dataset from DRCT
|
| 2 |
+
# Reference: https://icml.cc/virtual/2024/poster/33086
|
| 3 |
+
# Data source: https://github.com/beibuwandeluori/DRCT
|
| 4 |
+
wget --no-check-certificate https://modelscope.cn/datasets/BokingChen/DRCT-2M/resolve/master/images/stable-diffusion-v1-4.zip
|
| 5 |
+
unzip stable-diffusion-v1-4.zip
|
| 6 |
+
|
| 7 |
+
# Download the real training dataset from MSCOCO2017
|
| 8 |
+
# Reference: https://arxiv.org/pdf/1405.0312
|
| 9 |
+
# Data source: https://cocodataset.org/#download
|
| 10 |
+
mkdir mscoco2017
|
| 11 |
+
cd mscoco2017
|
| 12 |
+
wget http://images.cocodataset.org/zips/train2017.zip
|
| 13 |
+
wget http://images.cocodataset.org/zips/val2017.zip
|
| 14 |
+
unzip train2017.zip
|
| 15 |
+
unzip val2017.zip
|
| 16 |
+
cd ..
|
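After the script finishes, a quick check such as the sketch below (run from `data/train/`) confirms the expected folders are in place; it assumes `stable-diffusion-v1-4.zip` unpacks into a `stable-diffusion-v1-4/` directory.

```python
# Minimal sanity check, run from data/train/ after download.sh completes.
# Assumption: the DRCT zip extracts to a stable-diffusion-v1-4/ folder next to mscoco2017/.
import os

for d in ["mscoco2017/train2017", "mscoco2017/val2017", "stable-diffusion-v1-4"]:
    print(f"{d}: {'ok' if os.path.isdir(d) else 'MISSING'}")
```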
environment.yml
ADDED
|
@@ -0,0 +1,105 @@
|
| 1 |
+
name: cospy
|
| 2 |
+
channels:
|
| 3 |
+
- defaults
|
| 4 |
+
dependencies:
|
| 5 |
+
- _libgcc_mutex=0.1=main
|
| 6 |
+
- _openmp_mutex=5.1=1_gnu
|
| 7 |
+
- ca-certificates=2025.2.25=h06a4308_0
|
| 8 |
+
- ld_impl_linux-64=2.40=h12ee557_0
|
| 9 |
+
- libffi=3.4.4=h6a678d5_1
|
| 10 |
+
- libgcc-ng=11.2.0=h1234567_1
|
| 11 |
+
- libgomp=11.2.0=h1234567_1
|
| 12 |
+
- libstdcxx-ng=11.2.0=h1234567_1
|
| 13 |
+
- ncurses=6.4=h6a678d5_0
|
| 14 |
+
- openssl=3.0.16=h5eee18b_0
|
| 15 |
+
- pip=24.2=py38h06a4308_0
|
| 16 |
+
- python=3.8.18=h955ad1f_0
|
| 17 |
+
- readline=8.2=h5eee18b_0
|
| 18 |
+
- setuptools=75.1.0=py38h06a4308_0
|
| 19 |
+
- sqlite=3.45.3=h5eee18b_0
|
| 20 |
+
- tk=8.6.14=h39e8969_0
|
| 21 |
+
- wheel=0.44.0=py38h06a4308_0
|
| 22 |
+
- xz=5.6.4=h5eee18b_1
|
| 23 |
+
- zlib=1.2.13=h5eee18b_1
|
| 24 |
+
- pip:
|
| 25 |
+
- accelerate==1.0.1
|
| 26 |
+
- aiohappyeyeballs==2.4.4
|
| 27 |
+
- aiohttp==3.10.11
|
| 28 |
+
- aiosignal==1.3.1
|
| 29 |
+
- async-timeout==5.0.1
|
| 30 |
+
- attrs==25.3.0
|
| 31 |
+
- certifi==2025.1.31
|
| 32 |
+
- charset-normalizer==3.4.1
|
| 33 |
+
- contourpy==1.1.1
|
| 34 |
+
- cycler==0.12.1
|
| 35 |
+
- datasets==3.1.0
|
| 36 |
+
- diffusers==0.32.2
|
| 37 |
+
- dill==0.3.8
|
| 38 |
+
- filelock==3.16.1
|
| 39 |
+
- fonttools==4.56.0
|
| 40 |
+
- frozenlist==1.5.0
|
| 41 |
+
- fsspec==2024.9.0
|
| 42 |
+
- ftfy==6.2.3
|
| 43 |
+
- huggingface-hub==0.29.3
|
| 44 |
+
- idna==3.10
|
| 45 |
+
- importlib-metadata==8.5.0
|
| 46 |
+
- importlib-resources==6.4.5
|
| 47 |
+
- jinja2==3.1.6
|
| 48 |
+
- joblib==1.4.2
|
| 49 |
+
- kiwisolver==1.4.7
|
| 50 |
+
- loguru==0.7.3
|
| 51 |
+
- markupsafe==2.1.5
|
| 52 |
+
- matplotlib==3.7.5
|
| 53 |
+
- mpmath==1.3.0
|
| 54 |
+
- multidict==6.1.0
|
| 55 |
+
- multiprocess==0.70.16
|
| 56 |
+
- networkx==3.1
|
| 57 |
+
- numpy==1.24.4
|
| 58 |
+
- nvidia-cublas-cu12==12.1.3.1
|
| 59 |
+
- nvidia-cuda-cupti-cu12==12.1.105
|
| 60 |
+
- nvidia-cuda-nvrtc-cu12==12.1.105
|
| 61 |
+
- nvidia-cuda-runtime-cu12==12.1.105
|
| 62 |
+
- nvidia-cudnn-cu12==9.1.0.70
|
| 63 |
+
- nvidia-cufft-cu12==11.0.2.54
|
| 64 |
+
- nvidia-curand-cu12==10.3.2.106
|
| 65 |
+
- nvidia-cusolver-cu12==11.4.5.107
|
| 66 |
+
- nvidia-cusparse-cu12==12.1.0.106
|
| 67 |
+
- nvidia-nccl-cu12==2.20.5
|
| 68 |
+
- nvidia-nvjitlink-cu12==12.8.93
|
| 69 |
+
- nvidia-nvtx-cu12==12.1.105
|
| 70 |
+
- open-clip-torch==2.31.0
|
| 71 |
+
- opencv-python==4.11.0.86
|
| 72 |
+
- packaging==24.2
|
| 73 |
+
- pandas==2.0.3
|
| 74 |
+
- pillow==10.4.0
|
| 75 |
+
- propcache==0.2.0
|
| 76 |
+
- psutil==7.0.0
|
| 77 |
+
- pyarrow==17.0.0
|
| 78 |
+
- pycocotools==2.0.7
|
| 79 |
+
- pyparsing==3.1.4
|
| 80 |
+
- python-dateutil==2.9.0.post0
|
| 81 |
+
- pytz==2025.1
|
| 82 |
+
- pyyaml==6.0.2
|
| 83 |
+
- regex==2024.11.6
|
| 84 |
+
- requests==2.32.3
|
| 85 |
+
- safetensors==0.5.3
|
| 86 |
+
- scikit-learn==1.3.2
|
| 87 |
+
- scipy==1.10.1
|
| 88 |
+
- six==1.17.0
|
| 89 |
+
- sympy==1.13.3
|
| 90 |
+
- threadpoolctl==3.5.0
|
| 91 |
+
- timm==1.0.15
|
| 92 |
+
- tokenizers==0.20.3
|
| 93 |
+
- torch==2.4.1
|
| 94 |
+
- torchvision==0.19.1
|
| 95 |
+
- tqdm==4.67.1
|
| 96 |
+
- transformers==4.46.3
|
| 97 |
+
- triton==3.0.0
|
| 98 |
+
- typing-extensions==4.12.2
|
| 99 |
+
- tzdata==2025.1
|
| 100 |
+
- urllib3==2.2.3
|
| 101 |
+
- wcwidth==0.2.13
|
| 102 |
+
- xxhash==3.5.0
|
| 103 |
+
- yarl==1.15.2
|
| 104 |
+
- zipp==3.20.2
|
| 105 |
+
prefix: /connect4/cheng535-new/anaconda3/envs/cospy
|
evaluate.py
ADDED
|
@@ -0,0 +1,227 @@
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import torch
|
| 4 |
+
import numpy as np
|
| 5 |
+
from tqdm import tqdm
|
| 6 |
+
from PIL import Image
|
| 7 |
+
from sklearn.metrics import average_precision_score
|
| 8 |
+
import csv
|
| 9 |
+
|
| 10 |
+
from Detectors import CospyCalibrateDetector
|
| 11 |
+
from Datasets import TestDataset, EVAL_DATASET_LIST, EVAL_MODEL_LIST
|
| 12 |
+
from utils import seed_torch
|
| 13 |
+
from sklearn.metrics import (
|
| 14 |
+
accuracy_score, log_loss, average_precision_score, f1_score,
|
| 15 |
+
roc_auc_score, balanced_accuracy_score, confusion_matrix, recall_score
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
import numpy as np
|
| 19 |
+
import warnings
|
| 20 |
+
warnings.filterwarnings("ignore")
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class Detector():
|
| 24 |
+
def __init__(self, args):
|
| 25 |
+
super(Detector, self).__init__()
|
| 26 |
+
|
| 27 |
+
# Device
|
| 28 |
+
self.device = args.device
|
| 29 |
+
|
| 30 |
+
# Initialize the detector
|
| 31 |
+
self.model = CospyCalibrateDetector(
|
| 32 |
+
semantic_weights_path=args.semantic_weights_path,
|
| 33 |
+
artifact_weights_path=args.artifact_weights_path)
|
| 34 |
+
|
| 35 |
+
# Load the pre-trained weights
|
| 36 |
+
self.model.load_weights(args.classifier_weights_path)
|
| 37 |
+
self.model.eval()
|
| 38 |
+
|
| 39 |
+
# Put the model on the device
|
| 40 |
+
self.model.to(self.device)
|
| 41 |
+
|
| 42 |
+
# Prediction function
|
| 43 |
+
def predict(self, inputs):
|
| 44 |
+
inputs = inputs.to(self.device)
|
| 45 |
+
outputs = self.model(inputs)
|
| 46 |
+
prediction = outputs.sigmoid().flatten().tolist()
|
| 47 |
+
return prediction
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def expected_calibration_error(y_true, y_prob, n_bins=10):
|
| 51 |
+
"""Tính ECE (Expected Calibration Error)"""
|
| 52 |
+
y_true = np.array(y_true)
|
| 53 |
+
y_prob = np.array(y_prob)
|
| 54 |
+
bins = np.linspace(0.0, 1.0, n_bins + 1)
|
| 55 |
+
ece = 0.0
|
| 56 |
+
for i in range(n_bins):
|
| 57 |
+
mask = (y_prob > bins[i]) & (y_prob <= bins[i+1])
|
| 58 |
+
if np.sum(mask) > 0:
|
| 59 |
+
prob_mean = y_prob[mask].mean()
|
| 60 |
+
acc = y_true[mask].mean()
|
| 61 |
+
ece += np.sum(mask) / len(y_true) * abs(acc - prob_mean)
|
| 62 |
+
return ece
|
| 63 |
+
|
| 64 |
+
def evaluate(y_pred, y_true):
|
| 65 |
+
y_pred = np.array(y_pred)
|
| 66 |
+
y_true = np.array(y_true)
|
| 67 |
+
pred_label = y_pred > 0.5
|
| 68 |
+
|
| 69 |
+
# Metrics
|
| 70 |
+
acc = accuracy_score(y_true, pred_label)
|
| 71 |
+
nll = log_loss(y_true, y_pred, eps=1e-7)
|
| 72 |
+
ap = average_precision_score(y_true, y_pred)
|
| 73 |
+
ece = expected_calibration_error(y_true, y_pred)
|
| 74 |
+
f1 = f1_score(y_true, pred_label)
|
| 75 |
+
try:
|
| 76 |
+
auc = roc_auc_score(y_true, y_pred)
|
| 77 |
+
except:
|
| 78 |
+
auc = float('nan')
|
| 79 |
+
bacc = balanced_accuracy_score(y_true, pred_label)
|
| 80 |
+
tn, fp, fn, tp = confusion_matrix(y_true, pred_label).ravel()
|
| 81 |
+
fnr = fn / (fn + tp) if (fn + tp) > 0 else float('nan')
|
| 82 |
+
recall_total = recall_score(y_true, pred_label)  # overall recall
|
| 83 |
+
|
| 84 |
+
return {
|
| 85 |
+
"ACC": acc,
|
| 86 |
+
"NLL": nll,
|
| 87 |
+
"AP": ap,
|
| 88 |
+
"ECE": ece,
|
| 89 |
+
"F1": f1,
|
| 90 |
+
"AUC": auc,
|
| 91 |
+
"bAcc": bacc,
|
| 92 |
+
"FNR": fnr,
|
| 93 |
+
"Recall": recall_total
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def test(args):
|
| 99 |
+
# Initialize the detector
|
| 100 |
+
detector = Detector(args)
|
| 101 |
+
|
| 102 |
+
# Set the saving directory
|
| 103 |
+
if not os.path.exists(args.save_dir):
|
| 104 |
+
os.makedirs(args.save_dir)
|
| 105 |
+
save_result_path = os.path.join(args.save_dir, "result.json")
|
| 106 |
+
save_output_path = os.path.join(args.save_dir, "output.json")
|
| 107 |
+
|
| 108 |
+
# Begin the evaluation
|
| 109 |
+
result_all = {}
|
| 110 |
+
output_all = {}
|
| 111 |
+
for dataset_name in EVAL_DATASET_LIST:
|
| 112 |
+
result_all[dataset_name] = {}
|
| 113 |
+
output_all[dataset_name] = {}
|
| 114 |
+
for model_name in EVAL_MODEL_LIST:
|
| 115 |
+
test_dataset = TestDataset(dataset=dataset_name, model=model_name, root_path=args.testset_dirpath, transform=detector.model.test_transform)
|
| 116 |
+
test_loader = torch.utils.data.DataLoader(test_dataset,
|
| 117 |
+
batch_size=args.batch_size,
|
| 118 |
+
shuffle=False,
|
| 119 |
+
num_workers=4,
|
| 120 |
+
pin_memory=True)
|
| 121 |
+
|
| 122 |
+
# Evaluate the model
|
| 123 |
+
y_pred, y_true = [], []
|
| 124 |
+
for images, labels, _ in tqdm(test_loader, desc=f"Evaluating {dataset_name} {model_name}"):
|
| 125 |
+
y_pred.extend(detector.predict(images))
|
| 126 |
+
y_true.extend(labels.tolist())
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
metrics = evaluate(y_pred, y_true)
|
| 130 |
+
print(f"Evaluate on {dataset_name} {model_name} | Size {len(y_true)} | "
|
| 131 |
+
f"ACC {metrics['ACC']*100:.2f}% | Recall {metrics['Recall']*100:.2f}% | "
|
| 132 |
+
f"NLL {metrics['NLL']:.4f} | AP {metrics['AP']*100:.2f}% | "
|
| 133 |
+
f"ECE {metrics['ECE']:.4f} | F1 {metrics['F1']*100:.2f}% | "
|
| 134 |
+
f"AUC {metrics['AUC']*100:.2f}% | bAcc {metrics['bAcc']*100:.2f}% | "
|
| 135 |
+
f"FNR {metrics['FNR']*100:.2f}%")
|
| 136 |
+
|
| 137 |
+
result_all[dataset_name][model_name] = {"size": len(y_true), **metrics}
|
| 138 |
+
csv_dir = os.path.join(args.save_dir, "csv_outputs")
|
| 139 |
+
os.makedirs(csv_dir, exist_ok=True)
|
| 140 |
+
|
| 141 |
+
csv_path = os.path.join(csv_dir, f"{dataset_name}_{model_name}.csv")
|
| 142 |
+
|
| 143 |
+
with open(csv_path, mode="w", newline="", encoding="utf-8") as f:
|
| 144 |
+
writer = csv.writer(f)
|
| 145 |
+
writer.writerow(["path_to_image", "true_label", "pred_percentage", "pred_label"])
|
| 146 |
+
|
| 147 |
+
idx = 0
|
| 148 |
+
for img_path in test_dataset.image_paths:
|
| 149 |
+
pred_score = float(y_pred[idx])
|
| 150 |
+
pred_label = 1 if pred_score > 0.5 else 0
|
| 151 |
+
true_label = int(y_true[idx])
|
| 152 |
+
|
| 153 |
+
writer.writerow([
|
| 154 |
+
img_path,
|
| 155 |
+
true_label,
|
| 156 |
+
pred_score,
|
| 157 |
+
pred_label
|
| 158 |
+
])
|
| 159 |
+
idx += 1
|
| 160 |
+
|
| 161 |
+
print(f"[CSV SAVED] {csv_path}")
|
| 162 |
+
output_all[dataset_name][model_name] = {"y_pred": y_pred, "y_true": y_true}
|
| 163 |
+
|
| 164 |
+
# Save the results
|
| 165 |
+
with open(save_result_path, "w") as f:
|
| 166 |
+
json.dump(result_all, f, indent=4)
|
| 167 |
+
|
| 168 |
+
with open(save_output_path, "w") as f:
|
| 169 |
+
json.dump(output_all, f, indent=4)
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def scan(args):
|
| 173 |
+
# Initialize the detector
|
| 174 |
+
detector = Detector(args)
|
| 175 |
+
|
| 176 |
+
# Define the pre-processing function
|
| 177 |
+
test_transform = detector.model.test_transform
|
| 178 |
+
|
| 179 |
+
# Load the image
|
| 180 |
+
image_filepath = input("Please enter the image filepath for scanning: ")
|
| 181 |
+
while not os.path.exists(image_filepath):
|
| 182 |
+
print(f"Image file not found: {image_filepath}")
|
| 183 |
+
image_filepath = input("Please enter the image filepath for scanning: ")
|
| 184 |
+
|
| 185 |
+
image = Image.open(image_filepath).convert("RGB")
|
| 186 |
+
image = test_transform(image)
|
| 187 |
+
image = image.unsqueeze(0)
|
| 188 |
+
image = image.to(args.device)
|
| 189 |
+
|
| 190 |
+
# Make the prediction
|
| 191 |
+
prediction = detector.predict(image)[0]
|
| 192 |
+
|
| 193 |
+
if prediction > 0.5:
|
| 194 |
+
print(f"CO-SPY Prediction: {prediction:.3f} - AI-Generated")
|
| 195 |
+
else:
|
| 196 |
+
print(f"CO-SPY Prediction: {prediction:.3f} - Real")
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
if __name__ == "__main__":
|
| 200 |
+
import argparse
|
| 201 |
+
|
| 202 |
+
parser = argparse.ArgumentParser("Deep Fake Detection")
|
| 203 |
+
parser.add_argument("--gpu", type=int, default=0, help="GPU ID")
|
| 204 |
+
parser.add_argument("--phase", type=str, default="scan", choices=["scan", "test"], help="Phase of the experiment")
|
| 205 |
+
parser.add_argument("--semantic_weights_path", type=str, default="pretrained/semantic_weights.pth", help="Semantic weights path")
|
| 206 |
+
parser.add_argument("--artifact_weights_path", type=str, default="pretrained/artifact_weights.pth", help="Artifact weights path")
|
| 207 |
+
parser.add_argument("--classifier_weights_path", type=str, default="pretrained/classifier_weights.pth", help="Classifier weights path")
|
| 208 |
+
parser.add_argument("--testset_dirpath", type=str, default="data/test", help="Testset directory")
|
| 209 |
+
parser.add_argument("--save_dir", type=str, default="test_results", help="Save directory")
|
| 210 |
+
parser.add_argument("--batch_size", type=int, default=32, help="Batch size")
|
| 211 |
+
parser.add_argument("--seed", type=int, default=1024, help="Random seed")
|
| 212 |
+
|
| 213 |
+
args = parser.parse_args()
|
| 214 |
+
|
| 215 |
+
# Set the random seed
|
| 216 |
+
seed_torch(args.seed)
|
| 217 |
+
|
| 218 |
+
# Set the GPU ID
|
| 219 |
+
args.device = f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu"
|
| 220 |
+
|
| 221 |
+
# Begin the experiment
|
| 222 |
+
if args.phase == "scan":
|
| 223 |
+
scan(args)
|
| 224 |
+
elif args.phase == "test":
|
| 225 |
+
test(args)
|
| 226 |
+
else:
|
| 227 |
+
raise ValueError("Unknown phase")
|
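To make the `expected_calibration_error` metric above concrete, here is a small worked example on toy predictions: each non-empty confidence bin contributes its size-weighted gap between mean predicted probability and observed accuracy.

```python
# Toy worked example using the same 10-bin logic as expected_calibration_error above.
import numpy as np

y_true = np.array([1, 0, 1, 1, 0, 1])
y_prob = np.array([0.95, 0.10, 0.80, 0.55, 0.35, 0.65])

bins = np.linspace(0.0, 1.0, 11)
ece = 0.0
for i in range(10):
    mask = (y_prob > bins[i]) & (y_prob <= bins[i + 1])
    if mask.sum() > 0:
        # size-weighted |accuracy - mean confidence| for this bin
        ece += mask.sum() / len(y_true) * abs(y_true[mask].mean() - y_prob[mask].mean())

print(f"ECE = {ece:.3f}")
```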
pretrained/classifer_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b3cd62721dca4183bfd37790c12ccaf964f71fe7c6bbf4d97eda5f44c6bafab
|
| 3 |
+
size 1456
|
pretrained/classifier_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bf7e0efb68cf57718742ec3c944640856fd86ddaf1bb219e6cacdc280f781dc
|
| 3 |
+
size 1450
|
pretrained/semantic_weights.pth
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3e7c4cf6534e7fac0f2f898d3764d5aa892653dab96ed1316fd123fa4e0a17c
|
| 3 |
+
size 6064
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
| 1 |
+
torch
|
| 2 |
+
torchvision
|
| 3 |
+
opencv-python-headless
|
| 4 |
+
numpy
|
| 5 |
+
Pillow
|
| 6 |
+
streamlit
|
| 7 |
+
tqdm
|
| 8 |
+
einops
|
train.py
ADDED
|
@@ -0,0 +1,271 @@
|
| 1 |
+
import os
|
| 2 |
+
import time
|
| 3 |
+
import json
|
| 4 |
+
import torch
|
| 5 |
+
import numpy as np
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
from loguru import logger
|
| 8 |
+
from sklearn.metrics import average_precision_score
|
| 9 |
+
|
| 10 |
+
from utils import seed_torch
|
| 11 |
+
from Detectors import CospyDetector, LabelSmoothingBCEWithLogits
|
| 12 |
+
from Datasets import TrainDataset, TestDataset, EVAL_DATASET_LIST, EVAL_MODEL_LIST
|
| 13 |
+
|
| 14 |
+
import warnings
|
| 15 |
+
warnings.filterwarnings("ignore")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class Detector():
|
| 19 |
+
def __init__(self, args):
|
| 20 |
+
super(Detector, self).__init__()
|
| 21 |
+
|
| 22 |
+
# Device
|
| 23 |
+
self.device = args.device
|
| 24 |
+
|
| 25 |
+
# Get the detector
|
| 26 |
+
self.model = CospyDetector()
|
| 27 |
+
|
| 28 |
+
# Put the model on the device
|
| 29 |
+
self.model.to(self.device)
|
| 30 |
+
|
| 31 |
+
# Initialize the fc layer
|
| 32 |
+
torch.nn.init.normal_(self.model.sem.fc.weight.data, 0.0, 0.02)
|
| 33 |
+
torch.nn.init.normal_(self.model.art.fc.weight.data, 0.0, 0.02)
|
| 34 |
+
torch.nn.init.normal_(self.model.fc.weight.data, 0.0, 0.02)
|
| 35 |
+
|
| 36 |
+
# Optimizer
|
| 37 |
+
_lr = 1e-4
|
| 38 |
+
_beta1 = 0.9
|
| 39 |
+
_weight_decay = 0.0
|
| 40 |
+
params = []
|
| 41 |
+
for name, param in self.model.named_parameters():
|
| 42 |
+
if param.requires_grad:
|
| 43 |
+
params.append(param)
|
| 44 |
+
print(f"Trainable parameters: {len(params)}")
|
| 45 |
+
|
| 46 |
+
self.optimizer = torch.optim.AdamW(params, lr=_lr, betas=(_beta1, 0.999), weight_decay=_weight_decay)
|
| 47 |
+
|
| 48 |
+
# Loss function
|
| 49 |
+
if args.no_label_smooth:
|
| 50 |
+
self.criterion = torch.nn.BCEWithLogitsLoss()
|
| 51 |
+
else:
|
| 52 |
+
self.criterion = LabelSmoothingBCEWithLogits(smoothing=0.1)
|
| 53 |
+
|
| 54 |
+
# Scheduler
|
| 55 |
+
self.delr_freq = 10
|
| 56 |
+
|
| 57 |
+
# Training function for the detector
|
| 58 |
+
def train_step(self, batch_data):
|
| 59 |
+
# Decompose the batch data
|
| 60 |
+
inputs, labels = batch_data
|
| 61 |
+
inputs, labels = inputs.to(self.device), labels.to(self.device)
|
| 62 |
+
|
| 63 |
+
self.optimizer.zero_grad()
|
| 64 |
+
|
| 65 |
+
outputs = self.model(inputs)
|
| 66 |
+
|
| 67 |
+
loss = self.criterion(outputs, labels.unsqueeze(1).float())
|
| 68 |
+
loss.backward()
|
| 69 |
+
self.optimizer.step()
|
| 70 |
+
|
| 71 |
+
eval_loss = loss.item()
|
| 72 |
+
y_pred = outputs.sigmoid().flatten().tolist()
|
| 73 |
+
y_true = labels.tolist()
|
| 74 |
+
return eval_loss, y_pred, y_true
|
| 75 |
+
|
| 76 |
+
# Schedule the training
|
| 77 |
+
# Early stopping / learning rate adjustment
|
| 78 |
+
def scheduler(self, status_dict):
|
| 79 |
+
epoch = status_dict["epoch"]
|
| 80 |
+
if epoch % self.delr_freq == 0 and epoch != 0:
|
| 81 |
+
for param_group in self.optimizer.param_groups:
|
| 82 |
+
param_group["lr"] *= 0.9
|
| 83 |
+
self.lr = param_group["lr"]
|
| 84 |
+
return True
|
| 85 |
+
|
| 86 |
+
# Prediction function
|
| 87 |
+
def predict(self, inputs):
|
| 88 |
+
inputs = inputs.to(self.device)
|
| 89 |
+
outputs = self.model(inputs)
|
| 90 |
+
prediction = outputs.sigmoid().flatten().tolist()
|
| 91 |
+
return prediction
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def evaluate(y_pred, y_true):
|
| 95 |
+
ap = average_precision_score(y_true, y_pred)
|
| 96 |
+
accuracy = ((np.array(y_pred) > 0.5) == y_true).mean()
|
| 97 |
+
return ap, accuracy
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def train(args):
|
| 101 |
+
# Get the detector
|
| 102 |
+
detector = Detector(args)
|
| 103 |
+
|
| 104 |
+
# Load the dataset
|
| 105 |
+
train_dataset = TrainDataset(data_path=args.trainset_dirpath,
|
| 106 |
+
split="train",
|
| 107 |
+
transform=detector.model.train_transform)
|
| 108 |
+
train_loader = torch.utils.data.DataLoader(train_dataset,
|
| 109 |
+
batch_size=args.batch_size,
|
| 110 |
+
shuffle=True,
|
| 111 |
+
num_workers=4,
|
| 112 |
+
pin_memory=True)
|
| 113 |
+
|
| 114 |
+
test_dataset = TrainDataset(data_path=args.trainset_dirpath,
|
| 115 |
+
split="val",
|
| 116 |
+
transform=detector.model.test_transform)
|
| 117 |
+
test_loader = torch.utils.data.DataLoader(test_dataset,
|
| 118 |
+
batch_size=args.batch_size,
|
| 119 |
+
shuffle=False,
|
| 120 |
+
num_workers=4,
|
| 121 |
+
pin_memory=True)
|
| 122 |
+
|
| 123 |
+
logger.info(f"Train size {len(train_dataset)} | Test size {len(test_dataset)}")
|
| 124 |
+
|
| 125 |
+
# Set the saving directory
|
| 126 |
+
model_dir = os.path.join(args.ckpt, "cospy")
|
| 127 |
+
if not os.path.exists(model_dir):
|
| 128 |
+
os.makedirs(model_dir)
|
| 129 |
+
log_path = f"{model_dir}/training.log"
|
| 130 |
+
if os.path.exists(log_path):
|
| 131 |
+
os.remove(log_path)
|
| 132 |
+
|
| 133 |
+
logger_id = logger.add(
|
| 134 |
+
log_path,
|
| 135 |
+
format="{time:MM-DD at HH:mm:ss} | {level} | {module}:{line} | {message}",
|
| 136 |
+
level="DEBUG",
|
| 137 |
+
)
|
| 138 |
+
|
| 139 |
+
# Train the detector
|
| 140 |
+
best_acc = 0
|
| 141 |
+
for epoch in range(args.epochs):
|
| 142 |
+
# Set the model to training mode
|
| 143 |
+
detector.model.train()
|
| 144 |
+
time_start = time.time()
|
| 145 |
+
for step_id, batch_data in enumerate(train_loader):
|
| 146 |
+
eval_loss, y_pred, y_true = detector.train_step(batch_data)
|
| 147 |
+
ap, accuracy = evaluate(y_pred, y_true)
|
| 148 |
+
|
| 149 |
+
# Log the training information
|
| 150 |
+
if (step_id + 1) % 100 == 0:
|
| 151 |
+
time_end = time.time()
|
| 152 |
+
logger.info(f"Epoch {epoch} | Batch {step_id + 1}/{len(train_loader)} | Loss {eval_loss:.4f} | AP {ap*100:.2f}% | Accuracy {accuracy*100:.2f}% | Time {time_end-time_start:.2f}s")
|
| 153 |
+
time_start = time.time()
|
| 154 |
+
|
| 155 |
+
# Evaluate the model
|
| 156 |
+
detector.model.eval()
|
| 157 |
+
y_pred, y_true = [], []
|
| 158 |
+
for (images, labels) in test_loader:
|
| 159 |
+
y_pred.extend(detector.predict(images))
|
| 160 |
+
y_true.extend(labels.tolist())
|
| 161 |
+
|
| 162 |
+
ap, accuracy = evaluate(y_pred, y_true)
|
| 163 |
+
logger.info(f"Epoch {epoch} | Test AP {ap*100:.2f}% | Test Accuracy {accuracy*100:.2f}%")
|
| 164 |
+
|
| 165 |
+
# Schedule the training
|
| 166 |
+
status_dict = {"epoch": epoch, "AP": ap, "Accuracy": accuracy}
|
| 167 |
+
proceed = detector.scheduler(status_dict)
|
| 168 |
+
if not proceed:
|
| 169 |
+
logger.info("Early stopping")
|
| 170 |
+
break
|
| 171 |
+
|
| 172 |
+
# Save the model
|
| 173 |
+
if accuracy >= best_acc:
|
| 174 |
+
best_acc = accuracy
|
| 175 |
+
detector.model.save_weights(f"{model_dir}/best_model.pth")
|
| 176 |
+
logger.info(f"Best model saved with accuracy {best_acc.mean()*100:.2f}%")
|
| 177 |
+
|
| 178 |
+
if epoch % 5 == 0:
|
| 179 |
+
detector.model.save_weights(f"{model_dir}/epoch_{epoch}.pth")
|
| 180 |
+
logger.info(f"Model saved at epoch {epoch}")
|
| 181 |
+
|
| 182 |
+
# Save the final model
|
| 183 |
+
detector.model.save_weights(f"{model_dir}/final_model.pth")
|
| 184 |
+
logger.info("Final model saved")
|
| 185 |
+
|
| 186 |
+
# Remove the logger
|
| 187 |
+
logger.remove(logger_id)
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def test(args):
|
| 191 |
+
# Initialize the detector
|
| 192 |
+
detector = Detector(args)
|
| 193 |
+
|
| 194 |
+
# Load the [best/final] model
|
| 195 |
+
weights_path = os.path.join(args.ckpt, "cospy", "best_model.pth")
|
| 196 |
+
|
| 197 |
+
detector.model.load_weights(weights_path)
|
| 198 |
+
detector.model.to(args.device)
|
| 199 |
+
detector.model.eval()
|
| 200 |
+
|
| 201 |
+
# Set the pre-processing function
|
| 202 |
+
test_transform = detector.model.test_transform
|
| 203 |
+
|
| 204 |
+
# Set the saving directory
|
| 205 |
+
save_dir = os.path.join(args.ckpt, "cospy")
|
| 206 |
+
save_result_path = os.path.join(save_dir, "result.json")
|
| 207 |
+
save_output_path = os.path.join(save_dir, "output.json")
|
| 208 |
+
|
| 209 |
+
# Begin the evaluation
|
| 210 |
+
result_all = {}
|
| 211 |
+
output_all = {}
|
| 212 |
+
for dataset_name in EVAL_DATASET_LIST:
|
| 213 |
+
result_all[dataset_name] = {}
|
| 214 |
+
output_all[dataset_name] = {}
|
| 215 |
+
for model_name in EVAL_MODEL_LIST:
|
| 216 |
+
test_dataset = TestDataset(dataset=dataset_name, model=model_name, root_path=args.testset_dirpath, transform=test_transform)
|
| 217 |
+
test_loader = torch.utils.data.DataLoader(test_dataset,
|
| 218 |
+
batch_size=args.batch_size,
|
| 219 |
+
shuffle=False,
|
| 220 |
+
num_workers=4,
|
| 221 |
+
pin_memory=True)
|
| 222 |
+
|
| 223 |
+
# Evaluate the model
|
| 224 |
+
y_pred, y_true = [], []
|
| 225 |
+
for (images, labels) in tqdm(test_loader, desc=f"Evaluating {dataset_name} {model_name}"):
|
| 226 |
+
y_pred.extend(detector.predict(images))
|
| 227 |
+
y_true.extend(labels.tolist())
|
| 228 |
+
|
| 229 |
+
ap, accuracy = evaluate(y_pred, y_true)
|
| 230 |
+
print(f"Evaluate on {dataset_name} {model_name} | Size {len(y_true)} | AP {ap*100:.2f}% | Accuracy {accuracy*100:.2f}%")
|
| 231 |
+
|
| 232 |
+
result_all[dataset_name][model_name] = {"size": len(y_true), "AP": ap, "Accuracy": accuracy}
|
| 233 |
+
output_all[dataset_name][model_name] = {"y_pred": y_pred, "y_true": y_true}
|
| 234 |
+
|
| 235 |
+
# Save the results
|
| 236 |
+
with open(save_result_path, "w") as f:
|
| 237 |
+
json.dump(result_all, f, indent=4)
|
| 238 |
+
|
| 239 |
+
with open(save_output_path, "w") as f:
|
| 240 |
+
json.dump(output_all, f, indent=4)
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
if __name__ == "__main__":
|
| 244 |
+
import argparse
|
| 245 |
+
|
| 246 |
+
parser = argparse.ArgumentParser("Deep Fake Detection")
|
| 247 |
+
parser.add_argument("--gpu", type=int, default=0, help="GPU ID")
|
| 248 |
+
parser.add_argument("--phase", type=str, default="test", choices=["train", "test"], help="Phase of the experiment")
|
| 249 |
+
parser.add_argument("--no_label_smooth", action="store_true", help="Whether to use label smoothing")
|
| 250 |
+
parser.add_argument("--trainset_dirpath", type=str, default="data/train", help="Trainset directory")
|
| 251 |
+
parser.add_argument("--testset_dirpath", type=str, default="data/test", help="Testset directory")
|
| 252 |
+
parser.add_argument("--ckpt", type=str, default="ckpt", help="Checkpoint directory")
|
| 253 |
+
parser.add_argument("--epochs", type=int, default=10, help="Number of epochs")
|
| 254 |
+
parser.add_argument("--batch_size", type=int, default=32, help="Batch size")
|
| 255 |
+
parser.add_argument("--seed", type=int, default=1024, help="Random seed")
|
| 256 |
+
|
| 257 |
+
args = parser.parse_args()
|
| 258 |
+
|
| 259 |
+
# Set the random seed
|
| 260 |
+
seed_torch(args.seed)
|
| 261 |
+
|
| 262 |
+
# Set the GPU ID
|
| 263 |
+
args.device = f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu"
|
| 264 |
+
|
| 265 |
+
# Begin the experiment
|
| 266 |
+
if args.phase == "train":
|
| 267 |
+
train(args)
|
| 268 |
+
elif args.phase == "test":
|
| 269 |
+
test(args)
|
| 270 |
+
else:
|
| 271 |
+
raise ValueError("Unknown phase")
|
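`train.py` defaults to `LabelSmoothingBCEWithLogits(smoothing=0.1)` from `Detectors` (defined elsewhere in the repo). A common formulation of BCE label smoothing looks roughly like the sketch below; the repo's own class may differ in detail.

```python
# A minimal sketch of one common BCE label-smoothing scheme.
# Assumption: this approximates, but is not necessarily identical to,
# Detectors.LabelSmoothingBCEWithLogits used in train.py.
import torch
import torch.nn as nn

class SmoothedBCEWithLogits(nn.Module):
    def __init__(self, smoothing: float = 0.1):
        super().__init__()
        self.smoothing = smoothing
        self.bce = nn.BCEWithLogitsLoss()

    def forward(self, logits, targets):
        # Pull hard 0/1 targets toward 0.5: 0 -> smoothing/2, 1 -> 1 - smoothing/2.
        smoothed = targets * (1.0 - self.smoothing) + 0.5 * self.smoothing
        return self.bce(logits, smoothed)

# Usage on a dummy batch
criterion = SmoothedBCEWithLogits(smoothing=0.1)
loss = criterion(torch.randn(4, 1), torch.tensor([[0.0], [1.0], [1.0], [0.0]]))
```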
train_single.py
ADDED
|
@@ -0,0 +1,293 @@
|
| 1 |
+
import os
|
| 2 |
+
import time
|
| 3 |
+
import json
|
| 4 |
+
import torch
|
| 5 |
+
import numpy as np
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
from loguru import logger
|
| 8 |
+
from sklearn.metrics import average_precision_score
|
| 9 |
+
|
| 10 |
+
from utils import seed_torch
|
| 11 |
+
from Detectors import ArtifactDetector, SemanticDetector
|
| 12 |
+
from Datasets import TrainDataset, TestDataset, EVAL_DATASET_LIST, EVAL_MODEL_LIST
|
| 13 |
+
|
| 14 |
+
import warnings
|
| 15 |
+
warnings.filterwarnings("ignore")
|
| 16 |
+
|
| 17 |
+
class Detector():
|
| 18 |
+
def __init__(self, args):
|
| 19 |
+
super(Detector, self).__init__()
|
| 20 |
+
|
| 21 |
+
# Device
|
| 22 |
+
self.device = args.device
|
| 23 |
+
|
| 24 |
+
# Get the detector
|
| 25 |
+
if args.detector == "artifact":
|
| 26 |
+
self.model = ArtifactDetector()
|
| 27 |
+
elif args.detector == "semantic":
|
| 28 |
+
self.model = SemanticDetector()
|
| 29 |
+
else:
|
| 30 |
+
raise ValueError("Unknown detector")
|
| 31 |
+
|
| 32 |
+
# Put the model on the device
|
| 33 |
+
self.model.to(self.device)
|
| 34 |
+
|
| 35 |
+
# Initialize the fc layer
|
| 36 |
+
torch.nn.init.normal_(self.model.fc.weight.data, 0.0, 0.02)
|
| 37 |
+
|
| 38 |
+
# Optimizer
|
| 39 |
+
_lr = 1e-4
|
| 40 |
+
_beta1 = 0.9
|
| 41 |
+
_weight_decay = 0.0
|
| 42 |
+
params = [p for p in self.model.parameters() if p.requires_grad]
|
| 43 |
+
print(f"Trainable parameters: {len(params)}")
|
| 44 |
+
|
| 45 |
+
self.optimizer = torch.optim.AdamW(params, lr=_lr, betas=(_beta1, 0.999), weight_decay=_weight_decay)
|
| 46 |
+
|
| 47 |
+
# Loss function
|
| 48 |
+
self.criterion = torch.nn.BCEWithLogitsLoss()
|
| 49 |
+
|
| 50 |
+
# Scheduler
|
| 51 |
+
self.delr_freq = 10
|
| 52 |
+
|
| 53 |
+
# Resume info
|
| 54 |
+
self.start_epoch = 0
|
| 55 |
+
self.best_acc = 0.0
|
| 56 |
+
|
| 57 |
+
def train_step(self, batch_data):
|
| 58 |
+
inputs, labels = batch_data
|
| 59 |
+
inputs, labels = inputs.to(self.device), labels.to(self.device)
|
| 60 |
+
|
| 61 |
+
self.optimizer.zero_grad()
|
| 62 |
+
outputs = self.model(inputs)
|
| 63 |
+
loss = self.criterion(outputs, labels.unsqueeze(1).float())
|
| 64 |
+
loss.backward()
|
| 65 |
+
self.optimizer.step()
|
| 66 |
+
|
| 67 |
+
eval_loss = loss.item()
|
| 68 |
+
y_pred = outputs.sigmoid().flatten().tolist()
|
| 69 |
+
y_true = labels.tolist()
|
| 70 |
+
return eval_loss, y_pred, y_true
|
| 71 |
+
|
| 72 |
+
def scheduler(self, status_dict):
|
| 73 |
+
epoch = status_dict["epoch"]
|
| 74 |
+
if epoch % self.delr_freq == 0 and epoch != 0:
|
| 75 |
+
for param_group in self.optimizer.param_groups:
|
| 76 |
+
param_group["lr"] *= 0.9
|
| 77 |
+
self.lr = param_group["lr"]
|
| 78 |
+
return True
|
| 79 |
+
|
| 80 |
+
def predict(self, inputs):
|
| 81 |
+
inputs = inputs.to(self.device)
|
| 82 |
+
outputs = self.model(inputs)
|
| 83 |
+
return outputs.sigmoid().flatten().tolist()
|
| 84 |
+
|
| 85 |
+
# --- Checkpoint functions ---
|
| 86 |
+
def save_checkpoint(self, path, epoch, best_acc):
|
| 87 |
+
torch.save({
|
| 88 |
+
"epoch": epoch,
|
| 89 |
+
"best_acc": best_acc,
|
| 90 |
+
"model_state": self.model.state_dict(),
|
| 91 |
+
"optimizer_state": self.optimizer.state_dict()
|
| 92 |
+
}, path)
|
| 93 |
+
|
| 94 |
+
def load_checkpoint(self, path):
|
| 95 |
+
if os.path.exists(path):
|
| 96 |
+
ckpt = torch.load(path, map_location=self.device)
|
| 97 |
+
self.model.load_state_dict(ckpt["model_state"])
|
| 98 |
+
self.optimizer.load_state_dict(ckpt["optimizer_state"])
|
| 99 |
+
self.start_epoch = ckpt.get("epoch", 0) + 1
|
| 100 |
+
self.best_acc = ckpt.get("best_acc", 0.0)
|
| 101 |
+
print(f"[INFO] Loaded checkpoint '{path}' (start_epoch={self.start_epoch}, best_acc={self.best_acc})")
|
| 102 |
+
else:
|
| 103 |
+
print(f"[WARNING] Checkpoint not found: {path}")
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def evaluate(y_pred, y_true):
|
| 107 |
+
ap = average_precision_score(y_true, y_pred)
|
| 108 |
+
accuracy = ((np.array(y_pred) > 0.5) == y_true).mean()
|
| 109 |
+
return ap, accuracy
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def train(args):
|
| 113 |
+
# Get the detector
|
| 114 |
+
detector = Detector(args)
|
| 115 |
+
|
| 116 |
+
# --- Resume checkpoint ---
|
| 117 |
+
start_epoch = 0
|
| 118 |
+
best_acc = 0
|
| 119 |
+
if args.resume != "":
|
| 120 |
+
if os.path.exists(args.resume):
|
| 121 |
+
print(f"[INFO] Loading checkpoint from {args.resume}")
|
| 122 |
+
ckpt = torch.load(args.resume, map_location=args.device)
|
| 123 |
+
detector.model.load_weights(args.resume)
|
| 124 |
+
# If the optimizer & best_acc were also saved, load them here
|
| 125 |
+
if "best_acc" in ckpt:
|
| 126 |
+
best_acc = ckpt["best_acc"]
|
| 127 |
+
if "epoch" in ckpt:
|
| 128 |
+
start_epoch = ckpt["epoch"] + 1
|
| 129 |
+
else:
|
| 130 |
+
print(f"[WARNING] Resume checkpoint not found: {args.resume}")
|
| 131 |
+
|
| 132 |
+
# Load datasets
|
| 133 |
+
train_dataset = TrainDataset(data_path=args.trainset_dirpath,
|
| 134 |
+
split="train",
|
| 135 |
+
transform=detector.model.train_transform)
|
| 136 |
+
train_loader = torch.utils.data.DataLoader(train_dataset,
|
| 137 |
+
batch_size=args.batch_size,
|
| 138 |
+
shuffle=True,
|
| 139 |
+
num_workers=4,
|
| 140 |
+
pin_memory=True)
|
| 141 |
+
|
| 142 |
+
test_dataset = TrainDataset(data_path=args.trainset_dirpath,
|
| 143 |
+
split="val",
|
| 144 |
+
transform=detector.model.test_transform)
|
| 145 |
+
test_loader = torch.utils.data.DataLoader(test_dataset,
|
| 146 |
+
batch_size=args.batch_size,
|
| 147 |
+
shuffle=False,
|
| 148 |
+
num_workers=4,
|
| 149 |
+
pin_memory=True)
|
| 150 |
+
|
| 151 |
+
logger.info(f"Train size {len(train_dataset)} | Test size {len(test_dataset)}")
|
| 152 |
+
|
| 153 |
+
# Set saving directory
|
| 154 |
+
model_dir = os.path.join(args.ckpt, args.detector)
|
| 155 |
+
os.makedirs(model_dir, exist_ok=True)
|
| 156 |
+
log_path = f"{model_dir}/training.log"
|
| 157 |
+
if os.path.exists(log_path):
|
| 158 |
+
os.remove(log_path)
|
| 159 |
+
logger_id = logger.add(log_path, format="{time:MM-DD at HH:mm:ss} | {level} | {module}:{line} | {message}", level="DEBUG")
|
| 160 |
+
|
| 161 |
+
# Train loop
|
| 162 |
+
for epoch in range(start_epoch, args.epochs):
|
| 163 |
+
detector.model.train()
|
| 164 |
+
time_start = time.time()
|
| 165 |
+
for step_id, batch_data in enumerate(train_loader):
|
| 166 |
+
eval_loss, y_pred, y_true = detector.train_step(batch_data)
|
| 167 |
+
ap, accuracy = evaluate(y_pred, y_true)
|
| 168 |
+
|
| 169 |
+
if (step_id + 1) % 100 == 0:
|
| 170 |
+
time_end = time.time()
|
| 171 |
+
logger.info(f"Epoch {epoch} | Batch {step_id + 1}/{len(train_loader)} | Loss {eval_loss:.4f} | AP {ap*100:.2f}% | Accuracy {accuracy*100:.2f}% | Time {time_end-time_start:.2f}s")
|
| 172 |
+
time_start = time.time()
|
| 173 |
+
|
| 174 |
+
# Evaluate
|
| 175 |
+
detector.model.eval()
|
| 176 |
+
y_pred, y_true = [], []
|
| 177 |
+
for (images, labels) in test_loader:
|
| 178 |
+
y_pred.extend(detector.predict(images))
|
| 179 |
+
y_true.extend(labels.tolist())
|
| 180 |
+
ap, accuracy = evaluate(y_pred, y_true)
|
| 181 |
+
logger.info(f"Epoch {epoch} | Test AP {ap*100:.2f}% | Test Accuracy {accuracy*100:.2f}%")
|
| 182 |
+
|
| 183 |
+
# Save best model
|
| 184 |
+
if accuracy >= best_acc:
|
| 185 |
+
best_acc = accuracy
|
| 186 |
+
detector.model.save_weights(f"{model_dir}/best_model.pth")
|
| 187 |
+
torch.save({"epoch": epoch, "best_acc": best_acc}, f"{model_dir}/best_model_meta.pth")
|
| 188 |
+
logger.info(f"Best model saved with accuracy {best_acc*100:.2f}%")
|
| 189 |
+
|
| 190 |
+
# Save periodic checkpoints
|
| 191 |
+
if epoch % 5 == 0:
|
| 192 |
+
detector.model.save_weights(f"{model_dir}/epoch_{epoch}.pth")
|
| 193 |
+
logger.info(f"Model saved at epoch {epoch}")
|
| 194 |
+
|
| 195 |
+
# Save final model
|
| 196 |
+
detector.model.save_weights(f"{model_dir}/final_model.pth")
|
| 197 |
+
logger.info("Final model saved")
|
| 198 |
+
logger.remove(logger_id)
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
def test(args):
|
| 203 |
+
# Initialize the detector
|
| 204 |
+
detector = Detector(args)
|
| 205 |
+
# --- Load checkpoint if resume is provided ---
|
| 206 |
+
if args.resume != "":
|
| 207 |
+
ckpt_path = args.resume
|
| 208 |
+
if os.path.exists(ckpt_path):
|
| 209 |
+
print(f"[INFO] Loading checkpoint from {ckpt_path}")
|
| 210 |
+
detector.model.load_weights(ckpt_path)
|
| 211 |
+
else:
|
| 212 |
+
print(f"[WARNING] Resume checkpoint not found: {ckpt_path}")
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
# Load the [best/final] model
|
| 216 |
+
weights_path = os.path.join(args.ckpt, args.detector, "best_model.pth")
|
| 217 |
+
|
| 218 |
+
detector.model.load_weights(weights_path)
|
| 219 |
+
detector.model.to(args.device)
|
| 220 |
+
detector.model.eval()
|
| 221 |
+
|
| 222 |
+
# Set the pre-processing function
|
| 223 |
+
test_transform = detector.model.test_transform
|
| 224 |
+
|
| 225 |
+
# Set the saving directory
|
| 226 |
+
save_dir = os.path.join(args.ckpt, args.detector)
|
| 227 |
+
save_result_path = os.path.join(save_dir, "result.json")
|
| 228 |
+
save_output_path = os.path.join(save_dir, "output.json")
|
| 229 |
+
|
| 230 |
+
# Begin the evaluation
|
| 231 |
+
result_all = {}
|
| 232 |
+
output_all = {}
|
| 233 |
+
for dataset_name in EVAL_DATASET_LIST:
|
| 234 |
+
result_all[dataset_name] = {}
|
| 235 |
+
output_all[dataset_name] = {}
|
| 236 |
+
for model_name in EVAL_MODEL_LIST:
|
| 237 |
+
test_dataset = TestDataset(dataset=dataset_name, model=model_name, root_path=args.testset_dirpath, transform=test_transform)
|
| 238 |
+
test_loader = torch.utils.data.DataLoader(test_dataset,
|
| 239 |
+
batch_size=args.batch_size,
|
| 240 |
+
shuffle=False,
|
| 241 |
+
num_workers=4,
|
| 242 |
+
pin_memory=True)
|
| 243 |
+
|
| 244 |
+
# Evaluate the model
|
| 245 |
+
y_pred, y_true = [], []
|
| 246 |
+
for (images, labels) in tqdm(test_loader, desc=f"Evaluating {dataset_name} {model_name}"):
|
| 247 |
+
y_pred.extend(detector.predict(images))
|
| 248 |
+
y_true.extend(labels.tolist())
|
| 249 |
+
|
| 250 |
+
ap, accuracy = evaluate(y_pred, y_true)
|
| 251 |
+
print(f"Evaluate on {dataset_name} {model_name} | Size {len(y_true)} | AP {ap*100:.2f}% | Accuracy {accuracy*100:.2f}%")
|
| 252 |
+
|
| 253 |
+
result_all[dataset_name][model_name] = {"size": len(y_true), "AP": ap, "Accuracy": accuracy}
|
| 254 |
+
output_all[dataset_name][model_name] = {"y_pred": y_pred, "y_true": y_true}
|
| 255 |
+
|
| 256 |
+
# Save the results
|
| 257 |
+
with open(save_result_path, "w") as f:
|
| 258 |
+
json.dump(result_all, f, indent=4)
|
| 259 |
+
|
| 260 |
+
with open(save_output_path, "w") as f:
|
| 261 |
+
json.dump(output_all, f, indent=4)
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
if __name__ == "__main__":
|
| 265 |
+
import argparse
|
| 266 |
+
|
| 267 |
+
parser = argparse.ArgumentParser("Deep Fake Detection")
|
| 268 |
+
parser.add_argument("--gpu", type=int, default=0, help="GPU ID")
|
| 269 |
+
parser.add_argument("--phase", type=str, default="test", choices=["train", "test"], help="Phase of the experiment")
|
| 270 |
+
parser.add_argument("--detector", type=str, default="artifact", choices=["artifact", "semantic"], help="Detector to use")
|
| 271 |
+
parser.add_argument("--trainset_dirpath", type=str, default="data/train", help="Trainset directory")
|
| 272 |
+
parser.add_argument("--testset_dirpath", type=str, default="data/test", help="Testset directory")
|
| 273 |
+
parser.add_argument("--ckpt", type=str, default="ckpt", help="Checkpoint directory")
|
| 274 |
+
parser.add_argument("--epochs", type=int, default=10, help="Number of epochs")
|
| 275 |
+
parser.add_argument("--batch_size", type=int, default=32, help="Batch size")
|
| 276 |
+
parser.add_argument("--seed", type=int, default=1024, help="Random seed")
|
| 277 |
+
parser.add_argument("--resume", type=str, default="", help="Path to checkpoint to resume training")
|
| 278 |
+
|
| 279 |
+
args = parser.parse_args()
|
| 280 |
+
|
| 281 |
+
# Set the random seed
|
| 282 |
+
seed_torch(args.seed)
|
| 283 |
+
|
| 284 |
+
# Set the GPU ID
|
| 285 |
+
args.device = f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu"
|
| 286 |
+
|
| 287 |
+
# Begin the experiment
|
| 288 |
+
if args.phase == "train":
|
| 289 |
+
train(args)
|
| 290 |
+
elif args.phase == "test":
|
| 291 |
+
test(args)
|
| 292 |
+
else:
|
| 293 |
+
raise ValueError("Unknown phase")
|
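`train_single.py`'s `Detector` also defines `save_checkpoint`/`load_checkpoint` helpers that bundle model, optimizer, epoch, and best accuracy in one file, even though the training loop above saves weights and metadata separately. A minimal round-trip of that checkpoint format (with a stand-in module so it runs standalone):

```python
# Minimal sketch of the full-checkpoint format written by Detector.save_checkpoint.
# Assumption: a stand-in module replaces ArtifactDetector/SemanticDetector so the
# snippet is self-contained; the key names match the helpers in train_single.py.
import torch

model = torch.nn.Linear(8, 1)
optimizer = torch.optim.AdamW(model.parameters())

torch.save({
    "epoch": 4,
    "best_acc": 0.91,
    "model_state": model.state_dict(),
    "optimizer_state": optimizer.state_dict(),
}, "checkpoint.pth")

ckpt = torch.load("checkpoint.pth", map_location="cpu")
model.load_state_dict(ckpt["model_state"])
optimizer.load_state_dict(ckpt["optimizer_state"])
start_epoch = ckpt.get("epoch", 0) + 1   # same resume convention as load_checkpoint
best_acc = ckpt.get("best_acc", 0.0)
```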
utils.py
ADDED
|
@@ -0,0 +1,162 @@
|
| 1 |
+
import os
|
| 2 |
+
import cv2
|
| 3 |
+
import torch
|
| 4 |
+
import pickle
|
| 5 |
+
import random
|
| 6 |
+
import numpy as np
|
| 7 |
+
from io import BytesIO
|
| 8 |
+
from PIL import Image, ImageFile
|
| 9 |
+
import torchvision.transforms.functional as TF
|
| 10 |
+
from scipy.ndimage.filters import gaussian_filter
|
| 11 |
+
|
| 12 |
+
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Set random seed
|
| 16 |
+
def seed_torch(seed):
|
| 17 |
+
random.seed(seed)
|
| 18 |
+
os.environ['PYTHONHASHSEED'] = str(seed)
|
| 19 |
+
np.random.seed(seed)
|
| 20 |
+
torch.manual_seed(seed)
|
| 21 |
+
torch.cuda.manual_seed(seed)
|
| 22 |
+
torch.cuda.manual_seed_all(seed)
|
| 23 |
+
torch.backends.cudnn.benchmark = False
|
| 24 |
+
torch.backends.cudnn.deterministic = True
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# Load dataset
|
| 28 |
+
def recursively_read(rootdir, must_contain, exts=["png", "PNG", "jpg", "JPG", "jpeg", "JPEG"]):
|
| 29 |
+
out = []
|
| 30 |
+
for r, d, f in os.walk(rootdir):
|
| 31 |
+
for file in f:
|
| 32 |
+
if (file.split('.')[-1] in exts) and (must_contain in os.path.join(r, file)):
|
| 33 |
+
out.append(os.path.join(r, file))
|
| 34 |
+
return out
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def get_list(path, must_contain=''):
|
| 38 |
+
if ".pickle" in path:
|
| 39 |
+
with open(path, 'rb') as f:
|
| 40 |
+
image_list = pickle.load(f)
|
| 41 |
+
image_list = [item for item in image_list if must_contain in item]
|
| 42 |
+
else:
|
| 43 |
+
image_list = recursively_read(path, must_contain)
|
| 44 |
+
return image_list
|
| 45 |
+
|
| 46 |
+
|
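# Example (illustrative arguments, not project defaults): get_list("data/train", must_contain="train")
# returns every png/jpg/jpeg path under data/train whose full path contains "train"; a path ending
# in .pickle is instead loaded as a pre-computed list of image paths.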

# Data augmentation techniques
def data_augment(img, aug_config):
    img = np.array(img)
    if img.ndim == 2:
        img = np.expand_dims(img, axis=2)
        img = np.repeat(img, 3, axis=2)

    if random.random() < aug_config["blur_prob"]:
        sig = sample_continuous(aug_config["blur_sig"])
        gaussian_blur(img, sig)

    if random.random() < aug_config["jpg_prob"]:
        method = sample_discrete(aug_config["jpg_method"])
        qual = sample_discrete(aug_config["jpg_qual"])
        img = jpeg_from_key(img, qual, method)

    return Image.fromarray(img)

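# Sketch of the aug_config dictionary consumed by data_augment above and tensor_data_augment
# below (the keys come from the code; the values are illustrative, not the project's defaults):
#     aug_config = {
#         "blur_prob": 0.1, "blur_sig": [0.0, 3.0],
#         "jpg_prob": 0.1, "jpg_method": ["cv2", "pil"], "jpg_qual": [30, 100],
#     }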

# Data augmentation techniques (tensor-batch variant)
def tensor_data_augment(images, aug_config):
    device = images.device
    images = images.detach().cpu().permute(0, 2, 3, 1).numpy()
    images = np.uint8(images * 255.)
    outputs = []
    for img in images:
        if random.random() < aug_config["blur_prob"]:
            sig = sample_continuous(aug_config["blur_sig"])
            gaussian_blur(img, sig)

        if random.random() < aug_config["jpg_prob"]:
            method = sample_discrete(aug_config["jpg_method"])
            qual = sample_discrete(aug_config["jpg_qual"])
            img = jpeg_from_key(img, qual, method)
        outputs.append(img)
    outputs = np.stack(outputs)
    outputs = torch.from_numpy(outputs).to(device).permute(0, 3, 1, 2).float() / 255.
    return outputs

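# Note: tensor_data_augment expects a float image batch in NCHW layout with values in [0, 1];
# it round-trips through uint8 numpy arrays on the CPU and returns a tensor with the same
# layout and value range on the original device.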

# Sample continuous or discrete values
def sample_continuous(s):
    if len(s) == 1:
        return s[0]
    if len(s) == 2:
        rg = s[1] - s[0]
        return random.random() * rg + s[0]
    raise ValueError("Length of iterable s should be 1 or 2.")


def sample_discrete(s):
    if len(s) == 1:
        return s[0]
    return random.choice(s)


# Gaussian blur (applied in place, channel by channel)
def gaussian_blur(img, sigma):
    gaussian_filter(img[:, :, 0], output=img[:, :, 0], sigma=sigma)
    gaussian_filter(img[:, :, 1], output=img[:, :, 1], sigma=sigma)
    gaussian_filter(img[:, :, 2], output=img[:, :, 2], sigma=sigma)


# JPEG compression
def cv2_jpg(img, compress_val):
    img_cv2 = img[:, :, ::-1]
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), compress_val]
    result, encimg = cv2.imencode('.jpg', img_cv2, encode_param)
    decimg = cv2.imdecode(encimg, 1)
    return decimg[:, :, ::-1]


def pil_jpg(img, compress_val):
    out = BytesIO()
    img = Image.fromarray(img)
    img.save(out, format='jpeg', quality=compress_val)
    img = Image.open(out)
    # Load from memory before the BytesIO buffer closes
    img = np.array(img)
    out.close()
    return img


def png_to_jpeg(img, quality=95):
    # Convert a PNG image to JPEG
    # Input: PIL image
    # Output: PIL image
    out = BytesIO()
    img.save(out, format='jpeg', quality=quality)
    img = np.array(Image.open(out))
    # Load from memory before the BytesIO buffer closes
    out.close()
    img = Image.fromarray(img)
    return img


def jpeg_from_key(img, compress_val, key):
    jpeg_dict = {'cv2': cv2_jpg, 'pil': pil_jpg}
    method = jpeg_dict[key]
    return method(img, compress_val)


# Custom resize function
def custom_resize(img, rz_interp, loadSize):
    rz_dict = {'bilinear': Image.BILINEAR,
               'bicubic': Image.BICUBIC,
               'lanczos': Image.LANCZOS,
               'nearest': Image.NEAREST}
    interp = sample_discrete(rz_interp)
    return TF.resize(img, loadSize, interpolation=rz_dict[interp])


def weights2cpu(weights):
    for key in weights:
        weights[key] = weights[key].cpu()
    return weights
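# Usage sketch (illustrative, not part of the original file; "model" and the paths are hypothetical):
#     img = Image.open("example.jpg").convert("RGB")
#     img = custom_resize(img, rz_interp=["bilinear"], loadSize=256)
#     img = data_augment(img, aug_config)   # aug_config as sketched above
#     torch.save(weights2cpu(model.state_dict()), "ckpt/model.pth")   # move weights to CPU before saving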