| 1 | """ |
| 2 | Resemble AI -- Batch Deepfake Detection with Secure Upload |
| 3 | |
| 4 | Walk a folder of local media files, upload each via the Secure Upload API, |
| 5 | run deepfake detection against the returned media token, and poll until each |
| 6 | job reaches a terminal state. Uploads and polling run concurrently so batches |
| 7 | of dozens/hundreds of files finish in practical wall-clock time (and well |
| 8 | within the 1-hour secure-upload token expiration). |
| 9 | |
| 10 | Non-media files and subdirectories are skipped. Per-file failures are |
| 11 | collected and reported in the final summary; the script never aborts early. |
| 12 | |
| 13 | Prerequisites: |
| 14 | pip install requests |
| 15 | |
| 16 | Usage: |
| 17 | export RESEMBLE_API_KEY="your_api_key" |
| 18 | python detect_with_secure_uploads.py <absolute-folder-path> [output-json-path] |
| 19 | |
| 20 | If `output-json-path` is omitted, results are written to `<folder>/results.json`. |
| 21 | """ |
| 22 | |
| 23 | import json |
| 24 | import os |
| 25 | import sys |
| 26 | import time |
| 27 | from concurrent.futures import ThreadPoolExecutor, as_completed |
| 28 | |
| 29 | import requests |
| 30 | |
# API credential taken from the environment; falls back to an empty string
# when the variable is not set.
API_KEY = os.getenv("RESEMBLE_API_KEY", "")
BASE_URL = "https://app.resemble.ai/api/v2"
# Worker-thread count: tune to your rate limit / upload bandwidth.
MAX_WORKERS = 4

# Lowercase, dot-prefixed extensions treated as media.
# Extend with any audio/video/image extension you care about.
MEDIA_EXTS = {
    ".wav",
    ".mp3",
    ".mp4",
    ".mov",
    ".png",
    ".jpg",
    ".jpeg",
}
| 37 | |
| 38 | |
def json_headers() -> dict:
    """Return headers for JSON requests: bearer authorization plus JSON content type."""
    headers = {"Authorization": f"Bearer {API_KEY}"}
    headers["Content-Type"] = "application/json"
    return headers
| 41 | |
| 42 | |
def auth_only_headers() -> dict:
    """Return headers carrying only the bearer authorization.

    Content-Type is deliberately omitted: for multipart/form-data, requests
    sets it (with the boundary) itself.
    """
    return dict(Authorization=f"Bearer {API_KEY}")
| 46 | |
| 47 | |
def secure_upload(path: str, timeout=(10, 300)) -> str:
    """Upload a local file to the Secure Upload API and return its media_token.

    Args:
        path: Path of the file to upload. It is sent as the multipart field
            "file" under its base name.
        timeout: Timeout handed to requests, either a single number of
            seconds or a (connect, read) tuple. Without one, requests waits
            indefinitely and a stalled connection would pin a worker thread.

    Returns:
        The value of "token" in the JSON response body.

    Raises:
        RuntimeError: If the server answers with an error status, the body is
            not JSON, or the body carries no token.
        OSError: If the file cannot be opened.
        requests.RequestException: On connection failures or timeouts.
    """
    with open(path, "rb") as f:
        resp = requests.post(
            f"{BASE_URL}/secure_uploads",
            headers=auth_only_headers(),
            files={"file": (os.path.basename(path), f)},
            timeout=timeout,
        )
    if not resp.ok:
        raise RuntimeError(f"POST /secure_uploads returned {resp.status_code}: {resp.text}")
    try:
        body = resp.json()
    except ValueError as err:
        # requests' JSON decode errors derive from ValueError.
        raise RuntimeError(f"no token in secure upload response: {resp.text}") from err
    # A non-object JSON body (list, string, null) cannot carry a token either.
    token = body.get("token") if isinstance(body, dict) else None
    if not token:
        raise RuntimeError(f"no token in secure upload response: {resp.text}")
    return token
| 62 | |
| 63 | |
def submit_detect(media_token: str, timeout: float = 30.0) -> str:
    """Submit a detect job referencing a secure-upload token and return the uuid.

    Args:
        media_token: Token returned by the Secure Upload API.
        timeout: Seconds to wait for the HTTP request before giving up, so
            that a stalled connection cannot block the caller forever.

    Returns:
        The "uuid" found under "item" in the JSON response.

    Raises:
        RuntimeError: If the server answers with an error status or the
            response carries no uuid.
        requests.RequestException: On connection failures or timeouts.
    """
    payload = {
        "media_token": media_token,
        # Prefer webhooks over polling for large batches:
        # "callback_url": "https://your-server.example.com/resemble-webhook",
    }
    resp = requests.post(
        f"{BASE_URL}/detect",
        headers=json_headers(),
        json=payload,
        timeout=timeout,
    )
    if not resp.ok:
        raise RuntimeError(f"POST /detect returned {resp.status_code}: {resp.text}")
    # "item" may be absent or null; either way fall back to an empty mapping
    # so the missing-uuid error below is raised instead of an AttributeError.
    item = resp.json().get("item") or {}
    uuid = item.get("uuid")
    if not uuid:
        raise RuntimeError("no uuid in detect response")
    return uuid
| 78 | |
| 79 | |
def poll_for_result(
    uuid: str,
    timeout: int = 600,
    interval: int = 5,
    request_timeout: float = 30.0,
) -> dict:
    """Poll GET /detect/{uuid} until the job reaches a terminal state.

    Args:
        uuid: Identifier of the detect job.
        timeout: Overall polling budget in seconds.
        interval: Seconds to sleep between polls.
        request_timeout: Seconds allowed for each individual GET, so a hung
            request cannot outlive the overall budget indefinitely.

    Returns:
        The response's "item" dict once its status is "completed" or "failed".

    Raises:
        RuntimeError: If a poll returns an error status, or no terminal
            status is seen within `timeout` seconds.
        requests.RequestException: On connection failures or timeouts.
    """
    url = f"{BASE_URL}/detect/{uuid}"
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        resp = requests.get(url, headers=json_headers(), timeout=request_timeout)
        if not resp.ok:
            raise RuntimeError(f"GET /detect/{uuid} returned {resp.status_code}: {resp.text}")
        # Treat an absent or null "item" as "no status yet" and keep polling.
        item = resp.json().get("item") or {}
        status = item.get("status", "unknown")
        if status in ("completed", "failed"):
            return item
        # Skip the sleep when the deadline would have passed by the time we
        # wake: no further poll would happen anyway.
        if time.monotonic() + interval >= deadline:
            break
        time.sleep(interval)
    raise RuntimeError(f"polling timed out after {timeout}s")
| 94 | |
| 95 | |
def iter_media_files(folder: str):
    """Yield the full path of each media file directly inside `folder`.

    Entries are visited in sorted name order. Anything that is not a file
    (e.g. a subdirectory) is skipped, as is any file whose lowercased
    extension is not in MEDIA_EXTS.
    """
    for entry in sorted(os.listdir(folder)):
        candidate = os.path.join(folder, entry)
        _, ext = os.path.splitext(entry)
        if os.path.isfile(candidate) and ext.lower() in MEDIA_EXTS:
            yield candidate
| 104 | |
| 105 | |
def process_file(path: str) -> dict:
    """Upload, submit, and poll one file.

    Returns {"file": path, "detect": <job item>} when the job's final status
    is "completed"; raises RuntimeError for any other final status. Errors
    raised by the upload, submit, or poll steps propagate unchanged.
    """
    job_id = submit_detect(secure_upload(path))
    item = poll_for_result(job_id)
    final_status = item.get("status", "unknown")
    if final_status == "completed":
        return {"file": path, "detect": item}
    raise RuntimeError(f"detect job {job_id} ended with status={final_status}")
| 115 | |
| 116 | |
def main():
    """Command-line entry point: process every media file in a folder.

    Reads the folder (and optional output path) from sys.argv, runs
    process_file on each media file in a thread pool, prints one line per
    file as it finishes, and writes a JSON summary with the keys "folder",
    "succeeded", and "failed". Exits with an error message if the API key is
    missing, the arguments are malformed, or the folder is not a directory.
    Per-file failures are recorded rather than aborting the batch.
    """
    if not API_KEY:
        sys.exit("Error: set RESEMBLE_API_KEY environment variable before running.")
    if len(sys.argv) not in (2, 3):
        sys.exit(
            f"Usage: python {os.path.basename(sys.argv[0])} "
            f"<absolute-folder-path> [output-json-path]"
        )
    folder = sys.argv[1]
    # Fail with a readable message rather than a traceback from os.listdir.
    if not os.path.isdir(folder):
        sys.exit(f"Error: {folder} is not a directory.")
    output_path = sys.argv[2] if len(sys.argv) == 3 else os.path.join(folder, "results.json")

    files = list(iter_media_files(folder))
    if not files:
        print("No media files to process.")
        return

    succeeded, failed = [], []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
        future_to_path = {pool.submit(process_file, p): p for p in files}
        for future in as_completed(future_to_path):
            path = future_to_path[future]
            name = os.path.basename(path)
            # Only the job itself sits inside the try: a problem while
            # reporting a success must not also file the item under "failed".
            try:
                record = future.result()
            except Exception as e:
                failed.append({"file": path, "error": str(e)})
                print(f" [ERR] {name} {e}")
                continue
            succeeded.append(record)
            metrics = record["detect"].get("metrics")
            if not isinstance(metrics, dict):
                # Missing, null, or unexpectedly shaped metrics: print None fields.
                metrics = {}
            print(
                f" [OK] {name} "
                f"label={metrics.get('label')} score={metrics.get('aggregated_score')}"
            )

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump({"folder": folder, "succeeded": succeeded, "failed": failed}, f, indent=2)

    print(f"\nDone. {len(succeeded)} succeeded, {len(failed)} failed (of {len(files)}).")
    print(f"Results written to {output_path}")


if __name__ == "__main__":
    main()