update script

This commit is contained in:
2025-09-09 20:44:21 +02:00
parent 59ac4a875f
commit 9c7aa82f42

45
main.py
View File

@@ -1,5 +1,4 @@
import cv2 import cv2
from numpy import extract
import pytesseract import pytesseract
import logging import logging
import re import re
@@ -7,9 +6,11 @@ import os
import json import json
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
CROP = ( CROP = (
10, # x 5, # x
10, # y 5, # y
32, # h 32, # h
120, # w 120, # w
) )
@@ -18,15 +19,7 @@ RX = r"\d+"
RESULTS_DIR = "results.json" RESULTS_DIR = "results.json"
VIDEOS_DIR = "videos" VIDEOS_DIR = "videos"
VIDEO_EXTENSIONS = [".webm", ".mp4"]
def get_log_level(level):
level = level.strip().upper()
if level.isdigit():
return int(level)
if lvl := logging.getLevelNamesMapping().get(level):
return lvl
raise Exception("invalid log level")
def setup_logger(level): def setup_logger(level):
@@ -49,11 +42,12 @@ def extract_count_at_frame(cap, idx):
fh, fw, _ = frame.shape fh, fw, _ = frame.shape
(x, y, h, w) = CROP (x, y, h, w) = CROP
cropped = frame[x:h, fw - w : fw - y] cropped = frame[x:h, fw - w: fw - y]
gray = cv2.cvtColor(cropped, cv2.COLOR_RGB2GRAY) gray = cv2.cvtColor(cropped, cv2.COLOR_RGB2GRAY)
text = pytesseract.image_to_string(gray, config="--psm 6") # "psm 6" = assume a block of text # "psm 6" = assume a block of text
text = pytesseract.image_to_string(gray, config="--psm 6")
matches = re.findall(RX, text) matches = re.findall(RX, text)
if not matches: if not matches:
return -1 return -1
@@ -75,7 +69,8 @@ def extract_timestamps(video_path):
c_start = extract_count_at_frame(cap, 0) c_start = extract_count_at_frame(cap, 0)
c_end = extract_count_at_frame(cap, total_frames) c_end = extract_count_at_frame(cap, total_frames)
if c_end <= c_start: if c_end <= c_start:
logging.info(f"no additional deaths in this video start={c_start} end={c_end}") logging.info(
f"no additional deaths in this video start={c_start} end={c_end}")
return [] return []
timeframes = [] timeframes = []
@@ -87,12 +82,14 @@ def extract_timestamps(video_path):
pivot = get_pivot(start, end) pivot = get_pivot(start, end)
count = extract_count_at_frame(cap, pivot) count = extract_count_at_frame(cap, pivot)
if count < c_start or count > c_end: if count < c_start or count > c_end:
logging.warning(f"faulty value found count={count} c_start={c_start} c_end={c_end} frame={pivot}") logging.warning(
f"faulty value found count={count} c_start={c_start} c_end={c_end} frame={pivot}")
start += 1 start += 1
continue continue
logging.debug(f"s={start} e={end} p={pivot} c={count}") logging.info(f"s={start} e={end} p={pivot} c={count}")
if end - start < fps: if end - start < fps:
logging.debug(f"found window s={start} e={end} p={pivot} c={count}") logging.info(
f"found window s={start} e={end} p={pivot} c={count}")
timeframes.append(start / fps) timeframes.append(start / fps)
last_count += 1 last_count += 1
if len(timeframes) == c_end - c_start: if len(timeframes) == c_end - c_start:
@@ -112,7 +109,7 @@ def extract_timestamps(video_path):
def format_timestamp(secs):
    """Format a duration in seconds as a zero-padded ``MM:SS`` string.

    Args:
        secs: Duration in seconds (int or float). Fractions of a second are
            truncated.

    Returns:
        A string such as ``"01:05"``. Minutes are not wrapped into hours, so
        3725 seconds yields ``"62:05"``.
    """
    mins = int(secs / 60)
    secs = int(secs % 60)
    # ">" right-aligns the number so the zero fill goes on the left
    # ("5" -> "05"); "<" would pad on the right and turn 5 into "50".
    return f"{mins:0>2}:{secs:0>2}"
def load_results(): def load_results():
@@ -124,17 +121,21 @@ def load_results():
def store_results(results):
    """Write ``results`` to the JSON file named by ``RESULTS_DIR``.

    The file is opened in write mode, so any existing content is replaced.

    Args:
        results: A JSON-serializable object.
    """
    with open(RESULTS_DIR, "w", encoding="utf-8") as f:
        # ensure_ascii=False writes non-ASCII characters as-is instead of
        # \uXXXX escapes; the file is opened as UTF-8, so they are encodable.
        json.dump(results, f, indent=2, ensure_ascii=False)
def main(): def main():
setup_logger(get_log_level("info")) setup_logger("INFO")
results = load_results() results = load_results()
for f in os.listdir(VIDEOS_DIR): for f in os.listdir(VIDEOS_DIR):
_, ext = os.path.splitext(f)
if ext not in VIDEO_EXTENSIONS:
logging.info(f"skipping: file extension file={f}")
continue
if f in results: if f in results:
logging.info(f"already analyzed, skipping file={f}") logging.info(f"skipping: already analyzed file={f}")
continue continue
logging.info(f"extracting timestamps ... file={f}") logging.info(f"extracting timestamps ... file={f}")
timestamps = extract_timestamps(os.path.join(VIDEOS_DIR, f)) timestamps = extract_timestamps(os.path.join(VIDEOS_DIR, f))