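update script: halve OCR crop offsets, filter videos by extension, fix timestamp zero-padding, write results JSON without ASCII escaping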

2025-09-09 20:44:21 +02:00
parent 59ac4a875f
commit 9c7aa82f42
1 changed files with 23 additions and 22 deletions
--- a/main.py
+++ b/main.py
@@ -1,5 +1,4 @@
 import cv2
 from numpy import extract
 import pytesseract
 import logging
 import re
@@ -7,9 +6,11 @@ import os
 import json
 pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
 CROP = (
-    10,  # x
+    5,  # x
-    10,  # y
+    5,  # y
    32,  # h
    120,  # w
 )
@@ -18,15 +19,7 @@ RX = r"\d+"
 RESULTS_DIR = "results.json"
 VIDEOS_DIR = "videos"
-
+VIDEO_EXTENSIONS = [".webm", ".mp4"]
 def get_log_level(level):
    level = level.strip().upper()
    if level.isdigit():
        return int(level)
    if lvl := logging.getLevelNamesMapping().get(level):
        return lvl
    raise Exception("invalid log level")
 def setup_logger(level):
@@ -49,11 +42,12 @@ def extract_count_at_frame(cap, idx):
    fh, fw, _ = frame.shape
    (x, y, h, w) = CROP
-    cropped = frame[x:h, fw - w : fw - y]
+    cropped = frame[x:h, fw - w: fw - y]
    gray = cv2.cvtColor(cropped, cv2.COLOR_RGB2GRAY)
-    text = pytesseract.image_to_string(gray, config="--psm 6")  # "psm 6" = assume a block of text
+    # "psm 6" = assume a block of text
    text = pytesseract.image_to_string(gray, config="--psm 6")
    matches = re.findall(RX, text)
    if not matches:
        return -1
@@ -75,7 +69,8 @@ def extract_timestamps(video_path):
    c_start = extract_count_at_frame(cap, 0)
    c_end = extract_count_at_frame(cap, total_frames)
    if c_end <= c_start:
-        logging.info(f"no additional deaths in this video start={c_start} end={c_end}")
+        logging.info(
            f"no additional deaths in this video start={c_start} end={c_end}")
        return []
    timeframes = []
@@ -87,12 +82,14 @@ def extract_timestamps(video_path):
        pivot = get_pivot(start, end)
        count = extract_count_at_frame(cap, pivot)
        if count < c_start or count > c_end:
-            logging.warning(f"faulty value found count={count} c_start={c_start} c_end={c_end} frame={pivot}")
+            logging.warning(
                f"faulty value found count={count} c_start={c_start} c_end={c_end} frame={pivot}")
            start += 1
            continue
-        logging.debug(f"s={start} e={end} p={pivot} c={count}")
+        logging.info(f"s={start} e={end} p={pivot} c={count}")
        if end - start < fps:
-            logging.debug(f"found window s={start} e={end} p={pivot} c={count}")
+            logging.info(
                f"found window s={start} e={end} p={pivot} c={count}")
            timeframes.append(start / fps)
            last_count += 1
            if len(timeframes) == c_end - c_start:
@@ -112,7 +109,7 @@ def extract_timestamps(video_path):
 def format_timestamp(secs):
    mins = int(secs / 60)
    secs = int(secs % 60)
-    return f"{mins:0<2}:{secs:0<2}"
+    return f"{mins:0>2}:{secs:0>2}"
 def load_results():
@@ -124,17 +121,21 @@ def load_results():
 def store_results(results):
    with open(RESULTS_DIR, "w", encoding="utf-8") as f:
-        json.dump(results, f, indent=2)
+        json.dump(results, f, indent=2, ensure_ascii=False)
 def main():
-    setup_logger(get_log_level("info"))
+    setup_logger("INFO")
    results = load_results()
    for f in os.listdir(VIDEOS_DIR):
        _, ext = os.path.splitext(f)
        if ext not in VIDEO_EXTENSIONS:
            logging.info(f"skipping: file extension file={f}")
            continue
        if f in results:
-            logging.info(f"already analyzed, skipping file={f}")
+            logging.info(f"skipping: already analyzed file={f}")
            continue
        logging.info(f"extracting timestamps ... file={f}")
        timestamps = extract_timestamps(os.path.join(VIDEOS_DIR, f))