From 9c7aa82f422d1a780f39e1c9abc6d38912004518 Mon Sep 17 00:00:00 2001
From: zekroTJA <riho@live.de>
Date: Tue, 9 Sep 2025 20:44:21 +0200
Subject: [PATCH] Set Tesseract path, filter videos by extension, fix timestamp padding

---
 main.py | 45 +++++++++++++++++++++++----------------------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/main.py b/main.py
index 585d76e..3090fcf 100644
--- a/main.py
+++ b/main.py
@@ -1,5 +1,4 @@
 import cv2
-from numpy import extract
 import pytesseract
 import logging
 import re
@@ -7,9 +6,11 @@ import os
 import json
 
 
+pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
+
 CROP = (
-    10,  # x
-    10,  # y
+    5,  # x
+    5,  # y
     32,  # h
     120,  # w
 )
@@ -18,15 +19,7 @@ RX = r"\d+"
 
 RESULTS_DIR = "results.json"
 VIDEOS_DIR = "videos"
-
-
-def get_log_level(level):
-    level = level.strip().upper()
-    if level.isdigit():
-        return int(level)
-    if lvl := logging.getLevelNamesMapping().get(level):
-        return lvl
-    raise Exception("invalid log level")
+VIDEO_EXTENSIONS = [".webm", ".mp4"]
 
 
 def setup_logger(level):
@@ -49,11 +42,12 @@ def extract_count_at_frame(cap, idx):
     fh, fw, _ = frame.shape
 
     (x, y, h, w) = CROP
-    cropped = frame[x:h, fw - w : fw - y]
+    cropped = frame[x:h, fw - w: fw - y]
 
     gray = cv2.cvtColor(cropped, cv2.COLOR_RGB2GRAY)
 
-    text = pytesseract.image_to_string(gray, config="--psm 6")  # "psm 6" = assume a block of text
+    # "psm 6" = assume a block of text
+    text = pytesseract.image_to_string(gray, config="--psm 6")
     matches = re.findall(RX, text)
     if not matches:
         return -1
@@ -75,7 +69,8 @@ def extract_timestamps(video_path):
     c_start = extract_count_at_frame(cap, 0)
     c_end = extract_count_at_frame(cap, total_frames)
     if c_end <= c_start:
-        logging.info(f"no additional deaths in this video start={c_start} end={c_end}")
+        logging.info(
+            f"no additional deaths in this video start={c_start} end={c_end}")
         return []
 
     timeframes = []
@@ -87,12 +82,14 @@ def extract_timestamps(video_path):
         pivot = get_pivot(start, end)
         count = extract_count_at_frame(cap, pivot)
         if count < c_start or count > c_end:
-            logging.warning(f"faulty value found count={count} c_start={c_start} c_end={c_end} frame={pivot}")
+            logging.warning(
+                f"faulty value found count={count} c_start={c_start} c_end={c_end} frame={pivot}")
             start += 1
             continue
-        logging.debug(f"s={start} e={end} p={pivot} c={count}")
+        logging.info(f"s={start} e={end} p={pivot} c={count}")
         if end - start < fps:
-            logging.debug(f"found window s={start} e={end} p={pivot} c={count}")
+            logging.info(
+                f"found window s={start} e={end} p={pivot} c={count}")
             timeframes.append(start / fps)
             last_count += 1
             if len(timeframes) == c_end - c_start:
@@ -112,7 +109,7 @@ def extract_timestamps(video_path):
 def format_timestamp(secs):
     mins = int(secs / 60)
     secs = int(secs % 60)
-    return f"{mins:0<2}:{secs:0<2}"
+    return f"{mins:0>2}:{secs:0>2}"
 
 
 def load_results():
@@ -124,17 +121,21 @@ def load_results():
 
 def store_results(results):
     with open(RESULTS_DIR, "w", encoding="utf-8") as f:
-        json.dump(results, f, indent=2)
+        json.dump(results, f, indent=2, ensure_ascii=False)
 
 
 def main():
-    setup_logger(get_log_level("info"))
+    setup_logger("INFO")
 
     results = load_results()
 
     for f in os.listdir(VIDEOS_DIR):
+        _, ext = os.path.splitext(f)
+        if ext not in VIDEO_EXTENSIONS:
+            logging.info(f"skipping: file extension file={f}")
+            continue
         if f in results:
-            logging.info(f"already analyzed, skipping file={f}")
+            logging.info(f"skipping: already analyzed file={f}")
             continue
         logging.info(f"extracting timestamps ... file={f}")
         timestamps = extract_timestamps(os.path.join(VIDEOS_DIR, f))