From 9c7aa82f422d1a780f39e1c9abc6d38912004518 Mon Sep 17 00:00:00 2001 From: zekroTJA Date: Tue, 9 Sep 2025 20:44:21 +0200 Subject: [PATCH] update script --- main.py | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/main.py b/main.py index 585d76e..3090fcf 100644 --- a/main.py +++ b/main.py @@ -1,5 +1,4 @@ import cv2 -from numpy import extract import pytesseract import logging import re @@ -7,9 +6,11 @@ import os import json +pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe' + CROP = ( - 10, # x - 10, # y + 5, # x + 5, # y 32, # h 120, # w ) @@ -18,15 +19,7 @@ RX = r"\d+" RESULTS_DIR = "results.json" VIDEOS_DIR = "videos" - - -def get_log_level(level): - level = level.strip().upper() - if level.isdigit(): - return int(level) - if lvl := logging.getLevelNamesMapping().get(level): - return lvl - raise Exception("invalid log level") +VIDEO_EXTENSIONS = [".webm", ".mp4"] def setup_logger(level): @@ -49,11 +42,12 @@ def extract_count_at_frame(cap, idx): fh, fw, _ = frame.shape (x, y, h, w) = CROP - cropped = frame[x:h, fw - w : fw - y] + cropped = frame[x:h, fw - w: fw - y] gray = cv2.cvtColor(cropped, cv2.COLOR_RGB2GRAY) - text = pytesseract.image_to_string(gray, config="--psm 6") # "psm 6" = assume a block of text + # "psm 6" = assume a block of text + text = pytesseract.image_to_string(gray, config="--psm 6") matches = re.findall(RX, text) if not matches: return -1 @@ -75,7 +69,8 @@ def extract_timestamps(video_path): c_start = extract_count_at_frame(cap, 0) c_end = extract_count_at_frame(cap, total_frames) if c_end <= c_start: - logging.info(f"no additional deaths in this video start={c_start} end={c_end}") + logging.info( + f"no additional deaths in this video start={c_start} end={c_end}") return [] timeframes = [] @@ -87,12 +82,14 @@ def extract_timestamps(video_path): pivot = get_pivot(start, end) count = extract_count_at_frame(cap, pivot) if count < c_start or count > c_end: - logging.warning(f"faulty value found count={count} c_start={c_start} c_end={c_end} frame={pivot}") + logging.warning( + f"faulty value found count={count} c_start={c_start} c_end={c_end} frame={pivot}") start += 1 continue - logging.debug(f"s={start} e={end} p={pivot} c={count}") + logging.info(f"s={start} e={end} p={pivot} c={count}") if end - start < fps: - logging.debug(f"found window s={start} e={end} p={pivot} c={count}") + logging.info( + f"found window s={start} e={end} p={pivot} c={count}") timeframes.append(start / fps) last_count += 1 if len(timeframes) == c_end - c_start: @@ -112,7 +109,7 @@ def extract_timestamps(video_path): def format_timestamp(secs): mins = int(secs / 60) secs = int(secs % 60) - return f"{mins:0<2}:{secs:0<2}" + return f"{mins:0>2}:{secs:0>2}" def load_results(): @@ -124,17 +121,21 @@ def load_results(): def store_results(results): with open(RESULTS_DIR, "w", encoding="utf-8") as f: - json.dump(results, f, indent=2) + json.dump(results, f, indent=2, ensure_ascii=False) def main(): - setup_logger(get_log_level("info")) + setup_logger("INFO") results = load_results() for f in os.listdir(VIDEOS_DIR): + _, ext = os.path.splitext(f) + if ext not in VIDEO_EXTENSIONS: + logging.info(f"skipping: file extension file={f}") + continue if f in results: - logging.info(f"already analyzed, skipping file={f}") + logging.info(f"skipping: already analyzed file={f}") continue logging.info(f"extracting timestamps ... file={f}") timestamps = extract_timestamps(os.path.join(VIDEOS_DIR, f))