From 59ac4a875f80c7f36d9c126511263e6ee2ad07fa Mon Sep 17 00:00:00 2001 From: zekroTJA Date: Mon, 8 Sep 2025 16:50:16 +0200 Subject: [PATCH] init --- .gitignore | 219 +++++++++++++++++++++++++++++++++++++++++++++++ main.py | 146 +++++++++++++++++++++++++++++++ requirements.txt | 5 ++ 3 files changed, 370 insertions(+) create mode 100644 .gitignore create mode 100644 main.py create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e9de5d1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,219 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock +#poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +#pdm.lock +#pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +#pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# Redis +*.rdb +*.aof +*.pid + +# RabbitMQ +mnesia/ +rabbitmq/ +rabbitmq-data/ + +# ActiveMQ +activemq-data/ + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. 
# ---------------------------------------------------------------------------
# Patch text that shares these lines with main.py in the original file.
# It is not Python; it is kept verbatim (one patch line per comment line) so
# that no content of the patch is lost.
#
#   (end of the preceding .gitignore comment line) However, if you prefer,
#   +# you could uncomment the following to ignore the entire vscode folder
#   +# .vscode/
#   +
#   +# Ruff stuff:
#   +.ruff_cache/
#   +
#   +# PyPI configuration file
#   +.pypirc
#   +
#   +# Marimo
#   +marimo/_static/
#   +marimo/_lsp/
#   +__marimo__/
#   +
#   +# Streamlit
#   +.streamlit/secrets.toml
#   +
#   +videos/
#   +*.json
#   \ No newline at end of file
#   diff --git a/main.py b/main.py
#   new file mode 100644
#   index 0000000..585d76e
#   --- /dev/null
#   +++ b/main.py
#   @@ -0,0 +1,146 @@
# ---------------------------------------------------------------------------
"""Locate the moments at which an on-screen counter increases in videos.

Every file in ``VIDEOS_DIR`` is scanned: the counter is read via OCR from a
fixed crop of single frames, and a bisection over the frame index finds the
short windows in which the value goes up.  The formatted timestamps are
collected per file name in the JSON file ``RESULTS_DIR``.

OpenCV and pytesseract are imported inside the functions that need them so
that the pure helpers (bisection, formatting, persistence) can be imported
and tested on machines without the native video/OCR stack.
"""

import json
import logging
import os
import re

# NOTE(review): ``extract`` is not referenced anywhere below; the import is
# kept only so the set of file-level imports stays unchanged.
from numpy import extract  # noqa: F401


# Region of the frame that holds the counter (near the top-right corner).
# NOTE(review): despite the h/w labels, extract_count_at_frame uses ``h`` as
# the bottom row and ``w`` as a distance from the right border, not as sizes.
CROP = (
    10,  # x: margin left free at the right frame border
    10,  # y: top row of the crop
    32,  # h: bottom row of the crop (exclusive)
    120,  # w: distance of the crop's left edge from the right frame border
)

# The counter is taken to be the last run of digits in the OCR output.
RX = r"\d+"

# NOTE: despite the name this is the path of a JSON *file*, not a directory.
RESULTS_DIR = "results.json"
VIDEOS_DIR = "videos"


def get_log_level(level):
    """Translate a level name or numeric string into a ``logging`` level.

    Args:
        level: Level name in any case (``"info"``), a string of digits
            (``"20"``), or an ``int``, which is returned unchanged.

    Returns:
        The numeric logging level.

    Raises:
        ValueError: If the name is not a registered logging level.
    """
    if isinstance(level, int):
        return level
    level = level.strip().upper()
    if level.isdigit():
        return int(level)
    # Compare against None: NOTSET maps to 0, which is falsy but valid.
    lvl = logging.getLevelNamesMapping().get(level)
    if lvl is not None:
        return lvl
    raise ValueError("invalid log level")


def setup_logger(level):
    """Configure the root logger with the script's line format.

    Args:
        level: Numeric logging level passed to ``logging.basicConfig``.
    """
    logging.basicConfig(
        level=level,
        format="%(asctime)s [%(levelname).3s] <%(filename)s:%(funcName)s> %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )


def extract_count_at_frame(cap, idx):
    """Read the counter shown in frame ``idx`` of an opened capture.

    Args:
        cap: An opened ``cv2.VideoCapture``.
        idx: Index of the frame to seek to.

    Returns:
        The last integer found in the OCR text of the ``CROP`` region, or
        ``-1`` when the text contains no digits.

    Raises:
        Exception: If the frame cannot be read.
    """
    import cv2
    import pytesseract

    logging.debug(f"getting info at frame idx={idx}")

    cap.set(cv2.CAP_PROP_POS_FRAMES, idx)

    ret, frame = cap.read()
    if not ret:
        raise Exception(f"failed to select frame idx={idx}")

    frame_width = frame.shape[1]

    # Rows are the vertical axis (y), columns the horizontal one (x).
    x, y, h, w = CROP
    cropped = frame[y:h, frame_width - w : frame_width - x]

    # Frames decoded by OpenCV are in BGR channel order.
    gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)

    # "psm 6" = assume a single uniform block of text
    text = pytesseract.image_to_string(gray, config="--psm 6")
    matches = re.findall(RX, text)
    if not matches:
        return -1
    return int(matches[-1])


def get_pivot(start, end):
    """Return the midpoint between ``start`` and ``end``, biased to ``start``."""
    return int((end - start) / 2) + start


def locate_count_increases(read_count, total_frames, fps, c_start, c_end):
    """Bisect the frame range for every point at which the counter goes up.

    The counter is assumed to be non-decreasing over the video.  Each increase
    is narrowed down to a window shorter than one second; the start of that
    window is reported.

    Args:
        read_count: Callable mapping a frame index to the counter value read
            at that frame.
        total_frames: Last frame index that may be inspected.
        fps: Frames per second; must be positive.
        c_start: Counter value at frame 0.
        c_end: Counter value at ``total_frames``.

    Returns:
        Window start times in seconds, in ascending order.  The list holds
        fewer than ``c_end - c_start`` entries when the search runs out of
        frames, which is logged as a warning.
    """
    # A window narrower than two frames cannot be bisected any further.
    min_window = max(fps, 2)
    expected = c_end - c_start

    timeframes = []
    start = 0
    end = total_frames
    last_count = c_start
    while len(timeframes) < expected:
        if start >= end:
            # Reached after a hit close to the end of the video or after a
            # long run of unreadable frames; there is nothing left to bisect.
            logging.warning(
                f"search window exhausted found={len(timeframes)} expected={expected}"
            )
            break
        pivot = get_pivot(start, end)
        count = read_count(pivot)
        if count < c_start or count > c_end:
            logging.warning(f"faulty value found count={count} c_start={c_start} c_end={c_end} frame={pivot}")
            # Nudge the window so that a different frame gets sampled.
            start += 1
            continue
        logging.debug(f"s={start} e={end} p={pivot} c={count}")
        if end - start < min_window:
            logging.debug(f"found window s={start} e={end} p={pivot} c={count}")
            timeframes.append(start / fps)
            last_count += 1
            # Continue about one second after the hit; keep the index integral.
            start = int(end + fps)
            end = total_frames
        elif count <= last_count:
            start = pivot
        else:
            end = pivot

    return timeframes


def extract_timestamps(video_path):
    """Return the times (in seconds) at which the counter increases.

    Args:
        video_path: Path of the video file to analyse.

    Returns:
        A list of timestamps in seconds; empty when the counter at the end of
        the video is not greater than at the start.

    Raises:
        Exception: If the file cannot be opened or a frame cannot be read.
        RuntimeError: If the frame rate is not positive or the counter cannot
            be read at the start or the end of the video.
    """
    import cv2

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise Exception("failed to open file")

        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            raise RuntimeError(f"invalid frame rate fps={fps}")
        # The final second of the video is left out of the search range.
        total_frames = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - int(fps), 0)

        c_start = extract_count_at_frame(cap, 0)
        c_end = extract_count_at_frame(cap, total_frames)
        if c_start < 0 or c_end < 0:
            # -1 means OCR found no digits; bisecting on it yields garbage.
            raise RuntimeError(f"failed to read counter start={c_start} end={c_end}")
        if c_end <= c_start:
            logging.info(f"no additional deaths in this video start={c_start} end={c_end}")
            return []

        return locate_count_increases(
            lambda idx: extract_count_at_frame(cap, idx),
            total_frames,
            fps,
            c_start,
            c_end,
        )
    finally:
        # Release the capture on every exit path, including errors.
        cap.release()


def format_timestamp(secs):
    """Format a non-negative number of seconds as zero-padded ``MM:SS``."""
    mins, secs = divmod(int(secs), 60)
    return f"{mins:02d}:{secs:02d}"


def load_results():
    """Load previously stored results, or ``{}`` when none exist yet."""
    try:
        with open(RESULTS_DIR, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return {}


def store_results(results):
    """Write ``results`` to ``RESULTS_DIR`` as indented JSON."""
    with open(RESULTS_DIR, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)


def main():
    """Analyse every video in ``VIDEOS_DIR`` that has no stored result yet."""
    setup_logger(get_log_level("info"))

    results = load_results()

    for f in sorted(os.listdir(VIDEOS_DIR)):
        if f in results:
            logging.info(f"already analyzed, skipping file={f}")
            continue
        logging.info(f"extracting timestamps ... file={f}")
        try:
            timestamps = extract_timestamps(os.path.join(VIDEOS_DIR, f))
        except Exception:
            # One broken video must not abort the batch; it is not recorded,
            # so it will be retried on the next run.
            logging.exception(f"failed to analyze file={f}")
            continue
        results[f] = [format_timestamp(t) for t in timestamps]
        # Persist after every video so finished work survives a later crash.
        store_results(results)


if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# Trailing requirements.txt hunk of the same patch (kept verbatim, one patch
# line per comment line):
#
#   diff --git a/requirements.txt b/requirements.txt
#   new file mode 100644
#   index 0000000..1402359
#   --- /dev/null
#   +++ b/requirements.txt
#   @@ -0,0 +1,5 @@
#   +numpy==2.2.6
#   +opencv-python==4.12.0.88
#   +packaging==25.0
#   +pillow==11.3.0
#   +pytesseract==0.3.13
# ---------------------------------------------------------------------------