update script
This commit is contained in:
45
main.py
45
main.py
@@ -1,5 +1,4 @@
|
||||
import cv2
|
||||
from numpy import extract
|
||||
import pytesseract
|
||||
import logging
|
||||
import re
|
||||
@@ -7,9 +6,11 @@ import os
|
||||
import json
|
||||
|
||||
|
||||
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
|
||||
|
||||
# Crop window for the on-screen counter. extract_count_at_frame unpacks this
# as (x, y, h, w) and slices frame[x:h, fw - w: fw - y], where fw is the frame
# width -- so the values are used as: first row, right-edge margin, end row
# (exclusive), and distance of the crop's left edge from the frame's right
# edge. The tuple must have exactly four entries for that unpacking to work.
CROP = (
    5,    # x: first row of the crop
    5,    # y: columns skipped at the right edge of the frame
    32,   # h: row at which the crop ends (exclusive)
    120,  # w: columns between the crop's left edge and the frame's right edge
)
|
||||
@@ -18,15 +19,7 @@ RX = r"\d+"
|
||||
|
||||
RESULTS_DIR = "results.json"
|
||||
VIDEOS_DIR = "videos"
|
||||
|
||||
|
||||
def get_log_level(level):
|
||||
level = level.strip().upper()
|
||||
if level.isdigit():
|
||||
return int(level)
|
||||
if lvl := logging.getLevelNamesMapping().get(level):
|
||||
return lvl
|
||||
raise Exception("invalid log level")
|
||||
VIDEO_EXTENSIONS = [".webm", ".mp4"]
|
||||
|
||||
|
||||
def setup_logger(level):
|
||||
@@ -49,11 +42,12 @@ def extract_count_at_frame(cap, idx):
|
||||
fh, fw, _ = frame.shape
|
||||
|
||||
(x, y, h, w) = CROP
|
||||
cropped = frame[x:h, fw - w : fw - y]
|
||||
cropped = frame[x:h, fw - w: fw - y]
|
||||
|
||||
gray = cv2.cvtColor(cropped, cv2.COLOR_RGB2GRAY)
|
||||
|
||||
text = pytesseract.image_to_string(gray, config="--psm 6") # "psm 6" = assume a block of text
|
||||
# "psm 6" = assume a block of text
|
||||
text = pytesseract.image_to_string(gray, config="--psm 6")
|
||||
matches = re.findall(RX, text)
|
||||
if not matches:
|
||||
return -1
|
||||
@@ -75,7 +69,8 @@ def extract_timestamps(video_path):
|
||||
c_start = extract_count_at_frame(cap, 0)
|
||||
c_end = extract_count_at_frame(cap, total_frames)
|
||||
if c_end <= c_start:
|
||||
logging.info(f"no additional deaths in this video start={c_start} end={c_end}")
|
||||
logging.info(
|
||||
f"no additional deaths in this video start={c_start} end={c_end}")
|
||||
return []
|
||||
|
||||
timeframes = []
|
||||
@@ -87,12 +82,14 @@ def extract_timestamps(video_path):
|
||||
pivot = get_pivot(start, end)
|
||||
count = extract_count_at_frame(cap, pivot)
|
||||
if count < c_start or count > c_end:
|
||||
logging.warning(f"faulty value found count={count} c_start={c_start} c_end={c_end} frame={pivot}")
|
||||
logging.warning(
|
||||
f"faulty value found count={count} c_start={c_start} c_end={c_end} frame={pivot}")
|
||||
start += 1
|
||||
continue
|
||||
logging.debug(f"s={start} e={end} p={pivot} c={count}")
|
||||
logging.info(f"s={start} e={end} p={pivot} c={count}")
|
||||
if end - start < fps:
|
||||
logging.debug(f"found window s={start} e={end} p={pivot} c={count}")
|
||||
logging.info(
|
||||
f"found window s={start} e={end} p={pivot} c={count}")
|
||||
timeframes.append(start / fps)
|
||||
last_count += 1
|
||||
if len(timeframes) == c_end - c_start:
|
||||
@@ -112,7 +109,7 @@ def extract_timestamps(video_path):
|
||||
def format_timestamp(secs):
    """Render a duration in seconds as a zero-padded ``MM:SS`` string.

    Args:
        secs: Duration in seconds (int or float); fractions are truncated.

    Returns:
        A string such as ``"01:05"``. Minutes are not wrapped into hours, so
        values of an hour or more yield a minutes field above 59.
    """
    mins = int(secs / 60)
    secs = int(secs % 60)
    # ">" right-aligns the number so the zero fill lands on the left;
    # "<" would turn 5 into "50" instead of "05".
    return f"{mins:0>2}:{secs:0>2}"
|
||||
|
||||
|
||||
def load_results():
|
||||
@@ -124,17 +121,21 @@ def load_results():
|
||||
|
||||
def store_results(results):
    """Write ``results`` as indented JSON to the file named by RESULTS_DIR.

    The file is overwritten on every call. The data is dumped exactly once,
    since a second dump would append another JSON document and leave the
    file unparseable.

    Args:
        results: JSON-serialisable object to persist.
    """
    with open(RESULTS_DIR, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII characters readable in the
        # file; the file is opened as UTF-8, so they are written verbatim.
        json.dump(results, f, indent=2, ensure_ascii=False)
|
||||
|
||||
|
||||
def main():
|
||||
setup_logger(get_log_level("info"))
|
||||
setup_logger("INFO")
|
||||
|
||||
results = load_results()
|
||||
|
||||
for f in os.listdir(VIDEOS_DIR):
|
||||
_, ext = os.path.splitext(f)
|
||||
if ext not in VIDEO_EXTENSIONS:
|
||||
logging.info(f"skipping: file extension file={f}")
|
||||
continue
|
||||
if f in results:
|
||||
logging.info(f"already analyzed, skipping file={f}")
|
||||
logging.info(f"skipping: already analyzed file={f}")
|
||||
continue
|
||||
logging.info(f"extracting timestamps ... file={f}")
|
||||
timestamps = extract_timestamps(os.path.join(VIDEOS_DIR, f))
|
||||
|
||||
Reference in New Issue
Block a user