Merge pull request #39 from LemurPwned/feat/image-folder-support

Feat/image folder support
LemurPwned · Nov 11, 2024 · f7b1d8c · f7b1d8c
2 parents 983b2c2 + f1e2c36
commit f7b1d8c
Show file tree

Hide file tree

Showing 30 changed files with 1,626 additions and 477 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -21,10 +21,10 @@ jobs:
 
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python 3.9
+    - name: Set up Python 3.10
       uses: actions/setup-python@v2.2.2
       with:
-        python-version: "3.9"
+        python-version: "3.10"
 
     - name: Install dependencies
       run: |

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -37,9 +37,9 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install flake8 pytest
+        python -m pip install flake8 pytest pytest-cov
         python -m pip install -e .[dev,all]
         python -m spacy download en_core_web_sm
     - name: Test with pytest
       run: |
-        pytest
+        pytest --cov=./ --cov-report=term-missing --cov-fail-under=60
diff --git a/README.md b/README.md
@@ -26,6 +26,7 @@ Currently, it uses keyframe decoding, frame interval gating and perceptual hashi
   - [Features](#features)
   - [Installation and Usage](#installation-and-usage)
     - [Basic usage](#basic-usage)
+      - [Image sampling](#image-sampling)
       - [YT-DLP integration plugin](#yt-dlp-integration-plugin)
         - [Extra YT-DLP options](#extra-yt-dlp-options)
       - [OpenAI summary](#openai-summary)
@@ -101,14 +102,29 @@ Plain:
 python3 -m video_sampler hash FatCat.mp4 ./dataset-frames/ --hash-size 3 --buffer-size 20
 ```
 
-From the config file:
+From the config file (this is the recommended way if you plan to re-use the same config for different videos):
 
 ```bash
 python3 -m video_sampler config ./configs/hash_base.yaml /my-video-folder/ ./my-output-folder
 ```
 
 You can set the number of workers to use with the `n_workers` parameter. The default is 1.
 
+#### Image sampling
+
+If your frames are ordered, you can use the `image_sampler` module to sample them. The images need a well-defined ordering — for example, filenames that sort correctly, such as `image_001.png`, `image_002.png`, etc. — because the sampler deduplicates based on a circular buffer of hashes.
+An example of a config for `image_sampler` is given in [./configs/image_base.yaml](./configs/image_base.yaml).
+Key differences from `video_sampler` are:
+
+- `frame_time_regex` - regex to extract frame time from the filename. If not provided, the frames will be lexicographically ordered.
+- any video sampling params such as `min_frame_interval_sec`, `keyframes_only` will be disregarded.
+
+To run the image sampler, pass the `--images` flag:
+
+```bash
+python3 -m video_sampler config ./configs/image_base.yaml "./folder-frames/worlds-smallest-cat-bbc" ./sampled-output/ --images
+```
+
 #### YT-DLP integration plugin
 
 Before using please consult the ToS of the website you are scraping from -- use responsibly and for research purposes.

diff --git a/configs/image_base.yaml b/configs/image_base.yaml
@@ -0,0 +1,32 @@
+# Whether to process only keyframes (it's way faster than processing all frames)
+# Read interval while processing (in seconds): how long to wait before checking again when no frame was yielded
+queue_wait: 0.1
+debug: false
+# Whether to print stats
+print_stats: false
+# Buffer configuration
+buffer_config:
+  type: grid
+  # the smaller the hash size, the greater chance of collision
+  # smaller hashsets are faster to process & reduce frames more aggressively
+  hash_size: 8
+  # size of the collision buffer. The larger the buffer, the further back in time
+  # hashes are stored.
+  grid_x: 5
+  grid_y: 5
+  max_hits: 2
+  size: 15
+  debug: false
+# Gating configuration
+gate_config:
+  type: pass
+extractor_config: {}
+summary_config: {}
+# Number of workers (separate processes) to process the frames. Determines level of parallelism
+n_workers: 3
+save_format:
+  encode_time_b64: false
+  include_filename: false
+  avoid_dot: false
+
+frame_time_regex: '(\d+(?:\.\d+)?)'
diff --git a/pyproject.toml b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "video_sampler"
 description = "Video Sampler -- sample frames from a video file"
 url = "https://github.com/LemurPwned/video-sampler"
-version = "0.11.5"
+version = "0.12.0"
 authors = [
     { name = "LemurPwned", email = "lemurpwned@gmail.com" }
 ]
@@ -75,3 +75,10 @@ all = [
 [build-system]
 build-backend = "flit_core.buildapi"
 requires = ["flit_core >=3.2,<4"]
+
+[tool.coverage.run]
+omit = [
+    "config.py",
+    "config-3.py",
+    "*/_remote_module_non_scriptable.py"
+    ]
diff --git a/scripts/run_benchmarks.py b/scripts/run_benchmarks.py
@@ -4,7 +4,7 @@
 from tabulate import tabulate
 from tqdm import tqdm
 
-from video_sampler.sampler import SamplerConfig, VideoSampler
+from video_sampler.worker import SamplerConfig, VideoSampler
 
 clip_gate = dict(
     type="clip",

diff --git a/tests/assets/images/imgA.jpg b/tests/assets/images/imgA.jpg
diff --git a/tests/assets/images/imgB.jpg b/tests/assets/images/imgB.jpg
diff --git a/tests/test_base.py b/tests/test_base.py
@@ -1,6 +1,7 @@
 from PIL import Image
 
-from video_sampler.sampler import SamplerConfig, VideoSampler
+from video_sampler.config import SamplerConfig
+from video_sampler.samplers import VideoSampler
 from video_sampler.schemas import FrameObject