Skip to content

Commit

Permalink
Merge pull request #36 from BlipRanger/dev
Browse files Browse the repository at this point in the history
1.4.1
  • Loading branch information
BlipRanger authored Jun 18, 2021
2 parents 68cf471 + 5efc429 commit dc2e597
Show file tree
Hide file tree
Showing 9 changed files with 191 additions and 109 deletions.
3 changes: 1 addition & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ RUN apt-get install ffmpeg -y

WORKDIR /bdfrh
COPY ./bdfrtohtml/ ./bdfrtohtml
COPY ./start.py ./start.py
COPY ./requirements.txt ./requirements.txt
COPY ./config/config.yml ./config/config.yml
COPY ./config/default_bdfr_config.cfg ./config/default_bdfr_config.cfg
Expand All @@ -18,4 +17,4 @@ RUN pip install -r requirements.txt
RUN mkdir input
RUN mkdir output

CMD python start.py
CMD python -m bdfrtohtml automate
2 changes: 1 addition & 1 deletion bdfrtohtml/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# __init__.py
__author__ = "BlipRanger"
__version__ = "1.3.1"
__version__ = "1.4.1"
__license__ = "GNU GPLv3"
18 changes: 14 additions & 4 deletions bdfrtohtml/__main__.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
# __main__.py

__author__ = "BlipRanger"
__version__ = "1.3.1"
__version__ = "1.4.1"
__license__ = "GNU GPLv3"

import os
import click
import shutil
from bdfrtohtml import filehelper
from bdfrtohtml import posthelper
from bdfrtohtml import automation
from bdfrtohtml import util
import logging
import copy

logger = logging.getLogger(__name__)


@click.command()
@click.group(invoke_without_command=True)
@click.option('--input_folder', default=None, help='The folder where the download and archive results have been saved to')
@click.option('--output_folder', default=None, help='Folder where the HTML results should be created.')
@click.option('--recover_comments', type=bool, help='Should we attempt to recover deleted comments?')
Expand All @@ -32,10 +32,12 @@
@click.pass_context
def main(context: click.Context, **_):

if context.invoked_subcommand is not None:
return
if context.params.get('config'):
config = util.load_config(context.params.get('config'))
else:
config = util.generate_default_config()
config = util.generate_default_config()["bdfrhtml"]
config = util.process_click_arguments(config, context)
logging.debug(config)

Expand Down Expand Up @@ -90,6 +92,14 @@ def main(context: click.Context, **_):

logging.info("BDFR-HTML run complete.")

# "automate" subcommand: either write the default config files and exit, or
# start the download/convert automation.
# (Kept as comments rather than a docstring so click's --help text is unchanged.)
@main.command("automate")
@click.option('--generate_config', type=bool, default=False, help='Just generate the config files for automation')
def run_automation(generate_config):
    # Pick the action first, then invoke it; only the chosen attribute is looked up.
    action = automation.generate_configs if generate_config else automation.automate
    action()


if __name__ == '__main__':
LOGLEVEL = os.environ.get('BDFRH_LOGLEVEL', 'INFO').upper()
Expand Down
117 changes: 117 additions & 0 deletions bdfrtohtml/automation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#!/usr/bin/env python3
import os
import subprocess
import time
import yaml
import shutil
import logging
import bdfrtohtml
import sys

LOGLEVEL = os.environ.get('BDFRH_LOGLEVEL', 'INFO').upper()
logging.basicConfig(level=LOGLEVEL)

def load_config(config_file):
    """Return the parsed YAML content of *config_file*.

    When *config_file* does not exist, generate_bdfrhtml_config_file() is
    called first (it writes the defaults to "config/config.yml"); the file at
    *config_file* is then opened and parsed with yaml.safe_load.
    """
    config_missing = not os.path.exists(config_file)
    if config_missing:
        generate_bdfrhtml_config_file()
    with open(config_file, 'r') as handle:
        return yaml.safe_load(handle)

def assure_path_exists(path):
    """Make sure the directory *path* exists, creating it if needed.

    Args:
        path: Directory path, with or without a trailing separator.

    Returns:
        The directory path without a trailing separator. An empty *path*
        yields "" and nothing is created.
    """
    # Joining with an empty component forces a trailing separator, so that
    # dirname() returns the whole path instead of its parent.
    directory = os.path.dirname(os.path.join(path, ''))
    if directory and not os.path.exists(directory):
        # exist_ok avoids failing if the directory appears between the check
        # above and this call.
        os.makedirs(directory, exist_ok=True)
        logging.debug(f"Created {directory}")
    return directory

def create_or_copy_config(config_filepath):
    """Make sure a bdfr config file exists at *config_filepath*.

    Ensures the default bdfr config and the "config/user_configs" directory
    exist, then copies the default to *config_filepath* unless a file is
    already there. Always returns True.
    """
    template_path = generate_bdfr_config_file()
    assure_path_exists("config/user_configs")
    already_present = os.path.exists(config_filepath)
    if already_present:
        return True
    shutil.copyfile(template_path, config_filepath)
    return True

def remove_default_index(output_folder):
    """Delete "index.html" from *output_folder* if it is there.

    Args:
        output_folder: Directory that may contain a sample index file.

    A missing file (or a missing/non-directory *output_folder*) is not an
    error; other OS errors from the removal propagate.
    """
    index_path = os.path.join(output_folder, 'index.html')
    try:
        # Remove directly instead of checking first, so a file that vanishes
        # between a check and the removal cannot cause a crash.
        os.remove(index_path)
    except (FileNotFoundError, NotADirectoryError):
        pass

def generate_bdfr_config_file():
    """Create "config/default_bdfr_config.cfg" if missing and return its path.

    The content comes from bdfrtohtml.util.get_bdfr_config(). An existing
    file is left untouched.
    """
    assure_path_exists("./config/")
    source_config = "config/default_bdfr_config.cfg"
    if os.path.exists(source_config):
        return source_config
    # Fetch the text before opening the file, so a failure here does not
    # leave an empty config behind.
    default_text = bdfrtohtml.util.get_bdfr_config()
    with open(source_config, "w") as out_file:
        out_file.write(default_text)
    return source_config

def generate_bdfrhtml_config_file():
    """Write the default settings to "config/config.yml" and return that path.

    The settings come from bdfrtohtml.util.generate_default_config() and are
    dumped as block-style YAML. The file is opened in write mode, so any
    existing content is replaced.
    """
    target = "config/config.yml"
    assure_path_exists("./config/")
    defaults = bdfrtohtml.util.generate_default_config()
    with open(target, "w") as out_file:
        yaml.dump(defaults, out_file, default_flow_style=False)
    return target

def generate_configs():
    """Generate both config files: the bdfrhtml one first, then the bdfr one."""
    for write_config in (generate_bdfrhtml_config_file, generate_bdfr_config_file):
        write_config()

# The automated process of downloading and merging posts
#
# Loads "config/config.yml" through load_config() and reads its 'bdfr' and
# 'bdfrhtml' sections. It then loops forever: optionally runs `bdfr clone` as a
# subprocess (when bdfr_cfg['run_bdfr'] is truthy), runs `bdfrtohtml` as a
# subprocess, logs, and sleeps.
# Two modes, chosen by whether bdfr_cfg has a 'users' entry:
#   - 'users' set: one bdfr config file per user under config/user_configs/.
#   - 'users' not set: a single config, config/user_configs/default_config.cfg.
# NOTE(review): leading indentation is missing from this listing, so the exact
# nesting of the statements below cannot be read from it -- confirm against the
# repository before editing.
def automate():
config_path = "config/config.yml"
config = load_config(config_path)

bdfr_cfg = config['bdfr']
bdfrhtml_cfg = config['bdfrhtml']
input_folder = bdfrhtml_cfg['input_folder']
output_folder = bdfrhtml_cfg['output_folder']
# Multi-user mode when the 'users' key is set.
if bdfr_cfg.get('users') is not None:
merge_users = bdfr_cfg.get('merge_users', False)
# When merging, drop any index.html already sitting in the shared output folder.
if merge_users: remove_default_index(output_folder)

while True:
for user in bdfr_cfg.get('users'):
bdfr_config_file = os.path.join("config/user_configs/", (user + '.cfg'))

# Without merging, input/output are redirected to a per-user subfolder.
if not merge_users:
input_folder = os.path.join(bdfrhtml_cfg['input_folder'], (user + '/'))
output_folder = os.path.join(bdfrhtml_cfg['output_folder'], (user + '/'))
idList = os.path.join(output_folder, "idList.txt")
create_or_copy_config(bdfr_config_file)
# "--user me --saved" is passed for every entry in 'users'; what differs
# per user is the --config file (and the folders when not merging).
clone_command =[sys.executable, "-m", "bdfr", "clone", "--user", "me", "--saved", "-L", str(bdfr_cfg['limit']),
"--exclude-id-file", idList, "--authenticate", "--file-scheme", "{POSTID}", input_folder, "--config", bdfr_config_file]

if bdfr_cfg['run_bdfr']:
logging.info(f"Now running BDFR for {user}")
subprocess.call(clone_command)
if not merge_users:
subprocess.call([sys.executable, "-m", "bdfrtohtml", "--config", config_path, "--input_folder", input_folder, "--output_folder", output_folder])
if merge_users:
subprocess.call([sys.executable, "-m", "bdfrtohtml", "--config", config_path, "--input_folder", input_folder, "--output_folder", output_folder])
# 'frequency' is treated as minutes: the log says so and sleep() takes seconds.
# NOTE(review): here int() wraps frequency*60, while the single-config branch
# below uses int(frequency)*60 -- the two differ for non-integer values;
# confirm which is intended.
logging.info(f"Runs complete, now waiting for {int(bdfr_cfg['frequency'])} minutes before next run.")
time.sleep(int(bdfr_cfg['frequency']*60))

# Single-config mode: no 'users' entry.
else:
idList = os.path.join(bdfrhtml_cfg['output_folder'], "idList.txt")
default_config = "config/user_configs/default_config.cfg"
create_or_copy_config(default_config)
# The command is built once, outside the loop, and reused on every run.
clone_command =[sys.executable, "-m", "bdfr", "clone", "--user", "me", "--saved", "-L", str(bdfr_cfg['limit']),
"--exclude-id-file", idList, "--authenticate", "--file-scheme", "{POSTID}", input_folder, "--config", default_config]

while True:
if bdfr_cfg['run_bdfr']:
logging.info(f"Now running BDFR for saved posts.")
subprocess.call(clone_command)
# No folder overrides here: bdfrtohtml is given only --config.
subprocess.call([sys.executable, "-m", "bdfrtohtml", "--config", config_path])
logging.info(f"Runs complete, now waiting for {int(bdfr_cfg['frequency'])} minutes before next run.")
time.sleep(int(bdfr_cfg['frequency'])*60)
4 changes: 2 additions & 2 deletions bdfrtohtml/posthelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import requests
from bdfrtohtml import filehelper
import os
import sys

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -76,8 +77,7 @@ def get_comment_context(post, input_folder):

if id is not None:
try:
subprocess.call(["python", "-m", "bdfr", "archive", "-l", post['permalink'], context_folder])
subprocess.call(["python", "-m", "bdfr", "download", "-l", post['permalink'], "--file-scheme", "{POSTID}",
subprocess.call([sys.executable, "-m", "bdfr", "clone", "-l", post['permalink'], "--file-scheme", "{POSTID}",
context_folder])
except Exception as e:
logging.error(e)
Expand Down
50 changes: 38 additions & 12 deletions bdfrtohtml/util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import logging
import time
import yaml
import click
import requests

# Load in a yaml config file
def load_config(config_file):
Expand All @@ -9,19 +11,43 @@ def load_config(config_file):

# Default settings
# Builds and returns the default configuration dictionary.
# NOTE(review): this span comes from a rendered diff, so two bodies appear back
# to back. The flat dict ending at the first `return cfg` looks like the removed
# version; the nested dict with 'bdfr' and 'bdfrhtml' sections looks like the
# added one -- confirm against the repository before relying on either.
def generate_default_config():
cfg = {
'recover_comments': False,
'recover_posts': False,
'output_folder': './output',
'input_folder': './input',
'archive_context': False,
'delete_media': False,
'write_links_to_file': 'None',
'generate_thumbnails': False,
'index_mode': 'default'
}
return cfg
cfg = {
# Settings read by the automation loop ('limit', 'run_bdfr', 'frequency').
'bdfr': {
'limit': 1000,
'run_bdfr': True,
'frequency': 60
},
# Settings for the HTML generation itself.
'bdfrhtml': {
'recover_comments': False,
'recover_posts': False,
'output_folder': './output',
'input_folder': './input',
'archive_context': False,
'delete_media': False,
'write_links_to_file': 'None',
'generate_thumbnails': False,
'index_mode': 'default'
}
}
return cfg

def get_bdfr_config():
    """Return the text of a default bdfr config file.

    Tries to download bdfr's default_config.cfg from GitHub. If the request
    fails (network error, timeout, or a non-200 response), a hardcoded
    default config is returned instead, so callers always get usable text.

    Returns:
        The config file content as a string.
    """
    url = "https://raw.githubusercontent.com/aliparlakci/bulk-downloader-for-reddit/master/bdfr/default_config.cfg"
    logging.info("Downloading default config file from github")
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException as err:
        # Offline or unreachable host: fall through to the hardcoded config
        # rather than crashing.
        logging.info(f"Request for bdfr config failed: {err}")
        response = None

    if response is not None and response.status_code == 200:
        logging.info("Successfully acquired bdfr config from github")
        return bytes.decode(response.content)

    logging.info("Could not download, generated bdfr config instead")
    return """[DEFAULT]
client_id = U-6gk4ZCh3IeNQ
client_secret = 7CZHY6AmKweZME5s50SfDGylaPg
scopes = identity, history, read, save
backup_log_count = 3
max_wait_time = 120
time_format = ISO"""

# Used in the jinja2 template
def float_to_datetime(value):
    """Format the epoch timestamp *value* as 'YYYY-MM-DD HH:MM:SS' in local time."""
    local_parts = time.localtime(value)
    pattern = '%Y-%m-%d %H:%M:%S'
    return time.strftime(pattern, local_parts)
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ praw>=7.2.0
pyyaml>=5.4.1
requests>=2.25.1
youtube-dl>=2021.3.14
bdfr==2.1.1
bdfr==2.2
jinja2==3.0.0
pillow>=8.0.0
20 changes: 17 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
setup(
name='bdfrtohtml',
packages=['bdfrtohtml'],
version='1.3.1',
version='1.4.1',
description='Convert the output of BDFR to HTML',
author='BlipRanger',
author_email='blipranger@shrubbery.co',
Expand All @@ -12,7 +12,21 @@
keywords=['bdfr', 'reddit', 'downloader'],
classifiers=[],
package_data={
# If any package contains *.txt or *.rst files, include them:
"": ["templates/*.html", "templates/*.css"],
}
},
install_requires=[
'click==7.1.2',
'Markdown==3.3.4',
'appdirs>=1.4.4',
'bs4>=0.0.1',
'dict2xml>=1.7.0',
'ffmpeg-python>=0.2.0',
'praw>=7.2.0',
'pyyaml>=5.4.1',
'requests>=2.25.1',
'youtube-dl>=2021.3.14',
'bdfr==2.2',
'jinja2==3.0.0',
'pillow>=8.0.0'
]
)
Loading

0 comments on commit dc2e597

Please sign in to comment.