Skip to content

Commit

Permalink
PEP8 Compliance changes.
Browse files Browse the repository at this point in the history
  • Loading branch information
BlipRanger committed May 17, 2021
1 parent f699a0a commit be524dc
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 108 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
.vscode/settings.json
/venv/
.idea
html/
temp/
__pycache__/
44 changes: 22 additions & 22 deletions bdfrtohtml/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,49 +7,49 @@
import os
import click
import shutil
import logging
from bdfrtohtml import filehelper
from bdfrtohtml import posthelper
import logging

logger = logging.getLogger(__name__)


@click.command()
@click.option('--input', default='./', help='The folder where the download and archive results have been saved to')
@click.option('--output', default='./html/', help='Folder where the HTML results should be created.')
@click.option('--recover_comments', default=False, type=bool, help='Should we attempt to recover deleted comments?')
@click.option('--archive_context', default=False, type=bool,
              help='Should we attempt to archive the contextual post for saved comments?')
@click.option('--delete_input', default=False, type=bool, help='Should we delete the input after creating the output?')
def main(input, output, recover_comments, archive_context, delete_input):
    """Convert the archives and media found in the input folder into HTML pages."""
    # NOTE: `input` shadows the builtin, but the name is dictated by the
    # --input option and is therefore part of the command's interface.
    output = filehelper.assure_path_exists(output)
    input = filehelper.assure_path_exists(input)
    filehelper.assure_path_exists(os.path.join(output, "media/"))

    # Load all of the json files
    all_posts = filehelper.import_posts(input)

    # Only posts whose page was actually written are indexed and recorded in
    # the id list; a post that failed must not be advertised as done.
    written_posts = []

    # Find the media for the files and append that to the entry
    for entry in all_posts:
        post = entry
        try:
            posthelper.handle_comments(post)
            if recover_comments:
                post = posthelper.recover_deleted_comments(post)
            if archive_context:
                post = posthelper.get_comment_context(post, input)

            filehelper.find_matching_media(post, input, output)
            filehelper.write_post_to_file(post, output)
        except Exception as e:
            # Best effort: one broken post must not stop the whole run.
            # `entry` is used for the id because `post` may have been rebound
            # to something else before the failure happened.
            logging.error("Processing post " + entry["id"] + " has failed due to: " + str(e))
        else:
            written_posts.append(entry)

    filehelper.write_index_file(written_posts, output)
    filehelper.write_list_file(written_posts, output)
    shutil.copyfile('./templates/style.css', os.path.join(output, 'style.css'))

    if delete_input:
        filehelper.empty_input_folder(input)


if __name__ == '__main__':
logging.basicConfig(level=logging.DEBUG)
Expand Down
99 changes: 53 additions & 46 deletions bdfrtohtml/filehelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,75 +6,80 @@
import os
import logging
from bdfrtohtml import util
logger = logging.getLogger(__name__)

logger = logging.getLogger(__name__)

templateLoader = jinja2.FileSystemLoader(searchpath="./templates")
templateEnv = jinja2.Environment(loader=templateLoader)
templateEnv.add_extension('jinja2.ext.debug')
templateEnv.filters["markdown"] = markdown.markdown
templateEnv.filters["float_to_datetime"] = util.float_to_datetime

#Import all json files into single list.
def importPosts(folder):
postList = []

# Import all json files into single list.
def import_posts(folder):
    """Collect every archived post found below *folder*.

    The folder is walked recursively and each ``.json`` file is loaded with
    ``load_json``. Only JSON objects carrying a non-null ``"id"`` are kept.
    Files that cannot be read or parsed are logged and skipped, so a single
    damaged archive does not abort the whole import.

    Returns the list of loaded post dicts, in ``os.walk`` order.
    """
    post_list = []
    for dirpath, _dirnames, filenames in os.walk(folder):
        for filename in filenames:
            if not filename.endswith(".json"):
                continue
            file_path = os.path.join(dirpath, filename)
            try:
                data = load_json(file_path)
            except (OSError, ValueError) as e:
                # ValueError covers both invalid JSON and undecodable bytes.
                logging.error('Could not import ' + file_path + ': ' + str(e))
                continue
            # A JSON document may also be a list or a scalar, which has no .get().
            if isinstance(data, dict) and data.get("id") is not None:
                post_list.append(data)
                logging.debug('Imported ' + file_path)
    return post_list

#Write index page
def writeIndexFile(postList, outputFolder):

# Write index page
def write_index_file(post_list, output_folder):
    """Render the ``index.html`` template for *post_list* into *output_folder*.

    The template receives the posts under the name ``posts``; the result is
    written as UTF-8 to ``<output_folder>/index.html``.
    """
    page = templateEnv.get_template("index.html")
    index_path = os.path.join(output_folder, "index.html")

    with open(index_path, 'w', encoding="utf-8") as handle:
        handle.write(page.render(posts=post_list))
    logging.debug('Wrote ' + index_path)


# Check for path, create if does not exist
def assure_path_exists(path):
    """Make sure the directory *path* exists and return it.

    The returned value is the directory without a trailing separator,
    whether or not the caller supplied one.
    """
    # Joining with an empty component forces a trailing separator, so that
    # dirname() yields the complete directory instead of its parent.
    directory = os.path.dirname(os.path.join(path, ''))
    if not os.path.exists(directory):
        # exist_ok covers the directory being created by someone else
        # between the check above and this call.
        os.makedirs(directory, exist_ok=True)
        logging.debug("Created " + directory)
    return directory

#Loads in the json file data and adds the path of it to the dict
def loadJson(file_path):
f = open(file_path,)

# Loads in the json file data
def load_json(file_path):
    """Parse the JSON file at *file_path* and return the decoded value.

    The file is read as UTF-8 and is closed even when parsing fails.

    Raises:
        OSError: the file cannot be opened or read.
        ValueError: the content is not valid UTF-8 or not valid JSON.
    """
    with open(file_path, encoding="utf-8") as f:
        data = json.load(f)
    logging.debug('Loaded ' + file_path)
    return data

#Copy media from the input folder to the file structure of the html pages
def copyMedia(sourcePath, outputPath):

if outputPath.endswith('mp4'):
# Copy media from the input folder to the file structure of the html pages
def copy_media(source_path, output_path):
    """Place the media file *source_path* at *output_path*.

    Targets ending in ``mp4`` are remuxed with ffmpeg (stream copy, existing
    output overwritten); everything else is copied unchanged. If ffmpeg is
    missing or fails, the error is logged and the file is copied as-is so
    the generated page still has media to link to.
    """
    if output_path.endswith('mp4'):
        # This fixes mp4 files that won't play in browsers.
        # The command is an argument list rather than one string: without
        # shell=True a string is taken as the program name on POSIX, and a
        # list keeps quotes or spaces in file names from breaking the call.
        command = ['ffmpeg', '-nostats', '-loglevel', '0', '-i', source_path,
                   '-c:v', 'copy', '-c:a', 'copy', '-y', output_path]
        try:
            subprocess.check_output(command)
        except (OSError, subprocess.SubprocessError) as e:
            logging.error('FFMPEG failed: ' + str(e))
            # Fall back to the unmodified file rather than leaving nothing.
            shutil.copyfile(source_path, output_path)
    else:
        shutil.copyfile(source_path, output_path)
    logging.debug('Moved ' + source_path + ' to ' + output_path)


#Search the input folder for media files containing the id value from an archive
def findMatchingMedia(post, inputFolder, outputFolder):
# Search the input folder for media files containing the id value from an archive
def find_matching_media(post, input_folder, output_folder):
paths = []
mediaFolder = os.path.join(outputFolder, 'media/')
#Don't copy if we already have it
existingMedia = os.path.join(outputFolder, "media/")
for dirpath, dnames, fnames in os.walk(existingMedia):
media_folder = os.path.join(output_folder, 'media/')

# Don't copy if we already have it
existing_media = os.path.join(output_folder, "media/")
for dirpath, dnames, fnames in os.walk(existing_media):
for f in fnames:
if post['id'] in f and not f.endswith('.json') and not f.endswith('.html'):
logging.debug("Find Matching Media found: " + dirpath + f)
Expand All @@ -83,36 +88,38 @@ def findMatchingMedia(post, inputFolder, outputFolder):
logging.debug("Existing media found for " + post['id'])
post['paths'] = paths
return
for dirpath, dnames, fnames in os.walk(inputFolder):
for dirpath, dnames, fnames in os.walk(input_folder):
for f in fnames:
if post['id'] in f and not f.endswith('.json'):
logging.debug("Find Matching Media found: " + dirpath + f)
copyMedia(os.path.join(dirpath, f), os.path.join(mediaFolder, f))
copy_media(os.path.join(dirpath, f), os.path.join(media_folder, f))
paths.append(os.path.join('media/', f))
post['paths'] = paths
return
return

#Creates the html for a post using the jinja2 template and writes it to a file
def writePostToFile(post, outputFolder):

# Creates the html for a post using the jinja2 template and writes it to a file
def write_post_to_file(post, output_folder):
    """Render *post* with the ``page.html`` template and write it to disk.

    The page is stored as ``<id>.html`` inside *output_folder*. The post
    dict is updated with ``filename`` and ``filepath`` before rendering, so
    both values are available to the template.
    """
    page = templateEnv.get_template("page.html")
    page_name = post['id'] + ".html"
    post['filename'] = page_name
    post['filepath'] = os.path.join(output_folder, page_name)

    with open(post['filepath'], 'w', encoding="utf-8") as handle:
        handle.write(page.render(post=post))
    logging.debug('Wrote ' + post['filepath'])

#Write a list of successful ids to a file
def writeListFile(posts, outputFolder):
filepath = os.path.join(outputFolder, "idList.txt")

# Write a list of successful ids to a file
def write_list_file(posts, output_folder):
    """Write the ``id`` of every post in *posts* to ``idList.txt``.

    The file is created in *output_folder* (replacing any previous one),
    encoded as UTF-8, with one id per line.
    """
    filepath = os.path.join(output_folder, "idList.txt")
    with open(filepath, 'w', encoding="utf-8") as file:
        # One batched call instead of a separate write per id.
        file.writelines(post['id'] + '\n' for post in posts)

#Delete the contents of the input folder
def emptyInputFolder(inputFolder):
for root, dirs, files in os.walk(inputFolder):

# Delete the contents of the input folder
def empty_input_folder(input_folder):
for root, dirs, files in os.walk(input_folder):
for file in files:
os.remove(os.path.join(root, file))
logger.debug("Removed: " + os.path.join(root, file))
Expand Down
53 changes: 28 additions & 25 deletions bdfrtohtml/posthelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,56 +7,60 @@
import requests
from bdfrtohtml import filehelper
import os

logger = logging.getLogger(__name__)

#Request a specific comment to be recovered
def recoverDeletedComment(comment):

# Request a specific comment to be recovered
def recover_deleted_comment(comment):
    """Try to restore a deleted comment from pushshift.

    The comment is looked up by its ``id``. When exactly one match comes
    back, ``author``, ``body`` and ``score`` are overwritten and
    ``recovered`` is set to True. Any failure is logged and leaves the
    comment untouched; the same dict is returned in every case.
    """
    try:
        response = requests.get(
            "https://api.pushshift.io/reddit/comment/search?ids={id}".format(id=comment['id']),
            timeout=10,  # without a timeout an unresponsive server blocks the run forever
        )
        # Surface HTTP errors as such instead of as a confusing JSON/KeyError.
        response.raise_for_status()
        data = response.json()['data']
        if len(data) == 1:
            rev_comment = data[0]
            # Read every field before writing any, so an incomplete answer
            # cannot leave the comment half-updated.
            recovered = {
                'author': rev_comment['author'],
                'body': rev_comment['body'],
                'score': rev_comment['score'],
                'recovered': True,
            }
            comment.update(recovered)
            logging.info('Recovered ' + comment.get('id', '') + ' from pushshift')
    except Exception as e:
        # Recovery is best effort; the caller continues with the original comment.
        logging.error(e)
    return comment

#Recover deleted comments via pushshift
def recoverDeletedComments(post):

# Recover deleted comments via pushshift
def recover_deleted_comments(post):
    """Attempt to recover every deleted comment of *post*.

    Comments are visited depth-first in document order, following
    ``replies`` to any depth; each one whose ``body`` is ``"[deleted]"`` is
    handed to ``recover_deleted_comment``, which updates it in place.
    Missing ``comments``, ``replies`` or ``body`` keys are treated as empty.

    Returns the same *post* object.
    """
    # Explicit stack instead of recursion: thread depth is not bounded here.
    pending = list(reversed(post.get('comments') or []))
    while pending:
        comment = pending.pop()
        if comment.get('body') == "[deleted]":
            recover_deleted_comment(comment)
        pending.extend(reversed(comment.get('replies') or []))
    return post


#Requires bdfr V2
#Use BDFR to download both the archive and media for a given post
def getCommentContext(post, inputFolder):
# Requires bdfr V2
# Use BDFR to download both the archive and media for a given post
def get_comment_context(post, input_folder):
id = post.get("savedcomment")

contextFolder = os.path.join(inputFolder,"context/")
filehelper.assurePathExists(contextFolder)
context_folder = os.path.join(input_folder, "context/")
filehelper.assure_path_exists(context_folder)

if id is not None:
try:
subprocess.call(["python", "-m", "bdfr", "archive", "-l", post['permalink'], contextFolder])
subprocess.call(["python", "-m", "bdfr", "download", "-l", post['permalink'], "--file-scheme", "{POSTID}", contextFolder])
subprocess.call(["python", "-m", "bdfr", "archive", "-l", post['permalink'], context_folder])
subprocess.call(["python", "-m", "bdfr", "download", "-l", post['permalink'], "--file-scheme", "{POSTID}",
context_folder])
except Exception as e:
logging.error(e)
print(post['id'])
for dirpath, dnames, fnames in os.walk(contextFolder):
for dirpath, dnames, fnames in os.walk(context_folder):
for f in fnames:
print(f)
if post['id'] in f and f.endswith('.json'):
print("Loaded")
post = filehelper.loadJson(os.path.join(dirpath, f))
post = filehelper.load_json(os.path.join(dirpath, f))

for comment in post["comments"]:
if comment["id"] == id:
Expand All @@ -70,9 +74,9 @@ def getCommentContext(post, inputFolder):
return post


#Convert comments into posts
def handleComments(comment, context):
#Filter out posts
# Convert comments into posts
def handle_comments(comment):
# Filter out posts
if comment.get('parent_id') is None:
return comment

Expand All @@ -84,5 +88,4 @@ def handleComments(comment, context):
post["selftext"] = post["body"]
post["permalink"] = "https://www.reddit.com/r/{subreddit}/comments/{submission}/{title}/{id}".format(
subreddit=post["subreddit"], submission=post["submission"], title=post["title"], id=post["id"]
)

)
3 changes: 2 additions & 1 deletion bdfrtohtml/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@

import time


def float_to_datetime(value):
    """Format *value* as ``YYYY-MM-DD HH:MM:SS`` in local time.

    *value* is passed to ``time.localtime`` and is therefore interpreted as
    seconds since the epoch.
    """
    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(value))
23 changes: 12 additions & 11 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from setuptools import setup

setup(
    name='bdfrtohtml',
    packages=['bdfrtohtml'],
    version='1.3.0',
    description='Convert the output of BDFR to HTML',
    author='BlipRanger',
    author_email='blipranger@shrubbery.co',
    url='https://github.com/BlipRanger/bdfr-html',
    download_url='https://github.com/BlipRanger/bdfr-html/releases/tag/v1.3.0',
    keywords=['bdfr', 'reddit', 'downloader'],
    # Third-party modules the package imports at runtime; without this list
    # a plain `pip install` yields a package that fails on import.
    install_requires=[
        'click',
        'Jinja2',
        'Markdown',
        'requests',
    ],
    classifiers=[],
)
9 changes: 6 additions & 3 deletions start.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@

while True:
if runBdfr:
subprocess.call(["python3.9", "-m", "bdfr", "archive", "--user", "me", "--saved", "-L", limit, "--authenticate", inFolder])
subprocess.call(["python3.9", "-m", "bdfr", "download", "--user", "me", "--saved", "-L", limit, "--exclude-id-file", idList, "--authenticate", "--file-scheme", "{POSTID}", inFolder])
subprocess.call(["python3.9", "bdfrToHTML.py", "--input", inFolder, "--output", outFolder, "--recover_comments", recover_comments, "--archive_context", archive_context, "--delete_input", delete])
subprocess.call(["python3.9", "-m", "bdfr", "archive", "--user", "me", "--saved", "-L", limit,
"--authenticate", inFolder])
subprocess.call(["python3.9", "-m", "bdfr", "download", "--user", "me", "--saved", "-L", limit,
"--exclude-id-file", idList, "--authenticate", "--file-scheme", "{POSTID}", inFolder])
subprocess.call(["python3.9", "bdfrToHTML.py", "--input", inFolder, "--output", outFolder, "--recover_comments",
recover_comments, "--archive_context", archive_context, "--delete_input", delete])
time.sleep(int(freq)*60)

0 comments on commit be524dc

Please sign in to comment.