forked from weiran/wordpress-gatsby-migrator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimporter.js
99 lines (84 loc) · 2.93 KB
/
importer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
const feedRead = require('davefeedread')
const TurndownService = require('turndown')
// Shared Turndown instance used by importPosts to convert each post's HTML
// content to Markdown; configured with headingStyle 'atx' and
// codeBlockStyle 'fenced'.
const turndownService = new TurndownService({
headingStyle: 'atx',
codeBlockStyle: 'fenced'
})
const cheerio = require('cheerio')
const uuid = require('uuid/v4') // v4 generates random UUIDs
const url = require('url')
const path = require('path')
/**
 * Finds the `wp:postmeta` entry whose key is `passthrough_url`.
 *
 * The feed parser exposes a single `<wp:postmeta>` element as an object but
 * repeated elements as an array, so both shapes are accepted here.
 *
 * @param {Object|Object[]|undefined} postMeta - value of `item['wp:postmeta']`
 * @returns {Object|undefined} the matching meta entry, or undefined if none
 */
const findPassthroughMeta = (postMeta) => {
  if (!postMeta) {
    return undefined
  }
  const entries = Array.isArray(postMeta) ? postMeta : [postMeta]
  return entries.find(entry => {
    const metaKey = entry && entry['wp:meta_key']
    return Boolean(metaKey) && metaKey['#'] === 'passthrough_url'
  })
}

/**
 * Parses a WordPress export feed and returns its published blog posts in a
 * flat shape, with image references rewritten and a Markdown rendition added.
 *
 * @param {string} file - the feed XML as a string (handed to parseFeed)
 * @returns {Promise<Object[]>} one object per published post with the keys
 *   `title`, `date`, `content` (HTML with image URLs replaced by generated
 *   file names), `categories`, `slug`, `images` (the `{ url, fileName }`
 *   list from parseImages), `markdownContent`, and optionally
 *   `passthroughUrl`.
 * @throws rejects with whatever error parseFeed reports
 */
const importPosts = async (file) => {
  const feed = await parseFeed(file)

  // Text of a namespaced feed element, or undefined when the element is
  // missing (so items without WordPress fields are skipped, not fatal).
  const fieldText = (item, key) => (item[key] ? item[key]['#'] : undefined)
  const isPublishedPost = item =>
    fieldText(item, 'wp:post_type') === 'post' &&
    fieldText(item, 'wp:status') === 'publish'

  return feed.items.filter(isPublishedPost).map(item => {
    const mappedItem = {
      title: item.title,
      date: item.date,
      // An empty <content:encoded> has no text node; fall back to '' so the
      // string operations and Markdown conversion below do not throw.
      content: fieldText(item, 'content:encoded') || '',
      categories: item.categories,
      slug: item['wp:post_name']['#']
    }

    // Add passthroughUrl if exists
    const passthroughMeta = findPassthroughMeta(item['wp:postmeta'])
    if (passthroughMeta) {
      mappedItem.passthroughUrl = passthroughMeta['wp:meta_value']['#']
    }

    // Add images array and point the content at the generated file names.
    // A function replacer is used so that a `$` in the file name is never
    // interpreted as a replacement pattern.
    const images = parseImages(mappedItem.content)
    images.forEach(image => {
      mappedItem.content = mappedItem.content.replace(image.url, () => image.fileName)
    })
    mappedItem.images = images

    // Strip out Squarespace content tags
    mappedItem.content = removeSquarespaceCaptions(mappedItem.content)

    // Add Markdown conversion
    mappedItem.markdownContent = turndownService.turndown(mappedItem.content)

    return mappedItem
  })
}
/**
 * Promise adapter around the callback-based `feedRead.parseString`.
 *
 * @param {string} file - feed contents passed straight to the parser
 * @returns {Promise<Object>} resolves with the parser's result, rejects with
 *   the parser's error
 */
const parseFeed = (file) => new Promise((resolve, reject) => {
  const onParsed = (error, result) => {
    if (error) {
      reject(error)
      return
    }
    resolve(result)
  }
  // Second argument (charset) is deliberately left undefined, as before.
  feedRead.parseString(file, undefined, onParsed)
})
/**
 * Collects every `<img>` in an HTML fragment and assigns each one a freshly
 * generated file name that keeps the original file extension.
 *
 * `<img>` elements without a `src` attribute are skipped; previously they
 * made `url.parse` throw and aborted the whole import.
 *
 * @param {string} content - HTML of a single post
 * @returns {{url: string, fileName: string}[]} one entry per `<img>` with a
 *   `src`, in document order; `fileName` is a random UUID plus the extension
 *   taken from the URL's path (empty when the path has no extension)
 */
const parseImages = (content) => {
  const $ = cheerio.load(content)
  return $('img')
    .toArray()
    .filter(element => Boolean(element.attribs['src']))
    .map(element => {
      const imageUrl = element.attribs['src']
      // pathname is null for sources with no path part (e.g. "?id=3");
      // path.extname requires a string.
      const imagePath = url.parse(imageUrl).pathname || ''
      return {
        url: imageUrl,
        fileName: `${uuid()}${path.extname(imagePath)}`
      }
    })
}
/**
 * Unwraps `[caption ...]...[/caption]` shortcodes, keeping only the markup
 * inside each one (from its first `<` after the opening tag to its last `>`)
 * and dropping the trailing caption text.
 *
 * Each caption is matched on its own: every wildcard is barred from crossing
 * a `[/caption]`, so several captions on one line no longer collapse into a
 * single match that deletes everything between them. As before, `.` does not
 * match newlines, so a caption spanning multiple lines is left untouched.
 *
 * @param {string} post - post HTML
 * @returns {string} the HTML with caption shortcodes unwrapped
 */
const removeSquarespaceCaptions = (post) => {
  // Pieces, in order:
  //   \[caption ... "\]   opening tag (lazy: stops at the first `"]`)
  //   (< ... >)           the wrapped markup, greedy but within this caption
  //   ...                 caption text, discarded
  //   \[\/caption\]       closing tag
  const captionPattern =
    /\[caption(?:(?!\[\/caption\]).)*?"\](<(?:(?!\[\/caption\]).)*>)(?:(?!\[\/caption\]).)*\[\/caption\]/g
  return post.replace(captionPattern, '$1')
}
// Public API: only importPosts is exported; the other functions in this file
// stay private to the module.
module.exports = { importPosts: importPosts }