Blockdown.coffee 51.2 KB
Newer Older
1
2
################################################################################
#
3
#	Better
4
#
5
6
#	Blockdown Parser: parses Blockdown content and generates the
#	data for the Better site and iOS app.
7
#
8
#	This is Independent Technology.
9
#
10
11
12
13
14
#	▲❤ We practice Ethical Design (https://ind.ie/ethical-design)
#
#	© Aral Balkan. © Ind.ie. All Rights Reserved.
#	Released with love by Ind.ie under GNU AGPLv3 or later.
#	Free as in freedom. Please see the LICENSE file.
15
16
17
#
################################################################################

18
fs = require 'fs-extra-as-promised'
19
path = require 'path'
Aral Balkan's avatar
Aral Balkan committed
20

21
22
23
24
25
26
27
28
29
30
31
32
33
34
marked = require 'marked'
jsonlint = require 'jsonlint'

chai = require 'chai'
expect = chai.expect
assert = chai.assert
chai.should()

Promise = require 'thrush'

glob = require 'glob'
globAsync = Promise.promisify glob

eyespect = require 'eyespect'
Aral Balkan's avatar
Aral Balkan committed
35
inspect = eyespect.inspector { stream: null }
36
winston = require 'winston'
37
app = require './App'
38
39
ContentBlockerRule = require './ContentBlockerRule'

40
41
set = require 'indie-set'

42
43
GlobalStatistics = require './GlobalStatistics'

Aral Balkan's avatar
Aral Balkan committed
44
require './StringExtensions' # for String::trim()
45

46
47
48
49
50
51
52
53
54
55
# Bind the module to a local name: a bare `require 'crypto'` leaves `crypto`
# unresolved here (or resolved to an unrelated global without createHash).
crypto = require 'crypto'

#
# Calculate checksum
#
# Returns the hexadecimal MD5 digest of `str`, hashed as UTF-8.
# saveBlockerListJSON stores this value as `md5` in metadata.json.
#
checksum = (str) ->
	return crypto
		.createHash('md5')
		.update(str, 'utf8')
		.digest('hex')
56
57
58

class Blockdown

59
60
	# winston logger; created in the constructor.
	log: null

	# String constants (presumably render-target names — TODO confirm usage).
	site: "site"
	app: "app"

	# Renderers and rule collections; all are initialised in the constructor.
	markdownRenderer: null		# (marked.Renderer)
	blockdownRenderer: null		# (marked.Renderer)
	rules: null					# (Array)
	rulePartials: null			# (Object)

	# Parser state.
	parserIsInStatisticsSection: false
	numberOfTrackers: 0
	pathOfTheFileCurrentlyBeingRendered: null

	# Theme used for the current render (set by renderDataForSite).
	theme: null

	#
	# Cache-related (all set by setupCache).
	#
	useContentCache: false
	contentHasNotChanged: false
	contentCurrentModifiedTimes: null
	contentPreviousModifiedTimes: null


87
88
89
90
91
92
	#
	# Public methods.
	#

	constructor: ->
		#
		# Creates the rule collections, the two marked renderers and the logger,
		# then runs setupCache (which needs @log to exist already).
		#

		# Every instance starts with its own, empty rule collections.
		@rules = []
		@rulePartials = {}

		# Renderer for regular Markdown: only image rendering is customised.
		plainRenderer = new marked.Renderer()
		plainRenderer.image = @imageRenderer
		plainRenderer.options.headerPrefix = '' # Otherwise we get undefinedheader-slug
		plainRenderer.options.langPrefix = ''	# Ditto for <code class="undefined…">
		plainRenderer.options.gfm = true
		@markdownRenderer = plainRenderer

		# Renderer for the Blockdown code sections: several hooks are replaced.
		blockRenderer = new marked.Renderer()
		rendererOverrides =
			code: @codeRenderer
			image: @imageRenderer
			list: @listRenderer
			listitem: @listItemRenderer
			heading: @headingRenderer
		blockRenderer[hookName] = hookHandler for hookName, hookHandler of rendererOverrides
		@blockdownRenderer = blockRenderer

		# Set up logging: info and above to the console, debug and above to the log file.
		logFile = path.join app.logsDirectory, 'Blockdown.log'
		consoleTransport = new (winston.transports.Console)({ level: 'info'})
		fileTransport = new (winston.transports.File)({ filename: logFile, level: 'debug' })
		@log = new winston.Logger({ transports: [consoleTransport, fileTransport] })

		# Setup cache
		@setupCache()

		# @log.debug "Blockdown initiated. Logs at #{logFile}"
125
126


127
	#
128
	# Setup cache.
129
	#
130
131
132
	# We are now using the cache both during development and production as we
	# use the content cache to calculate the diff of changes since the last
	# deployment.
133
	#
134
	setupCache: =>
		#
		# Reads the theme and content modified-time snapshots from
		# <home>/.private/cache and sets:
		#
		#   @useContentCache               false as soon as any theme template differs
		#                                  between the previous and current snapshot
		#   @contentHasNotChanged          true when the two content snapshots are
		#                                  textually identical
		#   @contentCurrentModifiedTimes   parsed current content snapshot
		#   @contentPreviousModifiedTimes  parsed previous content snapshot
		#
		# Nothing in this method detects a production deployment; the decision is
		# based purely on the theme snapshots.
		#
		# Throws if any of the four snapshot files is missing or is not valid JSON.
		#
		cacheDirectory = path.join app.homeDirectory, '.private', 'cache'
		themeCacheCurrentModifiedTimesFile = path.join cacheDirectory, 'current-modified-times', 'themes.json'
		themeCachePreviousModifiedTimesFile = path.join cacheDirectory, 'previous-modified-times', 'themes.json'

		themeCacheCurrentModifiedTimes = fs.readJsonSync themeCacheCurrentModifiedTimesFile, 'utf-8'
		themeCachePreviousModifiedTimes = fs.readJsonSync themeCachePreviousModifiedTimesFile, 'utf-8'

		# Compare the union of both snapshots: iterating only the previous one
		# misses templates that were added since the last build.
		themeFileNames = Object.keys themeCachePreviousModifiedTimes
		for fileName in Object.keys themeCacheCurrentModifiedTimes
			themeFileNames.push fileName unless fileName of themeCachePreviousModifiedTimes

		# Check if there are any template changes (if there are, this will require a full rebuild),
		# otherwise, we can use the cache and we won’t need to re-render the content.
		@useContentCache = true
		for fileName in themeFileNames when (fileName.indexOf '/templates/') != -1
			if themeCachePreviousModifiedTimes[fileName] != themeCacheCurrentModifiedTimes[fileName]
				# A template was added, removed or modified: do not use the cache.
				@log.info "\t✓ Theme template changed (#{fileName}), *not* using cache."
				@useContentCache = false
				break

		# Set up the content cache (we will use it, if nothing else, to calculate
		# the last modified time for the metadata).
		contentCacheCurrentModifiedTimesFile = path.join cacheDirectory, 'current-modified-times', 'content.json'
		contentCachePreviousModifiedTimesFile = path.join cacheDirectory, 'previous-modified-times', 'content.json'

		_contentCurrentModifiedTimes = fs.readFileSync contentCacheCurrentModifiedTimesFile, 'utf-8'
		_contentPreviousModifiedTimes = fs.readFileSync contentCachePreviousModifiedTimesFile, 'utf-8'

		# Compared as raw file text, so a difference in key order or whitespace
		# counts as a content change.
		@contentHasNotChanged = _contentCurrentModifiedTimes == _contentPreviousModifiedTimes

		@contentCurrentModifiedTimes = JSON.parse(_contentCurrentModifiedTimes)
		@contentPreviousModifiedTimes = JSON.parse(_contentPreviousModifiedTimes)

		# Inform person if we are using the content cache.
		if @useContentCache
			console.log "\t✓ No changes to theme templates; using cache."
174

175

176

177
	#
178
	# Render data for site.
179
	#
180
	renderDataForSite: =>
		#
		# Renders the complete data set for the site and returns a promise for it.
		#
		# Steps, in order: optionally clean the site data directory, render the
		# Blockdown content, save the rule partials and blocker list, copy content
		# and theme assets, and (when using the cache) remove the build output of
		# deleted source files. Any failure is rethrown as an Error prefixed with
		# "[Blockdown: site data]".
		#
		# TODO: Need a separate pass to create the indices as they will include
		# folders as well as the content files. This could be done at the end,
		# on the data folder.
		#
		@theme = app.siteTheme

		# Start from a clean slate for this render.
		@rules = []
		GlobalStatistics.reset()

		# Clean the data directory before starting only if we’re not using the cache.
		cleanDataDirectoryIfNeeded = =>
			return true if @useContentCache
			fs.removeAsync "#{app.dataForSiteDirectory}/*"

		(Promise.try cleanDataDirectoryIfNeeded)
			.then =>
				@renderBlockdown app.contentDirectory, app.dataForSiteDirectory, app.siteTheme
			.then =>
				# TODO: Make this async
				# TODO: Update render pipeline to only render content (and these partials)
				#       in the content pass.
				@saveRulePartials()
				@saveBlockerListJSON app.dataForSiteDirectory
			.then =>
				# Content assets are only skipped when the cache is on and nothing changed.
				unless @useContentCache and @contentHasNotChanged
					@copyAssets @contentAssets('site'), app.dataForSiteDirectory
			.then =>
				if @useContentCache and app.themeHasNotChanged
					console.log "\t✓ Theme has not changed, not copying theme assets for site."
					return
				console.log "\t✓ Copying theme assets for site."
				@copyAssets @themeForSiteDataAssets(), app.dataForSiteDirectory
			.then =>
				@removeDeletedFiles() if @useContentCache
			.catch (e) =>
				throw new Error "[Blockdown: site data] #{e} #{e.stack}"
237

238
239
240
241
	#
	# Private methods.
	#

242
	# Returns the partial HTML after a Set render.
243
244
	partial: (fullHTML) ->
		# Strips the first '<html><head></head><body>' and the first
		# '</body></html>' from the rendered document, leaving the inner HTML.
		withoutOpening = fullHTML.replace '<html><head></head><body>', ''
		withoutOpening.replace '</body></html>', ''

245
246
247
248
249
250
251
252
	#
	# Takes HTML rendered from Markdown and wraps sections into section tags,
	# transferring the id from the H2 to the section tag.
	#
	wrapSections: (html) ->
		#
		# Wraps everything from one `<h2 id="…">` up to the next (or up to
		# `</main>`) in a `<section>` that takes over the heading's id.
		# Returns the rewritten HTML.
		#
		# HTML with no sections is returned unchanged, so that no unmatched
		# `</section>` is injected before `</main>`.
		#

		# Write out the opening section tags
		html = html.replace(/<h2 id="(.*?)">(.*?)<\/h2>/g, "<section id=\"$1\">\n\t<h2>$2</h2>")

		# Without any section there is nothing to close.
		return html unless /<section.*?>/.test html

		# Write out the closing section tags (one in front of every opening tag)
		html = html.replace(/(<section.*?>)/g, "\n</section>\n$1")

		# Remove the erroneous first </section> tag
		html = html.replace('</section>', '')

		# Write out the last </section> tag
		html.replace('</main>', '</section></main>')

262
263
264
	# Image file extensions shared by the content and theme asset lists.
	imageExtensions: ['.jpg', '.gif', '.png', '.svg']

	contentAssets: =>
		# Describes the content assets to copy: the image and data extensions
		# joined with '|' (the form copyAssets puts into its glob pattern) and
		# the content directory to search. Arguments passed by callers are ignored.
		extensionPattern = [@imageExtensions..., '.json', '.html', '.js'].join '|'
		{ extensions: extensionPattern, path: app.contentDirectory }

Aral Balkan's avatar
Aral Balkan committed
272
	# Now that we have the new unified themes repository, we can just copy any
273
274
275
276
	# files from the /static/ folder over without worrying about limiting
	# to a subset of extensions but I’m keeping this as it is safer in that we
	# won’t accidentally let an unsupported file type get published.

277
	themeAssetExtensions: =>
		# Font, script, style and markup extensions followed by the image
		# extensions, joined with '|'.
		baseExtensions = ['.woff', '.js', '.css', '.html']
		[baseExtensions..., @imageExtensions...].join '|'
279

280
	themeForSiteDataAssets: =>
		# Describes the theme assets to copy for the site: the general theme
		# extensions plus extra font and Safari extension file types, looked up
		# in the theme's `static` folder.
		extensionGroups = [
			@themeAssetExtensions()
			['.woff2', '.ttf', '.svg', '.eot'].join('|')
			['.safariextz', '.plist'].join('|')
		]
		staticDirectory = path.join app.themeForSiteDataDirectory, 'static'
		{ extensions: extensionGroups.join('|'), path: staticDirectory }
285
286
287
288
289

	#
	# Copies assets to the destination
	#
	copyAssets: (assets, destinationPath) =>
		#
		# Copies every file under `assets.path` whose name ends in one of
		# `assets.extensions` (a '|'-separated list) to the same relative
		# location under `destinationPath`, creating directories as needed.
		# Returns the promise of the glob/series chain.
		#
		assetPattern = "#{assets.path}/**/*@(#{assets.extensions})"

		(globAsync assetPattern)
			.series (sourceFile) =>
				# Re-root the file's directory from the asset path onto the destination.
				relativeDirectory = (path.dirname sourceFile).replace assets.path, ''
				targetDirectory = path.join destinationPath, relativeDirectory
				targetFile = path.join targetDirectory, (path.basename sourceFile)

				# @log.info "Copying asset: #{sourceFile}"

				fs.ensureDirAsync targetDirectory
					.then =>
						fs.copyAsync sourceFile, targetFile
308

309

310
311
312
313
314
315
316
317
318
319
320
	#
	# Returns whether the string1 ends with string2
	#
	# Note: We also check for > 0, below, to handle the edge case
	# where string1 does not end with string2 (so index = -1)
	# and where string2 is one character longer than string1,
	# resulting in string1.length - string2.length == -1 == index.
	#
	endsWith: (string1, string2) ->
		# Uses the LAST occurrence of string2: the first occurrence (indexOf)
		# gives a false negative whenever string2 also appears earlier in
		# string1, e.g. 'sites/example.md/notes.md' and '.md'.
		#
		# The `> 0` guard is kept as it was: it rejects the not-found case
		# (index -1, which can equal the length difference) and also means a
		# string1 that is exactly string2 is NOT reported as a match.
		# removeDeletedFiles depends on that — a bare 'index.md' would otherwise
		# resolve to the data directory itself.
		index = string1.lastIndexOf(string2)
		index > 0 && index == string1.length - string2.length
321
322
323
324
325
326
327


	#
	# Removes deleted files (when using the cache)
	#
	removeDeletedFiles: =>
		#
		# Removes from the site data directory the build output of every source
		# file that is listed in the previous content snapshot but is missing
		# from the current one. Removal is synchronous.
		#
		# Only relevant for content at the moment (as we do a complete rebuild
		# whenever the theme changes and this will not even get called).
		#

		# @log.info "Looking for deleted files in the content…"

		for file of @contentPreviousModifiedTimes
			if @contentCurrentModifiedTimes[file] == undefined
				# File has been deleted. Make sure its compiled assets are removed
				# from the build also.

				console.log "\t✓ Source file #{file} was deleted; removing from build."

				# Set the base of the path of the item to delete
				pathToDelete = app.dataForSiteDirectory

				if @endsWith file, 'index.md'
					#
					# If the index.md of an entry is removed, we assume that the entry itself was
					# removed and delete its folder from the build.
					#
					filePathComponents = file.split('/')
					filePathComponents.pop()
					_file = filePathComponents.join('/')

					pathToDelete = path.join pathToDelete, _file
				else if @endsWith file, '.md'
					#
					# MD files are translated to HTML files. Only the trailing
					# extension is rewritten: a plain '.md' string replace would
					# hit the first '.md' in the path (e.g. inside a '.md' domain
					# folder) and point at the wrong file.
					#
					_file = file.replace(/\.md$/, '.html')
					pathToDelete = path.join pathToDelete, _file
				else
					#
					# For all other files, just remove them directly
					#
					pathToDelete = path.join pathToDelete, file

				# @log.info "About to delete #{pathToDelete} from the build folder (data)."

				fs.removeSync pathToDelete


370
371
372
	#
	# Renders Blockdown from content path to data path.
	#
373
	renderBlockdown: (contentPath, dataPath, theme) =>
		#
		# Renders every Markdown file under `contentPath` to an HTML page under
		# `dataPath`, then renders the theme and index pages.
		#
		# contentPath - root folder of the Blockdown (.md) content
		# dataPath    - root folder the rendered pages are written to
		# theme       - not read here; templates come from @theme
		#
		# Returns a promise for `indices`: an object mapping each relative folder
		# to the list of HTML file names rendered (or reused from cache) in it.
		#
		# Files are processed one after the other (`.series`), which is what makes
		# the shared `titleOfPage` variable safe.
		#
		indices = {}
		titleOfPage = ''

		# Used to hold the index pages of the various categories.
		indexContent =
			spotlight: {}
			sites: {}
			trackers: {}
			none: {}

		(globAsync "#{contentPath}/**/*.md", {})
			.series (file) =>
				# @log.info "Reading blockdown file: #{file}"

				#
				# Check cache
				#
				if @useContentCache
					parsedFilePath = (path.parse file)

					# NOTE(review): the key is built by dropping a fixed number of leading
					# path components, so it depends on where the project is checked out —
					# confirm against the code that writes content.json.
					indexToStartFrom = if app.isRunningInProduction then 4 else 5

					filePathKey = path.join parsedFilePath.dir.split('/')[indexToStartFrom..].join('/'), parsedFilePath.base

					# @log.info "File path key: #{filePathKey}"
					# @log.info "Current: #{@contentCurrentModifiedTimes[filePathKey]}"
					# @log.info "Previous: #{@contentPreviousModifiedTimes[filePathKey]}"

					if @contentCurrentModifiedTimes[filePathKey] == @contentPreviousModifiedTimes[filePathKey]

						################################################################################
						#
						# Use cache.
						#
						################################################################################

						#
						# Add the rule partial to the rules array
						#
						rulePartialFilePath = path.join app.cacheDirectory, 'rule-partials', "#{filePathKey}.rule.json"

						if fs.existsSync rulePartialFilePath
							rulePartialString = fs.readFileSync rulePartialFilePath, 'utf-8'
							rulePartialArray = JSON.parse rulePartialString
							@rules = @rules.concat rulePartialArray

						#
						# Update the indices
						#
						# Only the trailing extension is rewritten; a plain string replace
						# would hit the first '.md' in the path (e.g. a '.md' domain folder).
						outputHTMLFileName = filePathKey.replace /\.md$/, '.html'
						indexPathComponents = outputHTMLFileName.split('/')
						indexHTMLFileName = indexPathComponents.pop()
						indexRelativeFolder = "/#{indexPathComponents.join('/')}"

						if indices[indexRelativeFolder] is undefined
							indices[indexRelativeFolder] = []
						indices[indexRelativeFolder].push indexHTMLFileName

						indexInfoJSONFileName = "#{indexHTMLFileName}.json"
						indexInfoJSONFilePath = path.join app.indicesDirectory, indexRelativeFolder, indexInfoJSONFileName
						indexInfoExists = fs.existsSync indexInfoJSONFilePath
						if indexInfoExists
							indexInfo = fs.readJsonSync indexInfoJSONFilePath
							indexContent[indexInfo.category][indexInfo.titleOfPageClean] = indexInfo.listItem
						else
							console.log "\t⚠ Could not find index info JSON from path #{indexInfoJSONFilePath}. Ignoring…"

						# Break out so we don’t fall through a fresh render.
						return

				################################################################################
				#
				# Fresh render.
				#
				################################################################################

				fs.readFileAsync file, 'utf-8'
					.then (content) =>

						if @useContentCache
							console.log "\t✓ Rendering Blockdown in file #{file}"

						# Look for the category inside the content tree only, so that a parent
						# directory that happens to be called 'sites' or 'trackers' is not
						# mistaken for it. -1 means “no category” (index 0 is a valid position).
						fileComponents = (file.replace contentPath, '').split('/')

						categoryIndex = fileComponents.indexOf 'sites'
						categoryIndex = fileComponents.indexOf 'trackers' if categoryIndex is -1
						hasCategory = categoryIndex isnt -1

						category = if hasCategory then fileComponents[categoryIndex] else 'none'
						pageDomain = if hasCategory then fileComponents[categoryIndex+1] else 'none'

						# Add more specific ‘site’ or ‘tracker’ to category class list if necessary
						# (i.e., for <category>/<domain>/<file>.md pages).
						categoryForTemplate = if hasCategory and categoryIndex == (fileComponents.length - 3) then "#{category} #{category.substr(0, category.length-1)}" else category

						#
						# Add the actions to the bottom of pages that have both a category and a domain
						# (i.e., content pages)
						#

						if category isnt 'none' and pageDomain isnt 'none'
							actionsBlock = """

## About Better

Better is a Safari content blocker for <a href='https://itunes.apple.com/us/app/better-by-ind.ie/id1080964978?mt=8'>iPhone</a>, <a href='https://itunes.apple.com/us/app/better-by-ind.ie/id1080964978?mt=8'>iPad</a>, and <a href='https://safari-extensions.apple.com/details/?id=better.fyi.safari.extension-5Q42VF5GXA'>Mac</a>. It protects you from trackers and malvertising by enforcing the principles of [Ethical Design](https://ind.ie/ethical-design).

## Get involved

[Improve page](https://source.ind.ie/better/content/blob/master/#{category}/#{pageDomain}/index.md) | [Report issue](https://source.ind.ie/better/content/issues)  | [Discuss](https://forum.ind.ie/c/better)

"""
							content += actionsBlock

						@pathOfTheFileCurrentlyBeingRendered = file

						# Save the title of the page so that we can update the <head>
						# information, later.
						titleFromHeading = content.match /^# (.*)?\n/

						if titleFromHeading is undefined or titleFromHeading is null
							titleOfPage = ""
						else
							# Set the title of the page
							titleOfPage = titleFromHeading[1]

							# Strip the markdown from the title for the site (keep it for the app
							# as we use it to intelligently truncate the title displayed in the navigation bar).
							# Only valid markup for headings is strong and emphasized, so just strips the *s.
							titleOfPageClean = titleOfPage.replace new RegExp('\\*', 'g'), ''

							# Update the index page
							renderedTitleOfPage = titleOfPage.replace /^\*\*(.*?)\*\*(.*)/, "<strong>$1</strong>$2"
							listItem = "\t<li><a href='/#{category}/#{pageDomain}'>#{renderedTitleOfPage}</a></li>\n"
							indexContent[category][titleOfPageClean] = listItem

							#
							# Save the index title info for the cache.
							#
							pathObject = path.parse file
							relativeFolder = pathObject.dir.replace contentPath, ''
							jsonFileName = "#{pathObject.name}.html.json"
							indexCacheFile = path.join app.indicesDirectory, relativeFolder, jsonFileName
							indexCacheObject =
								category: category
								titleOfPageClean: titleOfPageClean
								listItem: listItem

							# @log.info "Index cache file: #{indexCacheFile}"
							fs.outputJSONSync indexCacheFile, indexCacheObject

							# The <title> of the page uses the stripped title.
							titleOfPage = "#{titleOfPageClean} | Better"

						marked.setOptions
							renderer: @blockdownRenderer
							gfm: true

						# NOTE(review): the write promise below is only awaited by the series if
						# marked hands the callback’s return value back to its caller — confirm
						# for the installed marked version.
						marked content, (error, content) =>
							if error
								#
								# Blockdown parser fatal error: panic!
								#
								# Something went wrong in markdown parsing. This will lead to corrupted
								# data and we cannot have that. Better to fail and provide as much information as possible
								# so this can be debugged and fixed in development before hitting production.
								#
								throw new Error "[Blockdown: parser] #{error} #{error.stack}"
							else
								pathObject = path.parse file

								# Find out which relative folder we’re in
								relativeFolder = pathObject.dir.replace contentPath, ''

								if relativeFolder is ''
									relativeFolder = '/'

								if indices[relativeFolder] is undefined
									indices[relativeFolder] = []

								htmlFileName = "#{pathObject.name}.html"
								indices[relativeFolder].push htmlFileName

								# Selected item
								selectedItem = ''
								currentDirectory = pathObject.dir
								parts = currentDirectory.split('/content/')
								if parts.length >= 2
									selectedItem = parts[1]

								# Path for page (only the trailing extension is rewritten).
								pagePath = file.replace contentPath, dataPath
								pagePath = pagePath.replace /\.md$/, '.html'

								# Render the page.
								headerPartialHTML = @partial(set.render(@theme.header, {navigationList: @navigationList selectedItem}))
								pageHTML = set.render(@theme.page, {title: titleOfPage, header: headerPartialHTML, content: content, category: categoryForTemplate})
								pageHTML = @wrapSections pageHTML
								fs.outputFileAsync pagePath, pageHTML
			.then =>
				# Sort the trackers and sites indices alphabetically.
				indexContent.trackers = @alphabetiseIndex indexContent.trackers
				indexContent.sites = @alphabetiseIndex indexContent.sites

				# Theme-only pages are rendered only when the cache is not in use
				# (i.e., after a theme template change).
				unless @useContentCache
					# Render the /news index.
					@renderPage 'news', dataPath, '/news/index.html', {}, 'news'
			.then =>
				unless @useContentCache
					# Render the /spotlight index.
					@renderPage 'spotlight', dataPath, '/spotlight/index.html', {}, 'spotlight'
			.then =>
				unless @useContentCache
					# Render the /spotlight/1 index.
					@renderPage 'spotlightIssue1', dataPath, '/spotlight/1/index.html', {}, 'spotlight'
			.then =>
				# Don’t render index files if the theme and content haven’t changed.
				unless @useContentCache and @contentHasNotChanged
					# Render the /sites index.
					@renderPage 'sites', dataPath, '/sites/index.html', {list: indexContent.sites}, 'sites'
			.then =>
				# Don’t render index files if the theme and content haven’t changed.
				unless @useContentCache and @contentHasNotChanged
					# Render /trackers index.
					@renderPage 'trackers', dataPath, '/trackers/index.html', {list: indexContent.trackers}, 'trackers'
			.then =>
				unless @useContentCache
					# Render /reviews index.
					@renderPage 'reviews', dataPath, '/reviews/index.html', {}, 'reviews'
			.then =>
				unless @useContentCache
					# Render /support index.
					@renderPage 'support', dataPath, '/support/index.html', {}, 'support'
			.then =>
				unless @useContentCache
					# Render home page.
					@renderHomePageStatistics().then (homePageStatisticsHTML) =>
						@renderPage 'home', dataPath, '/index.html', {statistics: homePageStatisticsHTML}, 'home'
			.then =>
				return indices

649
650
651
652
653
	#
	# Takes an index object and returns a string of the list items, alphabetically sorted by the key.
	#
	alphabetiseIndex: (index) =>
		# Order the titles with an English, case- and accent-insensitive comparison …
		sortedTitles = (Object.keys index).sort (first, second) ->
			first.localeCompare(second, 'en', {'sensitivity': 'base'})

		# … and concatenate their list items in that order.
		listItems = ''
		listItems += index[title] for title in sortedTitles
		listItems


660
661
662
	#
	# Renders a page on the site.
	#
663
	renderPage: (name, dataPath, pathFragment, data={}, category='none') =>
		#
		# Renders the theme template called `name` into the page template and
		# writes the result to `dataPath` + `pathFragment`.
		#
		# name         - key of the template in @theme; also used (capitalised)
		#                for the page title and to pick the selected navigation item
		# data         - values passed to the content template
		# category     - category value passed to the page template
		#
		# Returns the promise of the file write.
		#
		renderPartial = (template, templateData) => @partial(set.render(template, templateData))

		contentPartialHTML = renderPartial @theme[name], data
		headerPartialHTML = renderPartial @theme.header, {navigationList: @navigationList name}

		# The title is the name with the first letter capitalised, plus the site suffix.
		pageTitle = "#{name[0].toUpperCase()}#{name.slice(1)} | Better"

		pageHTML = set.render(@theme.page, {title: pageTitle, header: headerPartialHTML, content: contentPartialHTML, category: category})

		fs.outputFileAsync (path.join dataPath, pathFragment), pageHTML

676
677
678
679
680
681
682
683
	#
	# Returns configured navigation list data structure with the
	# passed navigation element marked as selected.
	#
	navigationList: (selectedItem) =>
		#
		# Builds the navigation entries for the local links; an entry is marked
		# as selected when `selectedItem` starts with its class name.
		#
		# (The forum and source links are not here as they’re external and cannot be
		# highlighted dynamically in the navigation when navigated to.)
		#
		localSections = [
			['home', '/', 'Home']
			['news', '/news', 'News']
			['sites', '/sites', 'Sites']
			['trackers', '/trackers', 'Trackers']
			['support', '/support', 'Support']
		]

		navigationEntries = for [sectionClass, sectionLink, sectionLabel] in localSections
			{ class: sectionClass, link: sectionLink, label: sectionLabel, selected: selectedItem.startsWith sectionClass }

		return navigationEntries

700

701
702
703
	#
	# Saves the WebKit content blocking rule file, blockerList.json.
	#
704
	saveBlockerListJSON: (dataDirectory) =>
705

Aral Balkan's avatar
Aral Balkan committed
706
707
708
		# if @contentHasNotChanged
		# 	console.log("\t✓ Content has not changed. Not saving blocker list JSON or its metadata file.")
		# 	return Promise.resolve()
709
710
711
712
713

		#
		# Content has changed. Save the blocker list and metadata files.
		#

714
		blockerListJSONFilePath = path.join dataDirectory, 'blockerList.json'
Aral Balkan's avatar
Aral Balkan committed
715
		metadataFilePath = path.join dataDirectory, 'metadata.json'
Aral Balkan's avatar
Aral Balkan committed
716

717
		args = [@rules]
718
719
		output = JSON.stringify.apply @, args
		hash = checksum(output)
720

721
722
		# TODO: Only update metadata if rules have been updated.

723
724
725
		#
		# Prepare metadata
		#
726
727
728
729
730
731
732
733
734
735
736
737
738
739
		rulesAdded = new Set()
		rulesUpdated = new Set()
		rulesDeleted = new Set()

		sitesAdded = new Set()
		sitesUpdated = new Set()
		sitesDeleted = new Set()

		trackersAdded = new Set()
		trackersUpdated = new Set()
		trackersDeleted = new Set()

		TRACKERS = 'trackers'
		SITES = 'sites'
740
741

		for key, value of @contentCurrentModifiedTimes
Aral Balkan's avatar
Aral Balkan committed
742
743
			uniqueIdentifier = key.replace /^(.*?\/.*?)\/.*$/, "$1"
			uniqueIdentifierSubcategory = key.replace /^.*?\/(.*?)\/.*$/, "$1"
744
745

			# Added?
746
			if (@contentPreviousModifiedTimes[key] == undefined)
747
748
				rulesAdded.add uniqueIdentifier
				if (key.indexOf TRACKERS) > -1
Aral Balkan's avatar
Aral Balkan committed
749
					trackersAdded.add uniqueIdentifierSubcategory
750
				if (key.indexOf SITES) > -1
Aral Balkan's avatar
Aral Balkan committed
751
					sitesAdded.add uniqueIdentifierSubcategory
752
753

			# Updated?
754
			else if (@contentPreviousModifiedTimes[key] != value)
755
756
				rulesUpdated.add uniqueIdentifier
				if (key.indexOf TRACKERS) > -1
Aral Balkan's avatar
Aral Balkan committed
757
					trackersUpdated.add uniqueIdentifierSubcategory
758
				if (key.indexOf SITES) > -1
Aral Balkan's avatar
Aral Balkan committed
759
					sitesUpdated.add uniqueIdentifierSubcategory
760
761
		
		for key, value of @contentPreviousModifiedTimes
762
763
764
			uniqueIdentifier = key.replace /(.*\/*.)\/.*/, "$1"

			# Deleted?
765
			if (@contentCurrentModifiedTimes[key] == undefined)
766
767
				rulesDeleted.add uniqueIdentifier
				if (key.indexOf TRACKERS) > -1
Aral Balkan's avatar
Aral Balkan committed
768
					trackersDeleted.add uniqueIdentifierSubcategory
769
				if (key.indexOf SITES) > -1
Aral Balkan's avatar
Aral Balkan committed
770
					trackersDeleted.add uniqueIdentifierSubcategory
771

772
773
774
775
		rulesChanged = rulesAdded.size + rulesUpdated.size + rulesDeleted.size
		trackersChanged = trackersAdded.size + trackersUpdated.size + trackersDeleted.size
		sitesChanged = sitesAdded.size + sitesUpdated.size + sitesDeleted.size

Aral Balkan's avatar
Aral Balkan committed
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
		# console.log("")
		# console.log("Rules changed    : #{rulesChanged}")
		# console.log("Trackers changed : #{trackersChanged}")
		# console.log("Sites changed    : #{sitesChanged}")
		# console.log("")
		# console.log("Rules added (#{rulesAdded.size}):")
		# console.log(rulesAdded)
		# console.log("Ruled updated (#{rulesUpdated.size}):")
		# console.log(rulesUpdated)
		# console.log("Rules deleted (#{rulesDeleted.size}):")
		# console.log(rulesDeleted)
		# console.log("")
		# console.log("Trackers added (#{trackersAdded.size}):")
		# console.log(trackersAdded)
		# console.log("Trackers updated (#{trackersUpdated.size}):")
		# console.log(trackersUpdated)
		# console.log("Trackers deleted (#{trackersDeleted.size}):")
		# console.log(trackersDeleted)
		# console.log("")
		# console.log("Sites added (#{sitesAdded.size}):")
		# console.log(sitesAdded)
		# console.log("Sites updated (#{sitesUpdated.size}):")
		# console.log(sitesUpdated)
		# console.log("Sites deleted (#{sitesDeleted.size}):")
		# console.log(sitesDeleted)
		# console.log("")
802

803
804
805
806
807
808
809
		# Calculate the last update time based on the content cache.
		# Since timestamps have lexicographical order, we can use a reduce
		# on the values of the cache dictionary to achieve this.
		# (Using ES6) as it is easier. (And we will move the codebase to ES6 eventually.)
		lastModifiedTimes = `Object.values(this.contentCurrentModifiedTimes)`
		lastModifiedTime = `lastModifiedTimes.reduce ((x,y) => { return (x > y ? x : y) })`

810
		metadata = `JSON.stringify({
Aral Balkan's avatar
Aral Balkan committed
811
			md5: hash,
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
			lastUpdate: lastModifiedTime, 
			stats: {
				rulesChanged: rulesChanged,
				trackersChanged: trackersChanged,
				sitesChanged: sitesChanged,
				trackers: {
					added: [...trackersAdded],
					updated: [...trackersUpdated],
					deleted: [...trackersDeleted]
				},
				sites: {
					added: [...sitesAdded],
					updated: [...sitesUpdated],
					deleted: [...sitesDeleted]
				}
			}
		})`
829
830

		# Save the blocker list file and its metadata.
Aral Balkan's avatar
Aral Balkan committed
831
832
		(fs.outputFileAsync blockerListJSONFilePath, output).then =>
			fs.outputFileAsync metadataFilePath, metadata
Aral Balkan's avatar
Aral Balkan committed
833

834
835
836
837
838
	#
	# Saves the rule partials in JSON format.
	#
	saveRulePartials: =>

		# Write one "<original file name>.rule.json" file per rendered source file,
		# so the blocker list can later be assembled from these cached partials
		# without rendering every rule again.
		for sourcePath, partialRules of @rulePartials

			sourceInfo = path.parse sourcePath

			# Mirror the source file's directory under the rule partials directory.
			# The first five '/'-separated segments of the source directory are dropped.
			# NOTE(review): this assumes a fixed directory depth for the content root — confirm.
			relativeDirectory = sourceInfo.dir.split('/')[5..].join('/')
			targetDirectory = path.join app.rulePartialsDirectory, relativeDirectory

			fs.ensureDirSync targetDirectory

			targetFile = path.join targetDirectory, "#{sourceInfo.base}.rule.json"

			fs.outputFileSync targetFile, JSON.stringify(partialRules), 'utf-8'

860
861
862
863
	#
	# Hook into the marked Markdown renderer for the Blockdown MSON code sections.
	#
	codeRenderer: (code, language) =>
		#
		# code     - (String) raw contents of the fenced code block.
		# language - (String) language tag of the fenced code block.
		#
		# For 'mson' blocks: converts the block into a ContentBlockerRule, lints it,
		# records it in @rules and in @rulePartials for the file currently being
		# rendered, and returns the rendered code with basic syntax highlighting.
		# Any other language is delegated untouched to the plain Markdown renderer.
		#
		# Throws an Error if a line does not map to a known rule key or if the
		# resulting rule fails linting.
		#

		# The HTML output is always rendered from the untouched source.
		originalCode = code

		if language is 'mson'

			# Create the dictionary entry in the table of files currently being rendered
			# to their rule partials if one does not already exist.
			if @rulePartials[@pathOfTheFileCurrentlyBeingRendered] == undefined
				@rulePartials[@pathOfTheFileCurrentlyBeingRendered] = []

			# Convert blocker rule MSON to JSON: do a very strict conversion of the
			# MSON based on the block list JSON specification.

			# Trim any leading or trailing whitespace.
			code = code.trim()

			# Handle authoring-time optimisation: if the code does not *start* with a
			# dash (optionally preceded by a tab or two spaces), assume that it is a
			# naked url-filter entry and transform it into one.
			if (code.indexOf '-') != 0 and (code.indexOf "\t-") != 0 and (code.indexOf '  -') != 0
				code = "- url-filter: #{code}"

			# Flatten the key/value pairs into "key<delimiter>value" lines.
			# Only tabs and spaces count as padding here (a comma is not whitespace),
			# matching the highlighting expression further down.
			safeDelimeter = '\udbff\udfff'
			code = code.replace /^[\t ]*-[\t ]*(.*?):[\t ]*(.*?)$/mg, "$1#{safeDelimeter}$2"

			#
			# Create the rule: every line names a method on the rule to call with its value.
			#
			rule = new ContentBlockerRule

			for line in code.split "\n"
				keyValuePair = line.split safeDelimeter
				key = keyValuePair[0]
				value = keyValuePair[1]

				# Fail with a message the author can act on instead of an opaque TypeError.
				unless typeof rule[key] is 'function'
					throw new Error "[Blockdown: blocker rule] Unknown or malformed rule key '#{key}' in #{@pathOfTheFileCurrentlyBeingRendered}, please check that your block rule MSON is valid."

				rule[key](value)

			# Lint the content blocker JSON rule string.
			try
				@lintRule rule.value()
			catch error
				throw new Error "[Blockdown: blocker rule] Lint error, please check that your block rule MSON is valid. #{error} #{error.stack}"

			# Add the linted rule to the rules for this Markdown document.
			@rules.push rule.value()

			# Add the linted rule to the rules for the file that’s currently being rendered.
			@rulePartials[@pathOfTheFileCurrentlyBeingRendered].push rule.value()

			#
			# Add basic syntax highlighting to the original code in HTML.
			#
			renderedCode = @markdownRenderer.code originalCode, language

			dash = "<span class='punctuation'>-</span>"
			colon = "<span class='punctuation'>:</span>"
			matches = renderedCode.matches(/-([\t ]*)(.+?):([\t ]*)(.*?)$/gm)

			for found in matches
				whitespaceAfterDash = found.groups[0]
				key = found.groups[1]
				whitespaceAfterColon = found.groups[2]
				value = found.groups[3]

				if key in ['trigger', 'action']
					# Top-level key, no value (just key).
					key = "<span class='key top-level'>#{key}</span>"
				else
					key = "<span class='key'>#{key}</span>"
					value = "<span class='value'>#{value}</span>"

				highlightedCode = "#{dash}#{whitespaceAfterDash}#{key}#{colon}#{whitespaceAfterColon}#{value}"

				# Use a replacer function so that '$' sequences in the rule (common in
				# url-filter regular expressions, e.g. "$&" after HTML escaping) are
				# inserted literally rather than treated as replacement patterns.
				renderedCode = renderedCode.replace found.match, -> highlightedCode

			return renderedCode

		# Render as regular code.
		return @markdownRenderer.code originalCode, language

952

953
954
955
956
957
958
	#
	# Image renderer.
	#
	# Adds image size support to Markdown.
	# Based on https://github.com/chjj/marked/issues/339#issuecomment-40975942
	#
959
	imageRenderer: (href, title, text) =>

		# The Markdown title slot is repurposed as an image size: "WIDTHxHEIGHT",
		# or just "WIDTH". Without a title, no size attributes are emitted.
		sizeAttributes = ''

		if title
			dimensions = title.split 'x'
			width = dimensions[0]
			height = dimensions[1]
			sizeAttributes = if height then "width=#{width} height=#{height}" else "width=#{width}"

		return "<img src=\"#{href}\" alt=\"#{text}\" #{sizeAttributes}>"
971

972
973

	#
974
975
976
	# Heading renderer
	#
	# Headings let us know which section of the page we’re in so we can
977
	# handle subsequent data accordingly.
978
979
980
981
	#
	headingRenderer: (text, level, raw) =>

		# Only the "After Better" heading (compared case-insensitively) puts the
		# parser into the statistics section; every other heading takes it out.
		@parserIsInStatisticsSection = (text.toLowerCase() is 'after better')

		# The heading itself is always rendered by the plain Markdown renderer.
		return @markdownRenderer.heading text, level, raw

Aral Balkan's avatar
Aral Balkan committed
990
991
992
993
994

	#
	# List renderer.
	#
	listRenderer: (body, ordered) =>
		# Outside the statistics section, lists are plain Markdown lists.
		return @markdownRenderer.list(body, ordered) unless @parserIsInStatisticsSection

		# Inside the statistics section, the list body is rendered as statistics.
		return @renderStatistics body


1000
1001
1002
1003
	#
	# List item renderer
	#
	# List items are the primary means by which we implement higher-order
1004
1005
1006
1007
	# semantics for Blockdown. List items that start with certain keywords are rendered
	# using specialised templates.
	#
	listItemRenderer: (text) =>
		#
		# text - (String) the rendered contents of the list item.
		#
		# Returns the HTML for the list item: a trackers badge, a static badge,
		# or a regular Markdown list item when no known vocabulary matches.
		#

		# Trackers badge.
		# The explicit call parentheses matter: without them the `or` becomes part
		# of the first call's argument and the lowercase variant is never tested.
		if text.startsWith('(Trackers)') or text.startsWith('(trackers)')
			return @renderTrackersBadgeListItem text

		#
		# Static badges: [title as authored, explanation shown in the badge].
		#
		staticBadges =
		[
			['(Aggressive)', 'Attempts to block malware blockers.'],
			['(Doorslam)', 'Interrupts your flow with modal dialogs.'],
			['(Clickbait)', 'Third-party exploitative content.'],
			['(Fingerprint)', 'Canvas fingerprinting.'],
			['(Web bug)', 'Invisible tracking pixel.'],
			['(Tracker)', 'Monetises you.']
		]

		# Support title variants to be forgiving (case and whitespace insensitive)
		# in authoring. The normalised text does not change between badges, so it
		# is computed once.
		forgivingText = text.replace(/\s/g, '').toLowerCase()

		for badge in staticBadges
			title = badge[0]
			explanation = badge[1]

			forgivingTitle = title.replace(/\s/g, '').toLowerCase()

			if forgivingText.startsWith forgivingTitle
				return @renderStaticBadgeListItem title.toLowerCase(), explanation

		#
		# No known vocabulary matches, render as regular Markdown.
		#
		return @markdownRenderer.listitem text
1040
1041


1042
1043
	#
	# Renders a static badge list item (these are the badges that contain an icon, title, and explanation).
1044
	# The icon is added in the CSS (see the theme-for-site-data repository).
1045
1046
	#
	renderStaticBadgeListItem: (title, explanation) =>
		#
		# title       - (String) badge title, optionally wrapped in parentheses, e.g. "(web bug)".
		# explanation - (String) explanatory text shown under the title.
		#
		# Returns the HTML for a static badge list item.
		#

		# Strip the parentheses used by the authoring syntax.
		title = title.replace('(', '').replace(')', '')

		# HTML ids must not contain whitespace, so replace every space
		# (not just the first one) with a dash.
		badgeID = title.replace(/ /g, '-')

		return "\t<li id='#{badgeID}' class='static-badge'>\n\t\t<h3 class='badge-title'>#{title}</h3>\n\t\t<p class='badge-explanation'>#{explanation}</p>\n\t</li>\n"
1055

1056

1057
1058
1059
1060
	#
	# Renders the trackers badge list item.
	#
	renderTrackersBadgeListItem: (text) =>
1061
		moreDetailsHTML = 'Exposes you to third-party sites.'
1062

1063
1064
1065
		#
		# Get the trackers.
		#
1066