Blockdown.coffee 51.9 KB
Newer Older
1
2
################################################################################
#
3
#	Better
4
#
5
6
#	Blockdown Parser: parses Blockdown content and generates the
#	data for the Better site and iOS app.
7
#
8
#	This is Independent Technology.
9
#
10
11
12
13
14
#	▲❤ We practice Ethical Design (https://ind.ie/ethical-design)
#
#	© Aral Balkan. © Ind.ie. All Rights Reserved.
#	Released with love by Ind.ie under GNU AGPLv3 or later.
#	Free as in freedom. Please see the LICENSE file.
15
16
17
#
################################################################################

18
fs = require 'fs-extra-as-promised'
19
path = require 'path'
Aral Balkan's avatar
Aral Balkan committed
20

21
22
23
24
25
26
27
28
29
30
31
32
33
34
marked = require 'marked'
jsonlint = require 'jsonlint'

chai = require 'chai'
expect = chai.expect
assert = chai.assert
chai.should()

Promise = require 'thrush'

glob = require 'glob'
globAsync = Promise.promisify glob

eyespect = require 'eyespect'
Aral Balkan's avatar
Aral Balkan committed
35
inspect = eyespect.inspector { stream: null }
36
winston = require 'winston'
37
app = require './App'
38
39
ContentBlockerRule = require './ContentBlockerRule'

40
41
set = require 'indie-set'

42
43
GlobalStatistics = require './GlobalStatistics'

Aral Balkan's avatar
Aral Balkan committed
44
require './StringExtensions' # for String::trim()
45

46
47
48
49
50
51
52
53
54
55
require 'crypto'

#
# Calculate checksum
#
checksum = (str) ->
		# Returns the hex-encoded MD5 digest of `str` (interpreted as UTF-8).
		#
		# The module is required locally because the file-level `require 'crypto'`
		# does not bind the module to a name, which would leave `crypto` unresolved
		# here. (require() caches modules, so this is cheap on repeated calls.)
		crypto = require 'crypto'

		return crypto
				.createHash('md5')
				.update(str, 'utf8')
				.digest('hex')
56
57
58

class Blockdown

59
60
	# winston logger; created in the constructor.
	log: null

	# Values assigned to @rendering to mark which output is being rendered.
	site: "site"
	app: "app"

	markdownRenderer: null		# (marked.Renderer)
	blockdownRenderer: null		# (marked.Renderer)
	rules: null					# (Array)

	# Rules keyed by the path of the source file (see saveRulePartials).
	rulePartials: null # (Object)

	parserIsInStatisticsSection: false

	numberOfTrackers: 0

	# Set by renderBlockdown before each file is passed to marked.
	pathOfTheFileCurrentlyBeingRendered: null

	# Either @site or @app; set at the start of each render pass.
	rendering: null
	# The theme object for the current render pass (app.siteTheme or app.appTheme).
	theme: null

	isRenderingDataForSite: false
	isRenderingDataForApp: false

	#
	# Cache-related.
	#
	# All four are set by decideCachePolicy.
	#
	useContentCache: false
	contentHasNotChanged: false
	contentCurrentModifiedTimes: null
	contentPreviousModifiedTimes: null


91
92
93
94
95
96
	#
	# Public methods.
	#

	constructor: ->
		# Sets up the rule stores, the two marked renderers and the logger,
		# then decides whether the content cache can be used for this run.

		# Initialise properties
		@rules = []
		@rulePartials = {}

		# Initialise two renderers (one for regular Markdown, the
		# other for the Blockdown code sections).
		@markdownRenderer = new marked.Renderer()
		@markdownRenderer.image = @imageRenderer
		@markdownRenderer.options.headerPrefix = '' # Otherwise we get undefinedheader-slug
		@markdownRenderer.options.langPrefix = ''	# Ditto for <code class="undefined…">
		@markdownRenderer.options.gfm = true

		@blockdownRenderer = new marked.Renderer()
		@blockdownRenderer.code = @codeRenderer
		@blockdownRenderer.image = @imageRenderer
		@blockdownRenderer.list = @listRenderer
		@blockdownRenderer.listitem = @listItemRenderer
		@blockdownRenderer.heading = @headingRenderer

		# Set up logging.
		# (Must happen before decideCachePolicy, which logs through @log.)
		logFile = path.join app.logsDirectory, 'Blockdown.log'
		@log = new winston.Logger
			transports:
				[
					new (winston.transports.Console)({ level: 'info'})
					new (winston.transports.File)({ filename: logFile, level: 'debug' })
				]

		# Decide cache policy
		@decideCachePolicy()

		# @log.debug "Blockdown initiated. Logs at #{logFile}"
129
130


131
132
133
134
135
136
137
138
139
140
	#
	# Should we use the cache?
	#
	# Currently, we only use the cache to render the Blockdown (Markdown) content
	# if the theme hasn’t changed.
	#
	# TODO: Once we’ve separated and optimised content and theme rendering, we can implement
	# ===== caching for the template also. 
	#
	decideCachePolicy: =>
		# Sets @useContentCache, and (when the cache is used) @contentHasNotChanged,
		# @contentCurrentModifiedTimes and @contentPreviousModifiedTimes, by comparing
		# the modified-times JSON files under <home>/.private/cache.

		#
		# Note: I do not trust the cache for use in production so we detect that and turn it off.
		# ===== A production deployment should always do a full, clean build.
		#
		if app.isRunningInProduction
			console.log "\t✓ Better is running in production, not using the cache feature."
			# Assignment (this used to be a no-op `==` comparison).
			@useContentCache = false
			return

		cacheDirectory = path.join app.homeDirectory, '.private', 'cache'
		themeCacheCurrentModifiedTimesFile = path.join cacheDirectory, 'current-modified-times', 'themes.json'
		themeCachePreviousModifiedTimesFile = path.join cacheDirectory, 'previous-modified-times', 'themes.json'

		themeCacheCurrentModifiedTimes = fs.readJsonSync themeCacheCurrentModifiedTimesFile, 'utf-8'
		themeCachePreviousModifiedTimes = fs.readJsonSync themeCachePreviousModifiedTimesFile, 'utf-8'

		# Check if there are any template changes (if there are, this will require a full rebuild),
		# otherwise, we can use the cache and we won’t need to re-render the content.
		#
		# NOTE(review): only keys present in the *previous* map are compared, so a
		# template that exists only in the current map does not disable the cache.
		# Confirm whether that is intended.
		@useContentCache = true
		for fileName, lastModifiedDate of themeCachePreviousModifiedTimes
			if (fileName.indexOf '/templates/') != -1
				# This is a template, check if it has changed
				if lastModifiedDate != themeCacheCurrentModifiedTimes[fileName]
					# There are changes to a theme template, do not use the cache.
					@log.info "\t✓ Theme template changed (#{fileName}), *not* using cache."
					@useContentCache = false
					break

		# Set up the content cache if we’re using it.
		if @useContentCache

			contentCacheCurrentModifiedTimesFile = path.join cacheDirectory, 'current-modified-times', 'content.json'
			contentCachePreviousModifiedTimesFile = path.join cacheDirectory, 'previous-modified-times', 'content.json'

			# Read as raw strings first so the two files can be compared verbatim.
			_contentCurrentModifiedTimes = fs.readFileSync contentCacheCurrentModifiedTimesFile, 'utf-8'
			_contentPreviousModifiedTimes = fs.readFileSync contentCachePreviousModifiedTimesFile, 'utf-8'

			@contentHasNotChanged = _contentCurrentModifiedTimes == _contentPreviousModifiedTimes

			@contentCurrentModifiedTimes = JSON.parse(_contentCurrentModifiedTimes)
			@contentPreviousModifiedTimes = JSON.parse(_contentPreviousModifiedTimes)

			console.log "\t✓ No changes to theme templates; using cache."
184

185

186

187
	#
188
	# Render data for site.
189
	#
190
	renderDataForSite: =>
		# Renders the content into app.dataForSiteDirectory using app.siteTheme,
		# writes the blocker list, copies content and theme assets and, when the
		# cache is in use, removes build output for deleted source files.
		# Returns the promise for the whole chain; failures are rethrown as an
		# Error prefixed with "[Blockdown: site data]".

		# @log.debug "Rendering data for site."

		@isRenderingDataForSite = true
		@isRenderingDataForApp = false

		@theme = app.siteTheme
		@rendering = @site

		# Reset the rules
		@rules = []

		# Reset the global statistics for the site render.
		GlobalStatistics.reset()

		#
		# TODO: Need a separate pass to create the indices as they
		# will include folders as well as the content files.
		#
		# This could be done at the end, on the data folder.
		#
		indices = {}

		Promise.try =>
			# Clean the data directory before starting only if we’re not using the cache.
			if @useContentCache
				#console.log "\t✓ Using cache"
				return true
			else
				# @log.info "Not using cache: returning promise to delete the data for site directory from Promise.try"
				return fs.removeAsync "#{app.dataForSiteDirectory}/*"
		.then =>
			@renderBlockdown app.contentDirectory, app.dataForSiteDirectory, app.siteTheme
				.then (indices) =>

					# TODO: Make this async
					# TODO: Update render pipeline to only render content (and these partials)
					#       in the content pass.
					# Note: Only save rule partials if we’re running in development
					# ===== (the cache feature is not used in production.)
					if app.isRunningInDevelopment
						@saveRulePartials()

					@saveBlockerListJSON app.dataForSiteDirectory
						.then =>
							Promise.try =>
								if @useContentCache and @contentHasNotChanged
									return
								else
									@copyAssets @contentAssets('site'), app.dataForSiteDirectory
						.then =>
							Promise.try =>
								if @useContentCache and app.themeHasNotChanged
									console.log "\t✓ Theme has not changed, not copying theme assets for site."
									return
								else
									console.log "\t✓ Copying theme assets for site."
									@copyAssets @themeForSiteDataAssets(), app.dataForSiteDirectory
		.then =>
			Promise.try =>
				if @useContentCache
					@removeDeletedFiles()
		.catch (e) =>
			throw new Error "[Blockdown: site data] #{e} #{e.stack}"
254

255
256
257
	#
	# Render data for apps.
	#
258
	renderDataForApp: =>
		# Renders the content into app.dataForAppDirectory using app.appTheme,
		# writes the blocker list, copies content and theme assets and, when the
		# cache is in use, removes build output for deleted source files.
		# Returns the promise for the whole chain; failures are rethrown as an
		# Error prefixed with "[Blockdown: app data]".

		# @log.debug "Rendering data for apps."

		@isRenderingDataForSite = false
		@isRenderingDataForApp = true

		@rendering = @app
		@theme = app.appTheme

		# Reset the rules
		@rules = []

		# Reset the global statistics for the app render.
		GlobalStatistics.reset()

		#
		# TODO: Need a separate pass to create the indices as they
		# will include folders as well as the content files.
		#
		# This could be done at the end, on the data folder.
		#
		indices = {}

		Promise.try =>
			# Clean the data directory before starting only if we’re not using the cache.
			if @useContentCache
				# @log.info "Using cache: returning true from Promise.try"
				return true
			else
				# @log.info "Not using cache: returning promise to delete the data for app directory from Promise.try"
				return fs.removeAsync "#{app.dataForAppDirectory}/*"
		.then =>
			@renderBlockdown app.contentDirectory, app.dataForAppDirectory, app.appTheme
				.then (indices) =>
					@saveBlockerListJSON app.dataForAppDirectory
						.then =>
							Promise.try =>
								if @useContentCache and @contentHasNotChanged
									return
								else
									@copyAssets @contentAssets('app'), app.dataForAppDirectory
						.then =>
							Promise.try =>
								if @useContentCache and app.themeHasNotChanged
									console.log "\t✓ Theme has not changed, not copying theme assets for app."
									return
								else
									console.log "\t✓ Copying theme assets for app."
									@copyAssets @themeForAppDataAssets(), app.dataForAppDirectory
		.then =>
			Promise.try =>
				if @useContentCache
					@removeDeletedFiles()
		.catch (e) =>
			throw new Error "[Blockdown: app data] #{e} #{e.stack}"
313
314
315
316
317

	#
	# Private methods.
	#

318
319
320
	# Returns the partial HTML after a Set render.
	partial: (fullHTML) ->
		# Strips the first empty-document opener and the first document closer
		# from the rendered HTML, leaving only the body fragment.
		withoutOpener = fullHTML.replace '<html><head></head><body>', ''
		withoutOpener.replace '</body></html>', ''

321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
	#
	# Takes HTML rendered from Markdown and wraps sections into section tags,
	# transferring the id from the H2 to the section tag.
	#
	wrapSections: (html) ->
		# Returns `html` with each <h2 id="…"> turned into the start of a
		# <section id="…"> that runs up to the next section (or to </main>).

		# Write out the opening section tags
		html = html.replace(/<h2 id="(.*?)">(.*?)<\/h2>/g, "<section id=\"$1\">\n\t<h2>$2</h2>")

		# Without any <section> there is nothing to close. Bail out here so that
		# a stray </section> is not injected before </main>.
		return html if html.search(/<section.*?>/) is -1

		# Write out the closing section tags (one in front of every opening tag)
		html = html.replace(/(<section.*?>)/g, "\n</section>\n$1")

		# Remove the erroneous first </section> tag
		html = html.replace('</section>', '')

		# Write out the last </section> tag
		html.replace('</main>', '</section></main>')

338
339
340
	imageExtensions: ['.jpg', '.gif', '.png', '.svg']

	contentAssets: =>
		# Describes the content files to copy: an object with `extensions`
		# (a '|'-joined list, as used in copyAssets' glob pattern) and `path`
		# (the content directory). Takes no parameters; arguments passed by
		# callers are ignored.
		dataExtensions = ['.json', '.html', '.js']

		contentAssetExtensions = (@imageExtensions.concat dataExtensions).join '|'
		contentAssetPath = app.contentDirectory

		return (extensions: contentAssetExtensions, path: contentAssetPath)

Aral Balkan's avatar
Aral Balkan committed
348
	# Now that we have the new unified themes repository, we can just copy any
349
350
351
352
	# files from the /static/ folder over without worrying about limiting
	# to a subset of extensions but I’m keeping this as it is safer in that we
	# won’t accidentally let an unsupported file type get published.

353
	themeAssetExtensions: =>
		# Returns the '|'-joined list of extensions shared by the site and app themes.
		return (['.woff', '.js', '.css', '.html'].concat @imageExtensions).join '|'
355

356
	themeForAppDataAssets: =>
		# Returns the asset descriptor (extensions + path) for the app theme’s static folder.
		return (extensions: @themeAssetExtensions(), path: (path.join app.themeForAppDataDirectory, 'static'))
358

359
	themeForSiteDataAssets: =>
		# Returns the asset descriptor (extensions + path) for the site theme’s static
		# folder. The site additionally ships extra font formats and the Safari
		# extension files on top of the shared theme extensions.
		fontExtensions = ['.woff2', '.ttf', '.svg', '.eot'].join '|'
		safariExtensionExtensions = ['.safariextz', '.plist'].join '|'
		themeAssetExtensions = "#{@themeAssetExtensions()}|#{fontExtensions}|#{safariExtensionExtensions}"
		return (extensions: themeAssetExtensions, path: (path.join app.themeForSiteDataDirectory, 'static'))
364
365
366
367
368

	#
	# Copies assets to the destination
	#
	copyAssets: (assets, destinationPath) =>
		# Copies every file under assets.path whose name ends in one of
		# assets.extensions (a '|'-joined list) into destinationPath, keeping the
		# directory layout relative to assets.path. Returns the promise from the
		# glob/series chain.

		# @log.info "Copying assets: #{assets.path} to #{destinationPath}"
		(globAsync "#{assets.path}/**/*@(#{assets.extensions})")
			.series (file) =>

				# TODO: Check if the file has been deleted since the previous render.
				# (Not implemented: every matched file is currently copied.)

				baseName = path.basename file
				dirName = path.dirname file
				# Make the directory relative to the asset root.
				dirName = dirName.replace assets.path, ''
				destinationPathForDirectory = path.join destinationPath, dirName
				destinationPathForFile = path.join destinationPathForDirectory, baseName

				# @log.info "Copying asset: #{file}"

				fs.ensureDirAsync destinationPathForDirectory
					.then =>
						fs.copyAsync file, destinationPathForFile
387

388

389
390
391
392
393
394
395
396
397
398
399
	#
	# Returns whether the string1 ends with string2
	#
	# Note: We also check for > 0, below, to handle the edge case
	# where string1 does not end with string2 (so index = -1)
	# and where string2 is one character longer than string1,
	# resulting in string1.length - string2.length == -1 == index.
	#
	endsWith: (string1, string2) ->
		# Returns true when string1 ends with string2 and is strictly longer than it.
		#
		# lastIndexOf is used (not indexOf) so that a suffix which also occurs
		# earlier in string1, e.g. '.md' in 'sites/example.md/page.md', is still
		# recognised.
		#
		# The strict `> 0` is kept deliberately: it covers the not-found case (-1)
		# and means an exact match returns false. removeDeletedFiles depends on the
		# latter: for a file that is exactly 'index.md' its "delete the entry
		# folder" branch would otherwise resolve to the data directory itself.
		index = string1.lastIndexOf(string2)
		index > 0 && index == string1.length - string2.length
400
401
402
403
404
405
406


	#
	# Removes deleted files (when using the cache)
	#
	removeDeletedFiles: =>
		# For every key in @contentPreviousModifiedTimes that is missing from
		# @contentCurrentModifiedTimes, synchronously removes the corresponding
		# output from the data directory of the current render (app or site).

		# @log.info "Looking for deleted files in the content…"

		# Only relevant for content at the moment (as we do a complete rebuild
		# whenever the theme changes and this will not even get called).

		for file of @contentPreviousModifiedTimes
			if @contentCurrentModifiedTimes[file] == undefined
				# File has been deleted. Make sure its compiled assets are removed
				# from the build also.

				console.log "\t✓ Source file #{file} was deleted; removing from build."

				# Set the base of the path of the item to delete
				pathToDelete = if @isRenderingDataForApp then app.dataForAppDirectory else app.dataForSiteDirectory

				if @endsWith file, 'index.md'
					#
					# If the index.md of an entry is removed, we assume that the entry itself was
					# removed and delete its folder from the build.
					#
					filePathComponents = file.split('/')
					filePathComponents.pop()
					_file = filePathComponents.join('/')

					pathToDelete = path.join pathToDelete, _file
				else if @endsWith file, '.md'
					#
					# MD files are translated to HTML files.
					#
					# Anchor on the end of the name so only the extension is rewritten,
					# not a '.md' that occurs earlier in the path (e.g. a folder
					# named after a .md domain).
					_file = file.replace(/\.md$/, '.html')
					pathToDelete = path.join pathToDelete, _file
				else
					#
					# For all other files, just remove them directly
					#
					pathToDelete = path.join pathToDelete, file

				# @log.info "About to delete #{pathToDelete} from the build folder (data)."

				fs.removeSync pathToDelete


449
450
451
	#
	# Renders Blockdown from content path to data path.
	#
452
	renderBlockdown: (contentPath, dataPath, theme) =>
		# Renders every .md file under contentPath into an .html page under
		# dataPath, then renders the theme-driven index pages.
		#
		# contentPath – root folder that is globbed for **/*.md
		# dataPath    – root folder the pages are written to
		# theme       – compared against app.appTheme to tell app from site renders
		#
		# Returns a promise for `indices`: an object mapping each relative folder
		# to the array of HTML file names in it.
		indices = {}
		titleOfPage = ''

		# TODO: Refactor so we do not have to check for site/app render here.
		isRenderingDataForApp = theme is app.appTheme
		isRenderingDataForSite = !isRenderingDataForApp

		# Used to hold the index pages of the various categories.
		indexContent =
			spotlight: {}
			sites: {}
			trackers: {}
			none: {}

		(globAsync "#{contentPath}/**/*.md", {})
			.series (file) =>
				# @log.info "Reading blockdown file: #{file}"

				#
				# Check cache
				#
				if @useContentCache
					parsedFilePath = (path.parse file)
					# NOTE(review): the [5..] slice assumes the content root sits at a
					# fixed depth in the absolute path. Confirm against the deployment layout.
					filePathKey = path.join parsedFilePath.dir.split('/')[5..].join('/'), parsedFilePath.base

					# @log.info "File path key: #{filePathKey}"
					# @log.info "Current: #{@contentCurrentModifiedTimes[filePathKey]}"
					# @log.info "Previous: #{@contentPreviousModifiedTimes[filePathKey]}"

					if @contentCurrentModifiedTimes[filePathKey] == @contentPreviousModifiedTimes[filePathKey]

						################################################################################
						#
						# Use cache.
						#
						################################################################################

						# @log.info "Cache: Using existing file."

						#
						# Add the rule partial to the rules array
						#
						rulePartialFilePath = path.join app.cacheDirectory, 'rule-partials', "#{filePathKey}.rule.json"

						# @log.info "Rule partial file path: #{rulePartialFilePath}"

						if fs.existsSync rulePartialFilePath
							rulePartialString = fs.readFileSync rulePartialFilePath, 'utf-8'
							rulePartialArray = JSON.parse rulePartialString
							@rules = @rules.concat rulePartialArray

						#
						# Update the indices
						#
						# Only the trailing extension is rewritten, so a '.md' earlier in
						# the path (e.g. a folder named after a .md domain) is left alone.
						outputHTMLFileName = filePathKey.replace(/\.md$/, '.html')
						indexPathComponents = outputHTMLFileName.split('/')
						indexHTMLFileName = indexPathComponents.pop()
						indexRelativeFolder = "/#{indexPathComponents.join('/')}"

						if indices[indexRelativeFolder] is undefined
							indices[indexRelativeFolder] = []
						indices[indexRelativeFolder].push indexHTMLFileName

						indexInfoJSONFileName = "#{indexHTMLFileName}.json"
						indexInfoJSONFilePath = path.join app.indicesDirectory, indexRelativeFolder, indexInfoJSONFileName
						indexInfoExists = fs.existsSync indexInfoJSONFilePath
						if indexInfoExists
							indexInfo = fs.readJsonSync indexInfoJSONFilePath
							indexContent[indexInfo.category][indexInfo.titleOfPageClean] = indexInfo.listItem
						else
							console.log "\t⚠ Could not find index info JSON from path #{indexInfoJSONFilePath}. Ignoring…"

						# Break out so we don’t fall through a fresh render.
						return

				################################################################################
				#
				# Fresh render.
				#
				################################################################################

				fs.readFileAsync file, 'utf-8'
					.then (content) =>

						if @useContentCache
							console.log "\t✓ Rendering Blockdown in file #{file}"

						fileComponents = file.split('/')

						categoryIndex = ('sites' in fileComponents and fileComponents.indexOf('sites')) or ('trackers' in fileComponents and fileComponents.indexOf('trackers'))

						category = if categoryIndex then fileComponents[categoryIndex] else 'none'
						pageDomain = if categoryIndex then fileComponents[categoryIndex+1] else 'none'

						# Add more specific ‘site’ or ‘tracker’ to category class list if necessary
						categoryForTemplate = if categoryIndex and categoryIndex == (fileComponents.length - 3) then "#{category} #{category.substr(0, category.length-1)}" else category

						#
						# Add the actions to the bottom of pages that have both a category and a domain
						# (i.e., content pages)
						#

						if isRenderingDataForSite and category isnt 'none' and pageDomain isnt 'none'
							actionsBlock = """

## About Better

Better is a Safari content blocker for <a href='https://itunes.apple.com/us/app/better-by-ind.ie/id1080964978?mt=8'>iPhone</a>, <a href='https://itunes.apple.com/us/app/better-by-ind.ie/id1080964978?mt=8'>iPad</a>, and <a href='https://safari-extensions.apple.com/details/?id=better.fyi.safari.extension-5Q42VF5GXA'>Mac</a>. It protects you from trackers and malvertising by enforcing the principles of [Ethical Design](https://ind.ie/ethical-design).

## Get involved

[Improve page](https://source.ind.ie/better/content/blob/master/#{category}/#{pageDomain}/index.md) | [Report issue](https://source.ind.ie/better/content/issues)  | [Discuss](https://forum.ind.ie/c/better)

"""
							content += actionsBlock

						@pathOfTheFileCurrentlyBeingRendered = file

						# Save the title of the page so that we can update the <head>
						# information, later.
						titleFromHeading = content.match /^# (.*)?\n/

						if titleFromHeading is undefined or titleFromHeading is null
							titleOfPage = ""
						else
							# Set the title of the page
							titleOfPage = titleFromHeading[1]

							# Strip the markdown from the title for the site (keep it for the app
							# as we use it to intelligently truncate the title displayed in the navigation bar).
							# Only valid markup for headings is strong and emphasized, so just strips the *s.
							titleOfPageClean = titleOfPage.replace new RegExp('\\*', 'g'), ''

							# Update the index page
							renderedTitleOfPage = titleOfPage.replace /^\*\*(.*?)\*\*(.*)/, "<strong>$1</strong>$2"
							listItem = "\t<li><a href='/#{category}/#{pageDomain}'>#{renderedTitleOfPage}</a></li>\n"
							indexContent[category][titleOfPageClean] = listItem

							#
							# Save the index title info for the cache
							# (only if we’re running in development. The cache feature
							# is currently not used in production.)
							#
							if app.isRunningInDevelopment
								pathObject = path.parse file
								relativeFolder = pathObject.dir.replace contentPath, ''
								jsonFileName = "#{pathObject.name}.html.json"
								indexCacheFile = path.join app.indicesDirectory, relativeFolder, jsonFileName
								indexCacheObject =
									category: category
									titleOfPageClean: titleOfPageClean
									listItem: listItem

								# @log.info "Index cache file: #{indexCacheFile}"
								# @log.info "Index cache object:"
								# console.log indexCacheObject
								fs.outputJSONSync indexCacheFile, indexCacheObject

							#
							# Apply data theme specialisations…
							#
							# (Note: the app-specific specialisations that were here have now been moved to the iOS App.)
							if isRenderingDataForSite
								#
								# Is rendering data for site
								#
								# The site uses the markdown-free title with the site name appended.
								titleOfPage = "#{titleOfPageClean} | Better"

						marked.setOptions
							renderer: @blockdownRenderer
							gfm: true

						# NOTE(review): the promise returned by fs.outputFileAsync inside this
						# callback is not returned to the surrounding promise chain. Confirm
						# whether the write is meant to be awaited.
						marked content, (error, content) =>
							if error
								#
								# Blockdown parser fatal error: panic!
								#
								# Something went wrong in markdown parsing. This will lead to corrupted
								# data and we cannot have that. Better to fail and provide as much information as possible
								# so this can be debugged and fixed in development before hitting production.
								#
								throw new Error "[Blockdown: parser] #{error} #{error.stack}"
							else
								pathObject = path.parse file

								# Find out which relative folder we’re in
								relativeFolder = pathObject.dir.replace contentPath, ''

								#console.log "*** Relative folder: #{relativeFolder}"
								#console.log "*** pathObject.name: #{pathObject.name}"

								if relativeFolder is ''
									relativeFolder = '/'

								if indices[relativeFolder] is undefined
									indices[relativeFolder] = []

								htmlFileName = "#{pathObject.name}.html"
								indices[relativeFolder].push htmlFileName

								#console.log "*** HTML file name: #{htmlFileName}"

								# Selected item
								selectedItem = ''
								currentDirectory = pathObject.dir
								parts = currentDirectory.split('/content/')
								if parts.length >= 2
									selectedItem = parts[1]

								# Path for page.
								pagePath = file.replace contentPath, dataPath
								# Only the trailing extension is rewritten (see the note in
								# the cache branch above).
								pagePath = pagePath.replace(/\.md$/, '.html')

								# Render the page.
								headerPartialHTML = @partial(set.render(@theme.header, {navigationList: @navigationList selectedItem}))
								pageHTML = set.render(@theme.page, {title: titleOfPage, header: headerPartialHTML, content: content, category: categoryForTemplate})
								pageHTML = @wrapSections pageHTML
								fs.outputFileAsync pagePath, pageHTML
			.then =>
				# Sort the trackers and spotlight indices alphabetically.
				indexContent.trackers = @alphabetiseIndex indexContent.trackers
				indexContent.sites = @alphabetiseIndex indexContent.sites

				# Only render pages from themes if the theme has changed
				Promise.try =>
					if !@useContentCache
						# Render the /news index.
						@renderPage 'news', dataPath, '/news/index.html', {}, 'news'
			.then =>
				# Only render pages from themes if the theme has changed
				Promise.try =>
					if !@useContentCache
						# Render the /spotlight index.
						@renderPage 'spotlight', dataPath, '/spotlight/index.html', {}, 'spotlight'
			.then =>
				# Only render pages from themes if the theme has changed
				Promise.try =>
					if !@useContentCache
						# Render the /spotlight/1 index.
						@renderPage 'spotlightIssue1', dataPath, '/spotlight/1/index.html', {}, 'spotlight'
			.then =>
				# Don’t render index files if the theme and content haven’t changed.
				Promise.try =>
					if @useContentCache and @contentHasNotChanged
						return
					else
						# Render the /sites index.
						@renderPage 'sites', dataPath, '/sites/index.html', {list: indexContent.sites}, 'sites'
			.then =>
				# Don’t render index files if the theme and content haven’t changed.
				Promise.try =>
					if @useContentCache and @contentHasNotChanged
						return
					else
						# Render /trackers index.
						@renderPage 'trackers', dataPath, '/trackers/index.html', {list: indexContent.trackers}, 'trackers'
			.then =>
				# Only render pages from themes if the theme has changed
				Promise.try =>
					if !@useContentCache
						if isRenderingDataForSite
							# Render /reviews index.
							@renderPage 'reviews', dataPath, '/reviews/index.html', {}, 'reviews'
			.then =>
				# Only render pages from themes if the theme has changed
				Promise.try =>
					if !@useContentCache
						if isRenderingDataForSite
							# Render /support index.
							@renderPage 'support', dataPath, '/support/index.html', {}, 'support'
			.then =>
				# Only render pages from themes if the theme has changed
				Promise.try =>
					if !@useContentCache
						# Render home page.
						@renderHomePageStatistics().then (homePageStatisticsHTML) =>
							@renderPage 'home', dataPath, '/index.html', {statistics: homePageStatisticsHTML}, 'home'
			.then =>
				#@log.info "Indices:"
				#console.log indices
				return indices

740
741
742
743
744
	#
	# Takes an index object and returns a string of the list items, alphabetically sorted by the key.
	#
	alphabetiseIndex: (index) =>
		# Concatenates the values of `index` into one string, ordered by key.
		# Keys are compared with localeCompare('en', sensitivity: 'base'), so
		# case does not affect the order. Returns '' for an empty index.
		keys = Object.keys index
		keys.sort (a,b) -> a.localeCompare(b, 'en', {'sensitivity': 'base'})
		keys.reduce (previous, current) ->
			previous + index[current]
		, ''


751
752
753
	#
	# Renders a page on the site.
	#
754
	renderPage: (name, dataPath, pathFragment, data={}, category='none') =>
		# Renders the theme template @theme[name] with `data`, wraps it in the
		# theme’s header and page templates, and writes it to dataPath + pathFragment.
		# Returns the promise from fs.outputFileAsync.
		theme = @theme[name]
		contentPartialHTML = @partial(set.render(theme, data))
		navigationList = @navigationList name
		headerPartialHTML = @partial(set.render(@theme.header, {navigationList: navigationList}))

		title = name[0].toUpperCase() + name.slice(1) # title is the name with the first letter capitalised.

		if @isRenderingDataForSite
			title += ' | Better'

		pageHTML = set.render(@theme.page, {title: title, header: headerPartialHTML, content: contentPartialHTML, category: category})
		pagePath = path.join dataPath, pathFragment
		fs.outputFileAsync pagePath, pageHTML

770
771
772
773
774
775
776
777
778
779
780
781
782
	#
	# Returns configured navigation list data structure with the
	# passed navigation element marked as selected.
	#
	navigationList: (selectedItem) =>
		#
		# Navigation list for local links
		#
		# (The forum and source links are not here as they’re external and cannot be
		# highlighted dynamically in the navigation when navigated to.)
		navigationList =
		[
			{ class: 'home', link: '/', label: 'Home', selected: false}
			{ class: 'news', link: '/news', label: 'News', selected: false}
			{ class: 'sites', link: '/sites', label: 'Sites', selected: false}
			{ class: 'trackers', link: '/trackers', label: 'Trackers', selected: false}
			{ class: 'support', link: '/support', label: 'Support', selected: false}
		]

		# An entry is selected when selectedItem starts with its class name
		# (so e.g. 'sites/example.com' selects the 'sites' entry).
		for item in navigationList
			item.selected = selectedItem.startsWith item.class

		return navigationList

794

795
796
797
	#
	# Saves the WebKit content blocking rule file, blockerList.json.
	#
798
799
	saveBlockerListJSON: (dataDirectory) =>
		# Serialises @rules to <dataDirectory>/blockerList.json and writes the MD5
		# checksum of that exact string to <dataDirectory>/blockerList.json.md5.
		# Returns a promise that resolves once both files have been written.
		blockerListJSONFilePath = path.join dataDirectory, 'blockerList.json'
		blockerListJSONHashFilePath = path.join dataDirectory, 'blockerList.json.md5'

		# In development, pretty print the JSON to make it easier to debug.
		# In production, minify it for better performance.
		args = if app.isRunningInDevelopment then [@rules, null, 4] else [@rules]
		output = JSON.stringify.apply @, args
		hash = checksum(output)

		# Write the same string that was hashed (rather than serialising a second time).
		(fs.outputFileAsync blockerListJSONFilePath, output).then =>
			fs.outputFileAsync blockerListJSONHashFilePath, hash
Aral Balkan's avatar
Aral Balkan committed
809

810
811
812
813
814
	#
	# Saves the rule partials in JSON format.
	#
	saveRulePartials: =>
		# Writes one "<original file name>.rule.json" file per entry of
		# @rulePartials (a map of original file path -> rules rendered from it)
		# underneath app.rulePartialsDirectory, synchronously.
		#
		# The partials are used for caching, so that the blocker list can be
		# created without having to render every rule every time.
		for sourcePath, partialRules of @rulePartials

			sourcePathParts = path.parse sourcePath

			# Keep only the directory segments after the first five
			# (presumably the prefix leading up to the content root — verify
			# against the paths stored by the caller).
			directorySegments = sourcePathParts.dir.split '/'
			relativeDirectory = directorySegments[5..].join '/'
			targetDirectory = path.join app.rulePartialsDirectory, relativeDirectory

			fs.ensureDirSync targetDirectory

			targetFile = path.join targetDirectory, "#{sourcePathParts.base}.rule.json"

			# Pretty print in development, minify otherwise.
			serialisedRules = if app.isRunningInDevelopment
				JSON.stringify partialRules, null, 4
			else
				JSON.stringify partialRules

			fs.outputFileSync targetFile, serialisedRules, 'utf-8'


837
838
	#
	# Create the index HTML files for the site.
839
	# UNUSED. TODO: Refactor or remove.
840
841
	#
	createIndexFilesForSiteData: (indices) =>
		# For each directory key of indices, in series, writes an index.html
		# into that directory under app.dataForSiteDirectory. The page is an
		# unordered list linking to every file name recorded for the directory,
		# wrapped in the site theme’s header and footer.
		#
		# indices: (Object) map of directory path -> (Array) of file names.
		# Returns: the result of Promise.series over the directory paths.
		Promise.series Object.keys(indices), (directoryPath) =>

			indexHTMLFilePath = path.join app.dataForSiteDirectory, directoryPath, 'index.html'

			listItems = ("\t<li><a href='#{fileName}'>#{fileName}</a>\n" for fileName in indices[directoryPath])
			listHTML = "<ul>\n" + listItems.join('') + "</ul>"

			fs.outputFileAsync indexHTMLFilePath, app.siteTheme.header + listHTML + app.siteTheme.footer


	#
	# Create the index JSON files for the app data.
863
	# UNUSED. TODO: Refactor or remove.
864
865
866
867
868
869
870
871
872
	#
	createIndexFilesForAppData: (indices) =>
		# For each directory key of indices, in series, writes an index.json
		# of the form {"files": [...]} into that directory under
		# app.dataForAppDirectory.
		#
		# indices: (Object) map of directory path -> (Array) of file names.
		# Returns: the result of Promise.series over the directory paths.
		Promise.series Object.keys(indices), (directoryPath) =>

			indexJSONFilePath = path.join app.dataForAppDirectory, directoryPath, 'index.json'

			# Generate and save the index.json for the app.
			fs.outputFileAsync indexJSONFilePath, JSON.stringify({files: indices[directoryPath]})


883
884
885
886
	#
	# Hook into the marked Markdown renderer for the Blockdown MSON code sections.
	#
	codeRenderer: (code, language) =>
		# Renders a code section.
		#
		# Sections whose language is 'mson' are treated as content blocker rules:
		# the MSON is converted into a ContentBlockerRule, linted, and appended
		# both to @rules and to the rule partials of the file currently being
		# rendered. The returned HTML is the regular code rendering of the
		# original MSON with punctuation/key/value spans added for highlighting.
		# Every other language is passed straight to the plain Markdown renderer.
		#
		# code:     (String) contents of the code section.
		# language: (String) language tag of the code section.
		# Returns:  (String) HTML.
		# Throws:   Error when a line does not map onto a ContentBlockerRule
		#           method, or when the resulting rule fails linting.

		# @log.info "Language: #{language}, Code: >#{code}<"

		originalCode = code

		# Render anything that is not Blockdown MSON as regular code.
		if language isnt 'mson'
			return @markdownRenderer.code originalCode, language

		# @log.info "Rendering Blockdown MSON in #{@pathOfTheFileCurrentlyBeingRendered}"

		# Create the dictionary entry in the table of files currently being rendered
		# to their rule partials if one does not already exist.
		@rulePartials[@pathOfTheFileCurrentlyBeingRendered] ?= []

		#
		# Convert blocker rule MSON to JSON.
		#
		# Do a very strict conversion of the MSON based on the block list JSON specification.
		#

		# Trim any leading or trailing whitespace
		code = code.trim()

		# Handle authoring-time optimisation: if the code does not start with a
		# dash, assume that it is a url-filter entry and transform it into one.
		if (code.indexOf '-') != 0 and (code.indexOf "\t-") != 0 and (code.indexOf '  -') != 0
			# console.log "Transforming naked url-filter rule: #{code}"
			code = "- url-filter: #{code}"

		# Flatten the key/value pairs: "- key: value" becomes "key<delimiter>value".
		# The delimiter is a code point that should never occur in authored content.
		# NOTE(review): the character classes also contain a comma — looks
		# unintended, left untouched; confirm before changing.
		safeDelimeter = '\udbff\udfff'
		code = code.replace /^[\t, ]*-[\t, ]*(.*?):[\t, ]*(.*?)$/mg, "$1#{safeDelimeter}$2"

		#
		# Create the rule.
		#
		rule = new ContentBlockerRule

		for line in code.split "\n"
			# Blank lines carry no key; ignore them instead of crashing on them.
			continue if line.trim() is ''

			keyValuePair = line.split safeDelimeter
			key = keyValuePair[0]
			value = keyValuePair[1]
			# console.log "#{key} = #{value}"

			# Each key is applied by calling the rule method of the same name.
			# Fail with a message the author can act on rather than an opaque
			# “rule[key] is not a function” TypeError.
			if typeof rule[key] isnt 'function'
				throw new Error "[Blockdown: blocker rule] Unknown or malformed line “#{line}” in the block rule MSON of #{@pathOfTheFileCurrentlyBeingRendered}."

			rule[key](value)

		# Lint the content blocker JSON rule string
		try
			@lintRule rule.value()
		catch error
			throw new Error "[Blockdown: blocker rule] Lint error, please check that your block rule MSON is valid. #{error} #{error.stack}"

		# Add the linted rule to the rules for this Markdown document.
		@rules.push rule.value()

		# Add the linted rule to the rules for the file that’s currently being rendered.
		@rulePartials[@pathOfTheFileCurrentlyBeingRendered].push rule.value()

		#
		# Add basic syntax highlighting to the original code in HTML.
		#
		renderedCode = @markdownRenderer.code originalCode, language

		dash = "<span class='punctuation'>-</span>"
		colon = "<span class='punctuation'>:</span>"

		# NOTE(review): the pattern runs over the rendered HTML rather than the
		# raw MSON, so a dash in the wrapper markup (e.g. a language class name)
		# could be picked up as the start of a match — confirm against the
		# configured renderer.
		# Guard against a missing result so that code without any matches
		# cannot break the loop.
		matches = renderedCode.matches(/-([\t ]*)(.+?):([\t ]*)(.*?)$/gm) or []

		for found in matches
			whitespaceAfterDash = found.groups[0]
			key = found.groups[1]
			whitespaceAfterColon = found.groups[2]
			value = found.groups[3]

			if key in ['trigger', 'action']
				# Top-level key, no value (just key).
				key = "<span class='key top-level'>#{key}</span>"
			else
				key = "<span class='key'>#{key}</span>"
				value = "<span class='value'>#{value}</span>"

			highlightedCode = "#{dash}#{whitespaceAfterDash}#{key}#{colon}#{whitespaceAfterColon}#{value}"

			# Use a replacer function: with a replacement *string*, sequences
			# such as $$, $& or $' inside a rule value (e.g. a url-filter regular
			# expression) would be interpreted as replacement patterns and
			# corrupt the output.
			renderedCode = renderedCode.replace found.match, -> highlightedCode

		return renderedCode

975

976
977
978
979
980
981
	#
	# Image renderer.
	#
	# Adds image size support to Markdown.
	# Based on https://github.com/chjj/marked/issues/339#issuecomment-40975942
	#
982
	imageRenderer: (href, title, text) =>
		# Renders an image, using the Markdown title slot to carry an optional
		# size: "<width>" or "<width>x<height>" (digits, optionally followed by
		# a percent sign).
		#
		# Only titles of that shape are treated as a size. Any other title is
		# emitted as a regular title attribute; previously every title was
		# split on 'x', so a title such as "Example" produced
		# width=E height=ample.
		#
		# href:  (String) image source.
		# title: (String) size specification or plain title; may be empty.
		# text:  (String) alternative text.
		# Returns: (String) the <img> tag.
		#
		# NOTE(review): values are interpolated as received — assumes the
		# caller passes them already escaped; confirm.
		attributes = ''

		if title
			dimensions = /^(\d+%?)(?:x(\d+%?)?)?$/.exec title

			if not dimensions
				attributes = "title=\"#{title}\""
			else if dimensions[2]
				attributes = 'width=' + dimensions[1] + ' height=' + dimensions[2]
			else
				attributes = 'width=' + dimensions[1]

		return "<img src=\"#{href}\" alt=\"#{text}\" #{attributes}>"
994

995
996

	#
997
998
999
	# Heading renderer
	#
	# Headings let us know which section of the page we’re in so we can
1000
	# handle subsequent data accordingly.
1001
1002
1003
1004
	#
	headingRenderer: (text, level, raw) =>
		# Tracks which section of the page is being rendered: the statistics
		# flag is raised only while under a heading whose text is,
		# case-insensitively, “after better”, and lowered by every other heading.
		#
		# The heading itself is always rendered by the plain Markdown renderer.
		@parserIsInStatisticsSection = text.toLowerCase() is 'after better'

		return @markdownRenderer.heading text, level, raw

Aral Balkan's avatar
Aral Balkan committed
1013
1014
1015
1016
1017

	#
	# List renderer.
	#
	listRenderer: (body, ordered) =>
		# Lists inside the statistics section are rendered as statistics;
		# every other list goes to the plain Markdown renderer.
		unless @parserIsInStatisticsSection
			return @markdownRenderer.list body, ordered

		return @renderStatistics body


1023
1024
1025
1026
	#
	# List item renderer
	#
	# List items are the primary means by which we implement higher-order
1027
1028
1029
1030
	# semantics for Blockdown. List items that start with certain keywords are rendered
	# using specialised templates.
	#
	listItemRenderer: (text) =>
		# Renders a list item. Items that start with a known keyword are
		# rendered with a specialised template:
		#
		#   - “(Trackers)” / “(trackers)” -> trackers badge
		#   - one of the static badge titles below (matched ignoring case and
		#     whitespace) -> static badge
		#
		# Everything else is rendered as a regular Markdown list item.
		#
		# text:    (String) rendered contents of the list item.
		# Returns: (String) HTML.

		# @log.info "List item: #{text}"

		# Trackers badge.
		#
		# The calls need explicit parentheses: written as
		# `text.startsWith '(Trackers)' or text.startsWith '(trackers)'`, the
		# whole `or` expression becomes the argument of the first call, so the
		# lowercase variant was never tested.
		if text.startsWith('(Trackers)') or text.startsWith('(trackers)')
			return @renderTrackersBadgeListItem text

		#
		# Static badges.
		#
		staticBadges =
		[
			['(Aggressive)', 'Attempts to block malware blockers.'],
			['(Doorslam)', 'Interrupts your flow with modal dialogs.'],
			['(Clickbait)', 'Third-party exploitative content.'],
			['(Fingerprint)', 'Canvas fingerprinting.'],
			['(Web bug)', 'Invisible tracking pixel.'],
			['(Tracker)', 'Monetises you.']
		]

		# Support title variants to be forgiving (e.g., case and whitespace
		# insensitive) in authoring. The normalised text does not depend on the
		# badge, so compute it once.
		forgivingText = text.replace(/\s/g, '').toLowerCase()

		for badge in staticBadges
			title = badge[0]
			explanation = badge[1]

			forgivingTitle = title.replace(/\s/g, '').toLowerCase()

			if forgivingText.startsWith forgivingTitle
				return @renderStaticBadgeListItem title.toLowerCase(), explanation

		#
		# No known vocabulary matches, render as regular Markdown.
		#
		return @markdownRenderer.listitem text
1063
1064


1065
1066
1067
1068
1069
	#
	# Renders a static badge list item (these are the badges that contain an icon, title, and explanation).
	# The icon is added in the CSS (see the theme-for-app-data and theme-for-site-data repositories).
	#
	renderStaticBadgeListItem: (title, explanation) =>
		# Builds the <li> for a static badge.
		#
		# title:       (String) badge title, e.g. “(web bug)”; the first opening
		#              and the first closing parenthesis are removed.
		# explanation: (String) text for the badge explanation paragraph.
		# Returns:     (String) the list item HTML. Its id is the title with
		#              every space replaced by a dash.

		# @log.info "About to render static badge with title: #{title}"

		title = title.replace('(', '').replace(')', '')

		# Replace every space, not just the first one: a string pattern only
		# replaces the first occurrence, which would leave spaces in the id of
		# any title with more than two words.
		badgeID = title.replace(/ /g, '-')

		return "\t<li id='#{badgeID}' class='static-badge'>\n\t\t<h3 class='badge-title'>#{title}</h3>\n\t\t<p class='badge-explanation'>#{explanation}</p>\n\t</li>\n"
1078

1079

1080
1081
1082
1083
	#
	# Renders the trackers badge list item.
	#
	renderTrackersBadgeListItem: (text) =>
1084
		moreDetailsHTML = 'Exposes you to third-party sites.'
1085

1086
1087
1088
		#
		# Get the trackers.
		#
1089

1090
1091
		# Get the URLs of the individual trackers
		lines = (text.split "\n")
1092

Aral Balkan's avatar
Aral Balkan committed
1093
			# We will always have more than zero items in the list of lines, as otherwise an entry would not exist for this site, but
1094
1095
1096
1097
1098
1099
1100
		# check just in case (to account for potentially corrupted content).
		if lines.length >= 4
			# Remove the first line (the opening UL) and the last two lines (closing UL, followed by an empty line)
			lines = lines[1..lines.length-3]

			# The remaining lines are the URLs of the trackers
			numberOfTrackers = 0
1101
			trackersHash = {}
1102
1103
			lines.forEach (line) =>
				numberOfTrackers++
1104
1105
1106

				domain = line.replace '<li>', ''
				domain = domain.replace '</li>', ''
1107
				domain = domain.replace '<!-- /trackersList -->',