################################################################################
#
# Better
#
# Blockdown Parser: parses Blockdown content and generates the
# data for the Better site and iOS app.
#
# This is Independent Technology.
#
# ▲❤ We practice Ethical Design (https://ind.ie/ethical-design)
#
# © Aral Balkan. © Ind.ie. All Rights Reserved.
# Released with love by Ind.ie under GNU AGPLv3 or later.
# Free as in freedom. Please see the LICENSE file.
#
################################################################################

fs = require 'fs-extra-as-promised'
path = require 'path'
marked = require 'marked'
jsonlint = require 'jsonlint'

chai = require 'chai'
expect = chai.expect
assert = chai.assert
chai.should()

Promise = require 'thrush'

glob = require 'glob'
globAsync = Promise.promisify glob

eyespect = require 'eyespect'
inspect = eyespect.inspector { stream: null }

winston = require 'winston'

app = require './App'
ContentBlockerRule = require './ContentBlockerRule'
set = require 'indie-set'
GlobalStatistics = require './GlobalStatistics'
require './StringExtensions' # for String::trim()

# The module must be bound to a name: a bare `require 'crypto'` leaves
# `crypto` undeclared in this module, which `checksum` (below) relies on.
crypto = require 'crypto'

#
# Calculate checksum
#
# Returns the hex-encoded MD5 digest of the passed string (read as UTF-8).
#
checksum = (str) ->
  return crypto
    .createHash('md5')
    .update(str, 'utf8')
    .digest('hex')


class Blockdown

  log: null

  site: "site"
  app: "app"

  markdownRenderer: null    # (marked.Renderer)
  blockdownRenderer: null   # (marked.Renderer)
  rules: null               # (Array)
  rulePartials: null        # (Object)

  parserIsInStatisticsSection: false
  numberOfTrackers: 0

  pathOfTheFileCurrentlyBeingRendered: null

  theme: null

  #
  # Cache-related.
  #
  useContentCache: false
  contentHasNotChanged: false
  contentCurrentModifiedTimes: null
  contentPreviousModifiedTimes: null

  #
  # Public methods.
  #

  #
  # Creates the two marked renderers, sets up logging, and reads the
  # cache state (see setupCache).
  #
  constructor: ->
    # Initialise properties
    @rules = []
    @rulePartials = {}

    # Initialise two renderers (one for regular Markdown, the
    # other for the Blockdown code sections).
    @markdownRenderer = new marked.Renderer()
    @markdownRenderer.image = @imageRenderer
    @markdownRenderer.options.headerPrefix = ''   # Otherwise we get undefinedheader-slug
    @markdownRenderer.options.langPrefix = ''     # Ditto for the code language prefix
    @markdownRenderer.options.gfm = true

    @blockdownRenderer = new marked.Renderer()
    @blockdownRenderer.code = @codeRenderer
    @blockdownRenderer.image = @imageRenderer
    @blockdownRenderer.list = @listRenderer
    @blockdownRenderer.listitem = @listItemRenderer
    @blockdownRenderer.heading = @headingRenderer

    # Set up logging.
    logFile = path.join app.logsDirectory, 'Blockdown.log'
    @log = new winston.Logger
      transports: [
        new (winston.transports.Console)({ level: 'info'})
        new (winston.transports.File)({ filename: logFile, level: 'debug' })
      ]

    # Setup cache
    @setupCache()

    # @log.debug "Blockdown initiated. Logs at #{logFile}"

  #
  # Setup cache.
  #
  # We are now using the cache both during development and production as we
  # use the content cache to calculate the diff of changes since the last
  # deployment.
  #
  # Sets @useContentCache, @contentHasNotChanged, @contentCurrentModifiedTimes
  # and @contentPreviousModifiedTimes from the JSON files in the cache directory.
  #
  setupCache: =>
    #
    # NOTE(review): an earlier note here said the cache is detected and turned
    # off in production (a production deployment should always do a full,
    # clean build). No such check is performed in this method — confirm
    # whether that is still the intent.
    #
    cacheDirectory = path.join app.homeDirectory, '.private', 'cache'
    themeCacheCurrentModifiedTimesFile = path.join cacheDirectory, 'current-modified-times', 'themes.json'
    themeCachePreviousModifiedTimesFile = path.join cacheDirectory, 'previous-modified-times', 'themes.json'
    themeCacheCurrentModifiedTimes = fs.readJsonSync themeCacheCurrentModifiedTimesFile, 'utf-8'
    themeCachePreviousModifiedTimes = fs.readJsonSync themeCachePreviousModifiedTimesFile, 'utf-8'

    # Check if there are any template changes (if there are, this will require a full rebuild),
    # otherwise, we can use the cache and we won’t need to re-render the content.
    @useContentCache = true
    for fileName, lastModifiedDate of themeCachePreviousModifiedTimes
      if (fileName.indexOf '/templates/') != -1
        # This is a template, check if it has changed
        if lastModifiedDate != themeCacheCurrentModifiedTimes[fileName]
          # There are changes to a theme template, do not use the cache.
          @log.info "\t✓ Theme template changed (#{fileName}), *not* using cache."
          @useContentCache = false
          break

    # Set up the content cache (we will use it, if nothing else, to calculate
    # the last modified time for the metadata).
    contentCacheCurrentModifiedTimesFile = path.join cacheDirectory, 'current-modified-times', 'content.json'
    contentCachePreviousModifiedTimesFile = path.join cacheDirectory, 'previous-modified-times', 'content.json'
    _contentCurrentModifiedTimes = fs.readFileSync contentCacheCurrentModifiedTimesFile, 'utf-8'
    _contentPreviousModifiedTimes = fs.readFileSync contentCachePreviousModifiedTimesFile, 'utf-8'

    # The raw file contents are compared as strings before being parsed.
    @contentHasNotChanged = _contentCurrentModifiedTimes == _contentPreviousModifiedTimes
    @contentCurrentModifiedTimes = JSON.parse(_contentCurrentModifiedTimes)
    @contentPreviousModifiedTimes = JSON.parse(_contentPreviousModifiedTimes)

    # Inform person if we are using the content cache.
    if @useContentCache
      console.log "\t✓ No changes to theme templates; using cache."

  #
  # Render data for site.
  #
  renderDataForSite: =>
    # @log.debug "Rendering data for site."

    @theme = app.siteTheme

    # Reset the rules
    @rules = []

    # Reset the global statistics for the site render.
    GlobalStatistics.reset()

    #
    # TODO: Need a separate pass to create the indices as they
    #       will include folders as well as the content files.
    #
    #       This could be done at the end, on the data folder.
    #
    indices = {}

    Promise.try =>
      # Clean the data directory before starting only if we’re not using the cache.
if @useContentCache #console.log "\t✓ Using cache" return true else # @log.info "Not using cache: returning promise to delete the data for site directory from Promise.try" return fs.removeAsync "#{app.dataForSiteDirectory}/*" .then => @renderBlockdown app.contentDirectory, app.dataForSiteDirectory, app.siteTheme .then (indices) => # TODO: Make this async # TODO: Update render pipeline to only render content (and these partials) # in the content pass. @saveRulePartials() @saveBlockerListJSON app.dataForSiteDirectory .then => Promise.try => if @useContentCache and @contentHasNotChanged return else @copyAssets @contentAssets('site'), app.dataForSiteDirectory .then => Promise.try => if @useContentCache and app.themeHasNotChanged console.log "\t✓ Theme has not changed, not copying theme assets for site." return else console.log "\t✓ Copying theme assets for site." @copyAssets @themeForSiteDataAssets(), app.dataForSiteDirectory .then => Promise.try => if @useContentCache @removeDeletedFiles() .catch (e) => throw new Error "[Blockdown: site data] #{e} #{e.stack}" # # Private methods. # # Returns the partial HTML after a Set render. partial: (fullHTML) -> (fullHTML.replace '', '').replace('', '') # # Takes HTML rendered from Markdown and wraps sections into section tags, # transferring the id from the H2 to the section tag. # wrapSections: (html) -> # Write out the opening section tags html = html.replace(/

(.*?)<\/h2>/g, "
\n\t

$2

") # Write out the closing section tags html = html.replace(/()/g, "\n
\n$1") # Remove the erroneous first tag html = html.replace('', '') # Write out the last tag html = html.replace('', '') imageExtensions: ['.jpg', '.gif', '.png', '.svg'] contentAssets: => dataExtensions = ['.json', '.html', '.js'] contentAssetExtensions = (@imageExtensions.concat dataExtensions).join '|' contentAssetPath = app.contentDirectory return (extensions: contentAssetExtensions, path: contentAssetPath) # Now that we have the new unified themes repository, we can just copy any # files from the /static/ folder over without worrying about limiting # to a subset of extensions but I’m keeping this as it is safer in that we # won’t accidentally let an unsupported file type get published. themeAssetExtensions: => return (['.woff', '.js', '.css', '.html'].concat @imageExtensions).join '|' themeForSiteDataAssets: => fontExtensions = ['.woff2', '.ttf', '.svg', '.eot'].join '|' safariExtensionExtensions = ['.safariextz', '.plist'].join '|' themeAssetExtensions = "#{@themeAssetExtensions()}|#{fontExtensions}|#{safariExtensionExtensions}" return (extensions: themeAssetExtensions, path: (path.join app.themeForSiteDataDirectory, 'static')) # # Copies assets to the destination # copyAssets: (assets, destinationPath) => # @log.info "Copying assets: #{assets.path} to #{destinationPath}" (globAsync "#{assets.path}/**/*@(#{assets.extensions})") .series (file) => # Check if the file has been deleted since the previous render. 
# (If so, do not ) baseName = path.basename file dirName = path.dirname file dirName = dirName.replace assets.path, '' destinationPathForDirectory = path.join destinationPath, dirName destinationPathForFile = path.join destinationPathForDirectory, baseName # @log.info "Copying asset: #{file}" fs.ensureDirAsync destinationPathForDirectory .then => fs.copyAsync file, destinationPathForFile # # Returns whether the string1 ends with string2 # # Note: We also check for > 0, below, to handle the edge case # where string1 does not end with string2 (so index = -1) # and where string2 is one character longer than string1, # resulting in string1.length - string2.length == -1 == index. # endsWith: (string1, string2) -> index = string1.indexOf(string2) index > 0 && index == string1.length - string2.length # # Removes deleted files (when using the cache) # removeDeletedFiles: => # @log.info "Looking for deleted files in the content…" # Only relevant for content at the moment (as we do a complete rebuild # whenever the theme changes and this will not even get called). for file of @contentPreviousModifiedTimes if @contentCurrentModifiedTimes[file] == undefined # File has been deleted. Make sure its compiled assets are removed # from the build also. console.log "\t✓ Source file #{file} was deleted; removing from build." # Set the base of the path of the item to delete pathToDelete = app.dataForSiteDirectory if @endsWith file, 'index.md' # # If the index.md of an entry is removed, we assume that the entry itself was # removed and delete its folder from the build. # filePathComponents = file.split('/') filePathComponents.pop() _file = filePathComponents.join('/') pathToDelete = path.join pathToDelete, _file else if @endsWith file, '.md' # # MD files are translated to HTML files. 
# _file = file.replace('.md', '.html') pathToDelete = path.join pathToDelete, _file else # # For all other files, just remove them directly # pathToDelete = path.join pathToDelete, file # @log.info "About to delete #{pathToDelete} from the build folder (data)." fs.removeSync pathToDelete # # Renders Blockdown from content path to data path. # renderBlockdown: (contentPath, dataPath, theme) => indices = {} titleOfPage = '' # Used to hold the index pages of the various categories. indexContent = spotlight: {} sites: {} trackers: {} none: {} (globAsync "#{contentPath}/**/*.md", {}) .series (file) => # @log.info "Reading blockdown file: #{file}" # # Check cache # if @useContentCache parsedFilePath = (path.parse file) # console.log(parsedFilePath) indexToStartFrom = if app.isRunningInProduction then 4 else 5 filePathKey = path.join parsedFilePath.dir.split('/')[indexToStartFrom..].join('/'), parsedFilePath.base # console.log(filePathKey) # @log.info "File path key: #{filePathKey}" # @log.info "Current: #{@contentCurrentModifiedTimes[filePathKey]}" # @log.info "Previous: #{@contentPreviousModifiedTimes[filePathKey]}" if @contentCurrentModifiedTimes[filePathKey] == @contentPreviousModifiedTimes[filePathKey] ################################################################################ # # Use cache. # ################################################################################ # @log.info "Cache: Using existing file." 
# # Add the rule partial to the rules array # rulePartialFilePath = path.join app.cacheDirectory, 'rule-partials', "#{filePathKey}.rule.json" # @log.info "Rule partial file path: #{rulePartialFilePath}" if fs.existsSync rulePartialFilePath rulePartialString = fs.readFileSync rulePartialFilePath, 'utf-8' rulePartialArray = JSON.parse rulePartialString @rules = @rules.concat rulePartialArray # # Update the indices # outputHTMLFileName = (filePathKey.replace '.md', '.html') indexPathComponents = outputHTMLFileName.split('/') indexHTMLFileName = indexPathComponents.pop() indexRelativeFolder = "/#{indexPathComponents.join('/')}" if indices[indexRelativeFolder] is undefined indices[indexRelativeFolder] = [] indices[indexRelativeFolder].push indexHTMLFileName indexInfoJSONFileName = "#{indexHTMLFileName}.json" indexInfoJSONFilePath = path.join app.indicesDirectory, indexRelativeFolder, indexInfoJSONFileName indexInfoExists = fs.existsSync indexInfoJSONFilePath if indexInfoExists indexInfo = fs.readJsonSync indexInfoJSONFilePath indexContent[indexInfo.category][indexInfo.titleOfPageClean] = indexInfo.listItem else console.log "\t⚠ Could not find index info JSON from path #{indexInfoJSONFilePath}. Ignoring…" # Break out so we don’t fall through a fresh render. return ################################################################################ # # Fresh render. 
# ################################################################################ fs.readFileAsync file, 'utf-8' .then (content) => if @useContentCache console.log "\t✓ Rendering Blockdown in file #{file}" fileComponents = file.split('/') categoryIndex = ('sites' in fileComponents and fileComponents.indexOf('sites')) or ('trackers' in fileComponents and fileComponents.indexOf('trackers')) category = if categoryIndex then fileComponents[categoryIndex] else 'none' pageDomain = if categoryIndex then fileComponents[categoryIndex+1] else 'none' # Add more specific ‘site’ or ‘tracker’ to category class list if necessary categoryForTemplate = if categoryIndex and categoryIndex == (fileComponents.length - 3) then "#{category} #{category.substr(0, category.length-1)}" else category # # Add the actions to the bottom of pages that have both a category and a domain # (i.e., content pages) # if category isnt 'none' and pageDomain isnt 'none' actionsBlock = """ ## About Better Better is a Safari content blocker for iPhone, iPad, and Mac. It protects you from trackers and malvertising by enforcing the principles of [Ethical Design](https://ind.ie/ethical-design). ## Get involved [Improve page](https://source.ind.ie/better/content/blob/master/#{category}/#{pageDomain}/index.md) | [Report issue](https://source.ind.ie/better/content/issues) | [Discuss](https://forum.ind.ie/c/better) """ content += actionsBlock @pathOfTheFileCurrentlyBeingRendered = file # Save the title of the page so that we can update the # information, later. titleFromHeading = content.match /^# (.*)?\n/ if titleFromHeading is undefined or titleFromHeading is null titleOfPage = "" else # Set the title of the page titleOfPage = titleFromHeading[1] # Strip the markdown from the title for the site (keep it for the app # as we use it to intelligently truncate the title displayed in the navitation bar). # Only valid markup for headings is strong and emphasized, so just strips the *s. 
titleOfPageClean = titleOfPage.replace new RegExp('\\*', 'g'), '' # Update the index page renderedTitleOfPage = titleOfPage.replace /^\*\*(.*?)\*\*(.*)/, "$1$2" listItem = "\t
  • #{renderedTitleOfPage}
  • \n" indexContent[category][titleOfPageClean] = listItem # # Save the index title info for the cache. # pathObject = path.parse file relativeFolder = pathObject.dir.replace contentPath, '' jsonFileName = "#{pathObject.name}.html.json" indexCacheFile = path.join app.indicesDirectory, relativeFolder, jsonFileName indexCacheObject = category: category titleOfPageClean: titleOfPageClean listItem: listItem # @log.info "Index cache file: #{indexCacheFile}" # @log.info "Index cache object:" # console.log indexCacheObject fs.outputJSONSync indexCacheFile, indexCacheObject # Strip the markdown from the title for the site (keep it for the app # as we use it to intelligently truncate the title displayed in the navitation bar). # Only valid markup for headings is strong and emphasized, so just strips the *s. titleOfPage = "#{titleOfPageClean} | Better" marked.setOptions renderer: @blockdownRenderer gfm: true marked content, (error, content) => if error # # Blockdown parser fatal error: panic! # # Something went wrong in markdown parsing. This will lead to corrupted # data and we cannot have that. Better to fail and provide as much information as possible # so this can be debugged and fixed in development before hitting production. # throw new Error "[Blockdown: parser] #{error} #{error.stack}" else pathObject = path.parse file # Find out which relative folder we’re in relativeFolder = pathObject.dir.replace contentPath, '' #console.log "*** Relative folder: #{relativeFolder}" #console.log "*** pathObject.name: #{pathObject.name}" if relativeFolder is '' relativeFolder = '/' if indices[relativeFolder] is undefined indices[relativeFolder] = [] htmlFileName = "#{pathObject.name}.html" indices[relativeFolder].push htmlFileName #console.log "*** HTML file name: #{htmlFileName}" # Selected item selectedItem = '' currentDirectory = pathObject.dir parts = currentDirectory.split('/content/') if parts.length >= 2 selectedItem = parts[1] # Path for page. 
pagePath = file.replace contentPath, dataPath pagePath = pagePath.replace '.md', '.html' # Render the page. headerPartialHTML = @partial(set.render(@theme.header, {navigationList: @navigationList selectedItem})) pageHTML = set.render(@theme.page, {title: titleOfPage, header: headerPartialHTML, content: content, category: categoryForTemplate}) pageHTML = @wrapSections pageHTML fs.outputFileAsync pagePath, pageHTML .then => # Sort the trackers and spotlight indices alphabetically. indexContent.trackers = @alphabetiseIndex indexContent.trackers indexContent.sites = @alphabetiseIndex indexContent.sites # Only render pages from themes if the theme has changed Promise.try => if !@useContentCache # Render the /news index. @renderPage 'news', dataPath, '/news/index.html', {}, 'news' .then => # Only render pages from themes if the theme has changed Promise.try => if !@useContentCache # Render the /spotlight index. @renderPage 'spotlight', dataPath, '/spotlight/index.html', {}, 'spotlight' .then => # Only render pages from themes if the theme has changed Promise.try => if !@useContentCache # Render the /spotlight/1 index. @renderPage 'spotlightIssue1', dataPath, '/spotlight/1/index.html', {}, 'spotlight' .then => # Don’t render index files if the theme and content haven’t changed. Promise.try => if @useContentCache and @contentHasNotChanged return else # Render the /sites index. @renderPage 'sites', dataPath, '/sites/index.html', {list: indexContent.sites}, 'sites' .then => # Don’t render index files if the theme and content haven’t changed. Promise.try => if @useContentCache and @contentHasNotChanged return else # Render /trackers index. @renderPage 'trackers', dataPath, '/trackers/index.html', {list: indexContent.trackers}, 'trackers' .then => # Only render pages from themes if the theme has changed Promise.try => if !@useContentCache # Render /reviews index. 
@renderPage 'reviews', dataPath, '/reviews/index.html', {}, 'reviews' .then => # Only render pages from themes if the theme has changed Promise.try => if !@useContentCache # Render /support index. @renderPage 'support', dataPath, '/support/index.html', {}, 'support' .then => # Only render pages from themes if the theme has changed Promise.try => if !@useContentCache # Render home page. @renderHomePageStatistics().then (homePageStatisticsHTML) => @renderPage 'home', dataPath, '/index.html', {statistics: homePageStatisticsHTML}, 'home' .then => #@log.info "Indices:" #console.log indices return indices # # Takes an index object and returns a string of the list items, alphabetically sorted by the key. # alphabetiseIndex: (index) => keys = Object.keys index keys.sort (a,b) -> a.localeCompare(b, 'en', {'sensitivity': 'base'}) keys.reduce (previous, current) -> previous += index[current] , '' # # Renders a page on the site. # renderPage: (name, dataPath, pathFragment, data={}, category='none') => theme = @theme[name] contentPartialHTML = @partial(set.render(theme, data)) navigationList = @navigationList name headerPartialHTML = @partial(set.render(@theme.header, {navigationList: navigationList})) title = name[0].toUpperCase() + name.slice(1) # title is the name with the first letter capitalised. title += ' | Better' pageHTML = set.render(@theme.page, {title: title, header: headerPartialHTML, content: contentPartialHTML, category: category}) pagePath = path.join dataPath, pathFragment fs.outputFileAsync pagePath, pageHTML # # Returns configured navigation list data structure with the # passed navigation element marked as selected. # navigationList: (selectedItem) => # # Navigation list for local links # # (The forum and source links are not here as they’re external and cannot be # highlighted dynamically in the navigation when navigated to.) 
navigationList = [ { class: 'home', link: '/', label: 'Home', selected: false} { class: 'news', link: '/news', label: 'News', selected: false} { class: 'sites', link: '/sites', label: 'Sites', selected: false} { class: 'trackers', link: '/trackers', label: 'Trackers', selected: false} { class: 'support', link: '/support', label: 'Support', selected: false} ] for i in [0...navigationList.length] navigationList[i].selected = selectedItem.startsWith navigationList[i].class return navigationList # # Saves the WebKit content blocking rule file, blockerList.json. # saveBlockerListJSON: (dataDirectory) => # if @contentHasNotChanged # console.log("\t✓ Content has not changed. Not saving blocker list JSON or its metadata file.") # return Promise.resolve() # # Content has changed. Save the blocker list and metadata files. # blockerListJSONFilePath = path.join dataDirectory, 'blockerList.json' metadataFilePath = path.join dataDirectory, 'metadata.json' args = [@rules] output = JSON.stringify.apply @, args hash = checksum(output) # TODO: Only update metadata if rules have been updated. # # Prepare metadata # rulesAdded = new Set() rulesUpdated = new Set() rulesDeleted = new Set() sitesAdded = new Set() sitesUpdated = new Set() sitesDeleted = new Set() trackersAdded = new Set() trackersUpdated = new Set() trackersDeleted = new Set() TRACKERS = 'trackers' SITES = 'sites' for key, value of @contentCurrentModifiedTimes uniqueIdentifier = key.replace /^(.*?\/.*?)\/.*$/, "$1" uniqueIdentifierSubcategory = key.replace /^.*?\/(.*?)\/.*$/, "$1" # Added? if (@contentPreviousModifiedTimes[key] == undefined) rulesAdded.add uniqueIdentifier if (key.indexOf TRACKERS) > -1 trackersAdded.add uniqueIdentifierSubcategory if (key.indexOf SITES) > -1 sitesAdded.add uniqueIdentifierSubcategory # Updated? 
else if (@contentPreviousModifiedTimes[key] != value) rulesUpdated.add uniqueIdentifier if (key.indexOf TRACKERS) > -1 trackersUpdated.add uniqueIdentifierSubcategory if (key.indexOf SITES) > -1 sitesUpdated.add uniqueIdentifierSubcategory for key, value of @contentPreviousModifiedTimes uniqueIdentifier = key.replace /(.*\/*.)\/.*/, "$1" # Deleted? if (@contentCurrentModifiedTimes[key] == undefined) rulesDeleted.add uniqueIdentifier if (key.indexOf TRACKERS) > -1 trackersDeleted.add uniqueIdentifierSubcategory if (key.indexOf SITES) > -1 trackersDeleted.add uniqueIdentifierSubcategory rulesChanged = rulesAdded.size + rulesUpdated.size + rulesDeleted.size trackersChanged = trackersAdded.size + trackersUpdated.size + trackersDeleted.size sitesChanged = sitesAdded.size + sitesUpdated.size + sitesDeleted.size # console.log("") # console.log("Rules changed : #{rulesChanged}") # console.log("Trackers changed : #{trackersChanged}") # console.log("Sites changed : #{sitesChanged}") # console.log("") # console.log("Rules added (#{rulesAdded.size}):") # console.log(rulesAdded) # console.log("Ruled updated (#{rulesUpdated.size}):") # console.log(rulesUpdated) # console.log("Rules deleted (#{rulesDeleted.size}):") # console.log(rulesDeleted) # console.log("") # console.log("Trackers added (#{trackersAdded.size}):") # console.log(trackersAdded) # console.log("Trackers updated (#{trackersUpdated.size}):") # console.log(trackersUpdated) # console.log("Trackers deleted (#{trackersDeleted.size}):") # console.log(trackersDeleted) # console.log("") # console.log("Sites added (#{sitesAdded.size}):") # console.log(sitesAdded) # console.log("Sites updated (#{sitesUpdated.size}):") # console.log(sitesUpdated) # console.log("Sites deleted (#{sitesDeleted.size}):") # console.log(sitesDeleted) # console.log("") # Calculate the last update time based on the content cache. # Since timestamps have lexographical order, we can use a reduce # on the values of the cache dictionary to achieve this. 
# (Using ES6) as it is easier. (And we will move the codebase to ES6 eventually.) lastModifiedTimes = `Object.values(this.contentCurrentModifiedTimes)` lastModifiedTime = `lastModifiedTimes.reduce ((x,y) => { return (x > y ? x : y) })` metadata = `JSON.stringify({ md5: hash, lastUpdate: lastModifiedTime, stats: { rulesChanged: rulesChanged, trackersChanged: trackersChanged, sitesChanged: sitesChanged, trackers: { added: [...trackersAdded], updated: [...trackersUpdated], deleted: [...trackersDeleted] }, sites: { added: [...sitesAdded], updated: [...sitesUpdated], deleted: [...sitesDeleted] } } })` # Save the blocker list file and its metadata. (fs.outputFileAsync blockerListJSONFilePath, output).then => fs.outputFileAsync metadataFilePath, metadata # # Saves the rule partials in JSON format. # saveRulePartials: => for pathOfOriginalFile, rules of @rulePartials # Determine where to store the partial rule (used in caching so that we # can create the blocker list without having to render every rule every time). parsedPathOfOriginalFile = path.parse pathOfOriginalFile partialRuleFileRelativePath = parsedPathOfOriginalFile.dir.split('/')[5..].join('/') partialRuleFilePath = path.join app.rulePartialsDirectory, partialRuleFileRelativePath fs.ensureDirSync partialRuleFilePath partialRuleFile = path.join partialRuleFilePath, "#{parsedPathOfOriginalFile.base}.rule.json" #console.log "Partial rule file: #{partialRuleFile}" args = [rules] rulesJSON = (JSON.stringify.apply @, args) #console.log "Rules: #{rulesJSON}" fs.outputFileSync partialRuleFile, rulesJSON, 'utf-8' # # Hook into the marked Markdown renderer for the Blockdown MSON code sections. 
# codeRenderer: (code, language) => # @log.info "Language: #{language}, Code: >#{code}<" originalCode = code if language is 'mson' # @log.info "Rendering Blockdown MSON in #{@pathOfTheFileCurrentlyBeingRendered}" # Create the dictionary entry in the table of files currently being rendered # to their rule partials if one does not already exist. if @rulePartials[@pathOfTheFileCurrentlyBeingRendered] == undefined @rulePartials[@pathOfTheFileCurrentlyBeingRendered] = [] # Convert blocker rule MSON to JSON # Do a very strict conversion of the MSON based on the block list JSON specification. # Trim any leading or trailing whitespace code = code.trim() # Handle authoring-time optimisation: if no dashes are found in the code, # assume that it is a url-filter entry and transform it into one. if (code.indexOf '-') != 0 and (code.indexOf "\t-") != 0 and (code.indexOf ' -') != 0 # console.log "Transforming naked url-filter rule: #{code}" code = "- url-filter: #{code}" # Flatten the key/value pairs safeDelimeter = '\udbff\udfff' code = code.replace /^[\t, ]*-[\t, ]*(.*?):[\t, ]*(.*?)$/mg, "$1#{safeDelimeter}$2" # Split into lines lines = code.split "\n" # # Create the rule. # rule = new ContentBlockerRule lines.forEach (line) -> keyValuePair = line.split safeDelimeter key = keyValuePair[0] value = keyValuePair[1] # console.log "#{key} = #{value}" rule[key](value) # Lint the content blocker JSON rule string try @lintRule rule.value() catch error throw new Error "[Blockdown: blocker rule] Lint error, please check that your block rule MSON is valid. #{error} #{error.stack}" # Add the linted rule to the rules for this Markdown document. @rules.push rule.value() # Add the linted rule to the rules for the file that’s currently being rendered. @rulePartials[@pathOfTheFileCurrentlyBeingRendered].push rule.value() # # Add basic syntax highlighting to the original code in HTML. 
# renderedCode = @markdownRenderer.code originalCode, language dash = "-" colon = ":" matches = renderedCode.matches(/-([\t ]*)(.+?):([\t ]*)(.*?)$/gm) for _ in matches whitespaceAfterDash = _.groups[0] key = _.groups[1] whitespaceAfterColon = _.groups[2] value = _.groups[3] if key in ['trigger', 'action'] # Top-level key, no value (just key). key = "#{key}" else key = "#{key}" value = "#{value}" highlightedCode = "#{dash}#{whitespaceAfterDash}#{key}#{colon}#{whitespaceAfterColon}#{value}" renderedCode = renderedCode.replace _.match, highlightedCode return renderedCode # Render as regular code. return @markdownRenderer.code originalCode, language # # Image renderer. # # Adds image size support to Markdown. # Based on https://github.com/chjj/marked/issues/339#issuecomment-40975942 # imageRenderer: (href, title, text) => if title size = title.split 'x' if size[1] size = 'width=' + size[0] + ' height=' + size[1] else size = 'width=' + size[0] else size = '' return "\"#{text}\"" # # Heading renderer # # Headings let us know which section of the page we’re in so we can # handle subsequent data accordingly. # headingRenderer: (text, level, raw) => lowercaseText = text.toLowerCase() @parserIsInStatisticsSection = false switch lowercaseText when 'after better' then @parserIsInStatisticsSection = true # Fall back to plain Markdown renderer. return @markdownRenderer.heading text, level, raw # # List renderer. # listRenderer: (body, ordered) => if @parserIsInStatisticsSection return @renderStatistics body return @markdownRenderer.list body, ordered # # List item renderer # # List items are the primary means by which we implment higher-order # semantics for Blockdown. List items that start with certain keywords are rendered # using specialised templates. # listItemRenderer: (text) => # @log.info "List item: #{text}" # Trackers badge. if text.startsWith '(Trackers)' or text.startsWith '(trackers)' then return @renderTrackersBadgeListItem text # # Static badges. 
# staticBadges = [ ['(Aggressive)', 'Attempts to block malware blockers.'], ['(Doorslam)', 'Interrupts your flow with modal dialogs.'], ['(Clickbait)', 'Third-party exploitative content.'], ['(Fingerprint)', 'Canvas fingerprinting.'], ['(Web bug)', 'Invisible tracking pixel.'], ['(Tracker)', 'Monetises you.'] ] for badge in staticBadges title = badge[0] explanation = badge[1] # Support title variants to be forgiving (e.g., case and whitespace insensitive) in authoring forgivingTitle = title.replace(/\s/g, '').toLowerCase() forgivingText = text.replace(/\s/g, '').toLowerCase() if forgivingText.startsWith "#{forgivingTitle}" then return @renderStaticBadgeListItem title.toLowerCase(), explanation # # No known vocabulary matches, render as regular Markdown. # return @markdownRenderer.listitem text # # Renders a static badge list item (these are the badges that contain an icon, title, and explanation). # The icon is added in the CSS (see the theme-for-site-data repository). # renderStaticBadgeListItem: (title, explanation) => # @log.info "About to render static badge with title: #{title}" title = title.replace('(', '').replace(')', '') badgeID = title.replace(' ', '-') staticBadgeListItem = "\t
  • \n\t\t

    #{title}

    \n\t\t

    #{explanation}

    \n\t
  • \n"
    return staticBadgeListItem

  #
  # Renders the trackers badge list item.
  #
  # text: (String) list item text; it is split on newlines and every line
  #       between the first line and the last two is treated as one tracker.
  #
  # Side effects (when at least four lines are present): pushes the tracker
  # count onto GlobalStatistics.trackers and stores it in @numberOfTrackers.
  #
  # NOTE(review): several string literals in this method (the arguments to
  # replace, trackersListCode and the list item markup) appear to have had
  # their HTML stripped in this copy of the file; they are reproduced as-is.
  #
  renderTrackersBadgeListItem: (text) =>
    moreDetailsHTML = 'Exposes you to third-party sites.'

    #
    # Get the trackers.
    #

    # Get the URLs of the individual trackers
    lines = (text.split "\n")

    # We will always have more than zero items in the list of lines, as otherwise an entry would not exist for this site, but
    # check just in case (to account for potentially corrupted content).
    if lines.length >= 4
      # Remove the first line (the opening UL) and the last two lines (closing UL, followed by an empty line)
      lines = lines[1..lines.length-3]

      # The remaining lines are the URLs of the trackers
      numberOfTrackers = 0
      trackersHash = {}
      lines.forEach (line) =>
        # Every line is counted, including lines that later collapse onto the
        # same key in trackersHash.
        numberOfTrackers++
        domain = line.replace '
  • ', ''
        domain = domain.replace '
  • ', ''
        domain = domain.replace '', '' # in case the closing tag gets tacked on, strip it

        # Defaults for a tracker; name and nameDetails are overwritten below
        # when the domain is found in app.contentIndex.trackers.
        trackerArrayEntry =
          domain: domain,
          name: domain,
          nameDetails: ''
          ruleExists: true

        if domain not in app.contentIndex.trackers
          # Domain is not in the content index: list the bare domain.
          trackerArrayEntry.formattedEntryForSite = "\t\t\t
  • #{domain}
  • \n"
          trackerArrayEntry.ruleExists = false
        else
          # Domain is in the content index: use its details (name, nameDetails, formattedLink).
          details = app.trackerDomainToTrackerDetailsIndex[domain]
          trackerArrayEntry.name = details.name
          trackerArrayEntry.nameDetails = details.nameDetails
          trackerArrayEntry.formattedEntryForSite = "\t\t\t
  • #{details.formattedLink}
  • \n"

        # Keyed by name, so entries that share a name overwrite one another.
        trackersHash[trackerArrayEntry.name]=trackerArrayEntry

      # Alphabetise
      trackerKeys = Object.keys trackersHash
      trackerKeys.sort (a,b) -> a.localeCompare(b, 'en', {'sensitivity': 'base'})

      # Collect the entries in sorted key order.
      trackersArray = []
      trackerKeys.map (key) -> trackersArray.push trackersHash[key]

      # Create the formatted trackers list for the site
      trackersList = trackersArray.reduce ((previous, current) -> previous += current.formattedEntryForSite), ''

      # Remove the now unnecessary keys
      trackersArray.map (obj) -> delete obj.formattedEntryForSite

      # NOTE(review): as it appears here trackersList is never used and
      # trackersListCode is assigned an empty string twice; the second literal
      # presumably wrapped trackersList in markup — confirm against the real source.
      trackersListCode = ''
      trackersListCode = ""

      # Update global statistics.
      GlobalStatistics.trackers.push numberOfTrackers

      # And save it for use in other sections
      @numberOfTrackers = numberOfTrackers

      # Generate the HTML
      listItem = "\t
  • \n\t\t

    #{numberOfTrackers} trackers

    \n\t\t

    #{moreDetailsHTML}

    \n\t\t#{trackersListCode}\n\t
  • \n"
      return listItem
    else
      # Corrupted content
      @log.error "Corrupted content on #{@pathOfTheFileCurrentlyBeingRendered}: list item: #{text}. Skipping Blockdown render and defaulting to Markdown."
      return @markdownRenderer.listitem text

  #
  # Renders the home page statistics using the global statistics JSON file
  # created by the inspector. (Inspector must have been run at least once for
  # this to succeed.)
  #
  # Takes no arguments. Returns the promise chain started by fs.readFileAsync;
  # the callback passed to then returns the rendered HTML string.
  #
  renderHomePageStatistics: =>
    #
    # Read in the statistics file and transform the data into the same format
    # used by the ‘improvements’ section on site pages so we can reuse this
    # on the main page for aggregate statistics.
    #
    (fs.readFileAsync (path.join app.privateDirectory, 'statistics.json'), 'utf-8').then (json) =>
      # _ holds the parsed contents of statistics.json.
      _ = JSON.parse json

      # Each formatted delta is split on a single space; element 0 is used as
      # the improvement amount and element 1 as the improvement type.
      requestsDeltaFormatted = _.requests.total.delta.actual.formatted.split(' ')
      sizeDeltaFormatted = _.size.total.delta.actual.formatted.split(' ')
      timeDeltaFormatted = _.time.total.delta.actual.formatted.split(' ')

      # NOTE(review): indentation was lost in this copy of the file, so the
      # nesting below (improvement as a sibling of values, details inside
      # improvement) is reconstructed — confirm against the theme templates.
      data = statistics:[
        {
          type: 'requests'
          label: 'Requests:'
          values:
            before: _.requests.total.before.formatted
            after: _.requests.total.after.actual.formatted
          improvement:
            amount: requestsDeltaFormatted[0]
            type: requestsDeltaFormatted[1]
            details:
              primary: "#{_.requests.total.delta.actual.amount.formatted}"
              secondary: "blocked"
        },
        {
          type: 'weight'
          label: 'Weight:'
          values:
            before: _.size.total.before.formatted
            after: _.size.total.after.actual.formatted
          improvement:
            amount: sizeDeltaFormatted[0]
            type: sizeDeltaFormatted[1]
            details:
              primary: "#{_.size.total.delta.actual.amount.formatted}"
              secondary: "saved"
        },
        {
          type: 'speed'
          label: 'Speed:'
          values:
            before: _.time.total.before.formatted
            after: _.time.total.after.actual.formatted
          improvement:
            amount: timeDeltaFormatted[0]
            type: timeDeltaFormatted[1]
            details:
              primary: "#{_.time.total.delta.actual.amount.formatted}"
              secondary: "saved"
        }
      ]

      #
      # Render the HTML
      #
      html = ''

      # Improvement summaries
      html += @partial(set.render(@theme.improvements, data))

      # Table
      html +=
@partial(set.render(@theme.table, data))
      return html

  #
  # Renders the After Better improvement statistics
  #
  # body: (String) the statistics list items, one per line. After the list
  #       item tags and template substitution comments are removed, each line
  #       must split on single spaces into exactly 11 parts, the first of
  #       which is the label (‘Weight:’, ‘Speed:’ or ‘Requests:’).
  #
  # Returns the rendered HTML (improvement summaries followed by the table).
  # If anything fails, the error is logged and an error notice string is
  # returned instead, so that parsing of the remaining content can continue.
  #
  renderStatistics: (body) =>
    syntaxErrorMessage = 'Blockdown statistics syntax error'
    listOfValidStatistics = ['weight', 'speed', 'requests']
    data = {statistics:[]}

    try
      statistics = body.split "\n"
      if statistics[statistics.length - 1] is '' then statistics.pop() # Remove the last line, if it’s empty.

      # Syntax check: statistics list
      if statistics.length isnt 3
        throw new Error "#{syntaxErrorMessage} (statistics.length): #{statistics}"

      for statistic in statistics
        # Remove the list item tags
        # NOTE(review): the tag literals appear stripped in this copy of the
        # file; they are reproduced as-is.
        statistic = statistic.replace('
  • ', '').replace('
  • ', '')

        # Remove the template substitution comments
        # NOTE(review): the pattern between the slashes appears stripped in
        # this copy of the file; it is reproduced as-is.
        statistic = statistic.replace(//g, '')

        parts = statistic.split ' '

        # Syntax check: statistic parts
        expectedNumberOfParts = 11
        if parts.length isnt expectedNumberOfParts
          throw new Error "#{syntaxErrorMessage} – parts.length (#{parts.length}) isn’t #{expectedNumberOfParts}: #{parts}"

        # Statistic type ('weight', 'speed', 'requests')
        statisticLabel = parts[0]
        statisticType = statisticLabel.toLowerCase().replace ':', ''

        # Syntax check: statistic type
        if statisticType not in listOfValidStatistics
          throw new Error "#{syntaxErrorMessage}: #{statisticType} is not a valid statistic type."

        #
        # Create the statistic object and pass it to the data array.
        #
        # NOTE(review): indentation was lost in this copy of the file, so the
        # nesting of details inside improvement is reconstructed — confirm
        # against the theme templates.
        #
        statistic =
          type: statisticType
          label: statisticLabel
          values:
            before: "#{parts[1]} #{parts[2]}"
            after: "#{parts[4]} #{parts[5]}"
          improvement:
            amount: "#{parts[9]}"
            type: "#{parts[10].replace(')', '')}"
            details:
              primary: "#{parts[6].replace('(', '')} #{parts[7]}"
              secondary: "#{parts[8].replace(',', '')}"

        data.statistics.push statistic

      # Index the parsed statistics by their type. (Previously this was done
      # by position and read data.statistics[3] for requests, which is always
      # undefined as there are exactly three entries; keying on the type also
      # removes the dependency on the order of the lines in the content.)
      statistics = {}
      statistics[parsedStatistic.type] = parsedStatistic for parsedStatistic in data.statistics

      #
      # Add the statistics to the global statistics object
      # (By convention, we only do this during the site render pass so as
      # not to duplicate the global statistics.)
      #
      # GlobalStatistics.savings.weight.push Number(statistics.weight.improvement.amount)
      # GlobalStatistics.savings.speed.push Number(statistics.speed.improvement.amount)
      # GlobalStatistics.savings.requests.push Number(statistics.requests.improvement.amount)

      #
      # Render the HTML
      #
      html = ''

      # Improvement summaries
      html += @partial(set.render(@theme.improvements, data))

      # Table
      html += @partial(set.render(@theme.table, data))

      return html

    catch e
      #
      # Something failed in the statistics for this page.
      #
      # Make a note on the page itself but don’t stop parsing.
      #
      @log.error e

      # Build a link to the content source from everything after ‘/content/’
      # in the path of the file being rendered.
      indexOfContentFragment = @pathOfTheFileCurrentlyBeingRendered.indexOf('/content/')
      filePathFragment = @pathOfTheFileCurrentlyBeingRendered[indexOfContentFragment..].replace('/content/', '')
      urlPrefixToTheSource = 'https://source.ind.ie/better/content/blob/master/'
      urlToTheSource = urlPrefixToTheSource + filePathFragment
      urlFragments = urlToTheSource.split('/')

      # The last three path segments are what is shown to the reader.
      humanReadablePath = urlFragments[urlFragments.length-3..].join('/')

      # TODO: Also add link to the documentation for the correct format.
      return "

    Error: The “After Better” section of this site’s information does not conform to proper Blockdown syntax. Please fix it in the Better source: #{humanReadablePath}

    "

  #
  # Content blocker JSON rule string linter.
  #
  # All linting rules based on, and quoted from:
  # https://www.webkit.org/blog/3476/content-blockers-first-look/
  #
  # rule: (Object) a single content blocker rule, i.e., a dictionary with a
  #       ‘trigger’ part and an ‘action’ part.
  #
  # Throws an Error (prefixed with ‘[Blockdown] [Eyespect]’) that carries the
  # first failed check, its stack and a dump of the rule.
  #
  lintRule: (rule) =>
    try
      #
      # Content blocker format
      #
      # The content blocker rules are passed in JSON format.
      # The top level object is an array containing every rule that needs to be loaded.
      #
      # Each rule of the content blocker is a dictionary with two parts:
      # a trigger which activates the rule, and an action defining what to do when the rule is activated.
      #
      expect(rule, 'rule').to.have.property 'trigger'
      expect(rule, 'rule').to.have.property 'action'

      #
      # The “trigger” defines what properties activate a rule. When the rule is activated, its action is
      # queued for execution. When all the triggers have been evaluated, the actions are applied in order.
      #
      # The valid fields in the trigger are:
      #
      # “url-filter” (string, mandatory): matches the resource’s URL.
      # “url-filter-is-case-sensitive”: (boolean, optional): changes the “url-filter” case-sensitivity.
      # “resource-type”: (array of strings, optional): matches how the resource will be used.
      # “load-type”: (array of strings, optional): matches the relation to the main resource.
      # “if-domain”/”unless-domain” (array of strings, optional): matches the domain of the document.
      #
      trigger = rule['trigger']
      triggerProperties = new Set Object.keys(rule['trigger'])

      # Check for existence of mandatory url-filter property.
      expect(trigger, 'trigger').to.have.property 'url-filter'
      urlFilter = trigger['url-filter']

      # Check the type before any string methods are called on the filter so
      # that a non-string value fails with a meaningful message.
      expect(urlFilter, 'url-filter').to.be.a 'string'

      # Make sure that the URL filter is a valid regular expression
      try
        new RegExp urlFilter
      catch e
        # Fail.
        expect(e, 'url-filter should be a valid regular expression').to.not.exist

      #
      # The Regular expression format
      #
      # It is possible to use the beginning of line (“^”) and end of line (“$”) marker but they are restricted
      # to be the first and last character of the expression. For example, a pattern like “^bar$” is perfectly valid,
      # while “(foo)?^bar$” causes a syntax error.
      #

      # URL Filter Regular Expression special case: ^
      indexOfCaret = urlFilter.indexOf '^'
      theIndexOfTheFirstCharacter = 0
      if indexOfCaret > -1 then expect(indexOfCaret, 'If ^ is used in the URL filter regular expression, it must be the first character').to.equal theIndexOfTheFirstCharacter

      # URL Filter Regular Expression special case: $
      indexOf$ = urlFilter.indexOf '$'
      theIndexOfTheLastCharacter = urlFilter.length-1
      if indexOf$ > -1 then expect(indexOf$, 'If $ is used in the URL filter regular expression, it must be the last character').to.equal theIndexOfTheLastCharacter

      #
      # All URL matching is done against the canonical version of the URL. As such, you can expect the URL to be
      # completely ASCII. The domain will already be punycode encoded. Both the scheme and domain are already
      # lowercase. The resource part of the URL is already percent encoded.
      #
      # Since the URL is known to be ASCII, the url-filter is also restricted to ASCII. Patterns with non-ASCII
      # characters result in a parse error.
      #
      urlFilterIsASCII = /^[\x00-\x7F]*$/.test urlFilter
      expect(urlFilterIsASCII, 'url-filter should only contain ASCII characters').to.be.true

      #
      # Check trigger types
      #
      if triggerProperties.has 'url-filter-is-case-sensitive'
        expect(trigger['url-filter-is-case-sensitive'], 'url-filter-is-case-sensitive').to.be.a 'boolean'

      #
      # The optional field “resource-type” specifies the type of load to match.
      # The content of this field is an array with all the types of load that can activate the trigger.
      #
      # The possible values are:
      #
      # “document”
      # “image”
      # “style-sheet”
      # “script”
      # “font”
      # “raw” (any untyped load, like XMLHttpRequest)
      # “svg-document”
      # “media”
      # “popup”
      #
      if triggerProperties.has 'resource-type'
        resourceTypes = trigger['resource-type']
        @assertIsAnArrayOfStrings resourceTypes, 'trigger', 'resource-type'
        @assertActualValuesAreValid resourceTypes, ['document', 'image', 'style-sheet', 'script', 'font', 'raw', 'svg-document', 'media', 'popup'], 'trigger', 'resource-type'

      #
      # The field “load-type” defines the relation between the domain of the resource being loaded
      # and the domain of the document. The two possible values are:
      #
      # “first-party”
      # “third-party”
      #
      if triggerProperties.has 'load-type'
        loadTypes = trigger['load-type']
        @assertIsAnArrayOfStrings loadTypes, 'trigger', 'load-type'
        @assertActualValuesAreValid loadTypes, ['first-party', 'third-party'], 'trigger', 'load-type'

      if triggerProperties.has 'if-domain'
        @assertIsAnArrayOfStrings trigger['if-domain'], 'trigger', 'if-domain'

      if triggerProperties.has 'unless-domain'
        @assertIsAnArrayOfStrings trigger['unless-domain'], 'trigger', 'unless-domain'

      # Make sure both if-domain and unless-domain are not present at the same time.
      #
      # The calls are parenthesised explicitly: written without parentheses,
      # the first implicit call swallows the rest of the expression and
      # becomes has('if-domain' and has('unless-domain')), which is always
      # false, so the check could never fail.
      hasBothDomainConditions = triggerProperties.has('if-domain') and triggerProperties.has('unless-domain')
      assert.equal hasBothDomainConditions, false, 'Both if-domain and unless-domain should not be set at the same time on the rule’s trigger.'

      # Check for invalid Trigger properties.
      @assertActualValuesAreValid triggerProperties, ['url-filter', 'url-filter-is-case-sensitive', 'resource-type', 'load-type', 'if-domain', 'unless-domain'], 'trigger', 'property'

      #
      # The “action” part of the dictionary defines what the engine should do
      # when a resource is matched by a trigger.
      #
      # Currently, the action object has only 2 valid fields:
      #
      # * “type” (string, mandatory): defines what to do when the rule is activated.
      # * “selector” (string, mandatory for the “css-display-none” type):
      #   defines a selector list to apply on the page.
      #
      action = rule['action']
      actionProperties = new Set Object.keys(rule['action'])

      # has() is called with explicit parentheses so that the message is
      # passed to expect (and shown on failure) instead of to has.
      expect(actionProperties.has('type'), 'Action property ‘type’ is mandatory and should exist.').to.be.true
      actionType = action['type']
      expect(actionType, 'action.type').to.be.a 'string'

      if actionType is 'css-display-none'
        expect(actionProperties.has('selector'), 'If the action type is ‘css-display-none’ then action property ‘selector’ must exist.').to.be.true
        actionSelector = action['selector']
        expect(actionSelector, 'action.selector').to.be.a 'string'

      #
      # There are 3 types of actions that limit resources:
      #
      # “block”, “block-cookies”, “css-display-none”.
      #
      # There is an additional type that does not have any impact on the resource
      # but changes how the content extension behaves: “ignore-previous-rules”.
      #
      assert.equal (actionType is 'block' or actionType is 'block-cookies' or actionType is 'css-display-none' or actionType is 'ignore-previous-rules'), true, 'Action type should be valid.'

    catch e
      # The rule is dumped with the module-level inspector (created at the top
      # of this file via eyespect.inspector { stream: null }).
      # NOTE(review): this previously called eyespect.inspect; the file only
      # ever obtains an inspector through eyespect.inspector, so a failure in
      # this handler would have masked the lint error — confirm that the
      # inspector returns the formatted string when stream is null.
      throw new Error "[Blockdown] [Eyespect] #{e} #{e.stack} #{inspect rule}"

  #
  # Checks that the passed object is an array of strings. Throws if not.
  #
  # obj:                    the value to check.
  # parentObjectsHumanName: (String) name of the owning object, used in the failure message.
  # propertysHumanName:     (String) name of the property, used in the failure message.
  #
  assertIsAnArrayOfStrings: (obj, parentObjectsHumanName, propertysHumanName) ->
    expect(obj, "The value of #{parentObjectsHumanName}’s #{propertysHumanName} property should be an array.").to.be.an 'array'
    obj.forEach (property) ->
      expect(property, "The value of #{parentObjectsHumanName}’s #{propertysHumanName} property should be an array of strings.").to.be.a 'string'

  #
  # Checks that the values in the actual values array conform to the set of values in the valid values array.
  # Throws if not.
#
  # actualValuesArray: the values to check (anything new Set accepts).
  # validValuesArray:  the permitted values.
  # parentObjectsHumanName, propertysHumanName: (String) used to build the
  #   failure message.
  #
  # How it works: the supplied values and the permitted values are merged
  # into a single set. If that merged set is not the same size as the set of
  # permitted values, at least one supplied value is not permitted and the
  # expectation fails.
  #
  assertActualValuesAreValid: (actualValuesArray, validValuesArray, parentObjectsHumanName, propertysHumanName) ->
    merged = new Set actualValuesArray
    permitted = new Set validValuesArray

    # Fold every permitted value into the copy of the supplied values.
    permitted.forEach (permittedValue) ->
      merged.add permittedValue
      return

    failureMessage = "All #{parentObjectsHumanName} #{propertysHumanName} values should be valid."
    expect(permitted.size, failureMessage).to.equal merged.size

module.exports = Blockdown