Verified Commit de7fd89d authored by Aral Balkan

Fixes #43

Inquiry no longer exits on Chromium crash. Launches Chromium afresh
for each inspection in an inquiry.
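
In outline, the fix wraps each inspection in its own `puppeteer.launch` call and treats a disconnected browser as a failure of that single inspection only. Below is a minimal sketch of the pattern, assuming a hypothetical `runInspection` helper that stands in for the real per-domain work done inside `inspect` in the diff that follows:

```coffeescript
# Sketch only: one fresh Chromium instance per inspection, so that a crash
# fails just that inspection instead of ending the whole inquiry.
puppeteer = require 'puppeteer'

inspect = (domain, callback) ->
  (puppeteer.launch args: ['--remote-debugging-port=9222']).then (browser) ->
    handleBrowserEvents = true

    browser.on 'disconnected', ->
      # Only treat the disconnect as a failure if we did not close the browser ourselves.
      callback new Error "Browser disconnected while inspecting #{domain}" if handleBrowserEvents

    # runInspection is a hypothetical stand-in for the real per-domain work.
    (runInspection domain, browser)
    .then (result) ->
      handleBrowserEvents = false
      browser.close()
      .then -> callback null, result
      .catch -> callback null, result  # suppress failures while closing the browser
    .catch (error) ->
      handleBrowserEvents = false
      browser.close()
      .then -> callback error
      .catch -> callback error
  .catch (error) ->
    callback new Error "Could not launch browser: #{error}"
```

The trade-off, as the in-code comment below notes, is efficiency: launching Chromium afresh for every domain is slower, but a crashed browser no longer takes the whole inquiry down with it.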
parent 77eec456
@@ -91,107 +91,153 @@ deployHTTPArchive = ->
inspect = (domain, callback) ->
# Start analysing a new domain
inquiryStatistics.domainsAnalysed++
# Progress: show that we are starting to investigate a new domain.
console.log " ┊ "
console.log " 🔍 #{domain}"
#
# Launch a new browser every time. This is not the most efficient way of doing things
# but it means that we can easily recover from browser crashes without taking
# down the whole inspection.
#
(puppeteer.launch args: ['--remote-debugging-port=9222'], dumpio: true).then (browser) ->
spinner = new Spinner(" %s")
spinner.setSpinnerString('|/-\\')
spinner.start()
handleBrowserEvents = true
# Start timing the function.
startTime = new Date()
browser.on "disconnected", ->
if handleBrowserEvents
console.log "\n\n\n\n\n 💀 Browser disconnected. Failing.\n\n"
# process.exit 1
browser.disconnect()
callback new Error "Browser disconnected."
#
# Prepare the HTTP Archive from local or remote source
#
prepareRemoteHTTPArchive = -> HTTPArchive.fromURL domain
prepareLocalHTTPArchive = -> HTTPArchive.fromFileAtPath (path.join archivePath, "#{domain}.har.gz")
browser.on "close", ->
if handleBrowserEvents
console.log "\n\n\n\n\n 🔥 Browser closed. Failing.\n\n"
# process exit 1
browser.disconnect()
callback new Error "Browser closed."
# Start analysing a new domain
inquiryStatistics.domainsAnalysed++
# Progress: show that we are starting to investigate a new domain.
console.log " ┊ "
console.log " 🔍 #{domain}"
spinner = new Spinner(" %s")
spinner.setSpinnerString('|/-\\')
spinner.start()
prepareHTTPArchive = if isLocalInquiry then prepareLocalHTTPArchive() else prepareRemoteHTTPArchive()
# Start timing the function.
startTime = new Date()
prepareHTTPArchive.then (archive) ->
#
# Success
# Prepare the HTTP Archive from local or remote source
#
prepareRemoteHTTPArchive = -> HTTPArchive.fromURL domain
prepareLocalHTTPArchive = -> HTTPArchive.fromFileAtPath (path.join archivePath, "#{domain}.har.gz")
# Save the HTTP Archive file
# hostname = archive.parsedArchive.hostname()
filePath = path.join path.homedir(), 'better.fyi', 'archive', "#{domain}.har.gz"
prepareHTTPArchive = if isLocalInquiry then prepareLocalHTTPArchive() else prepareRemoteHTTPArchive()
(archive.save filePath).then ->
prepareHTTPArchive.then (archive) ->
#
# Success
#
spinner.stop(true)
# Save the HTTP Archive file
# hostname = archive.parsedArchive.hostname()
filePath = path.join path.homedir(), 'better.fyi', 'archive', "#{domain}.har.gz"
domainsInspected++
(archive.save filePath).then ->
# Calculate how long it took.
endTime = new Date()
taskDuration = endTime - startTime
spinner.stop(true)
# Update global stats
inquiryStatistics.taskDurations.push taskDuration
taskDurationInSeconds = (moment.duration taskDuration).asSeconds().toFixed(2)
domainsInspected++
inquiryStatistics.successes++
# Calculate how long it took.
endTime = new Date()
taskDuration = endTime - startTime
# Get the statistics for this inspection.
stats = archive.parsedArchive.statistics()
# Update global stats
inquiryStatistics.taskDurations.push taskDuration
taskDurationInSeconds = (moment.duration taskDuration).asSeconds().toFixed(2)
# Aggregate the statistics
globalStatistics.add stats
inquiryStatistics.successes++
# Display statistics
console.log " ┊ "
console.log " ├─✅ #{domain}"
console.log " ┊ ├─ (#{domainsInspected} of #{domainsToInspect})"
console.log " ┊ ├─ #{stats.requestsBefore}#{stats.requestsAfter} (#{stats.deltaRequests})"
console.log " ┊ ├─ #{stats.sizeBefore}#{stats.sizeAfter} (#{stats.deltaSize})"
console.log " ┊ ╰─ #{stats.timeBefore}#{stats.timeAfter} (#{stats.deltaTime})"
# Get the statistics for this inspection.
stats = archive.parsedArchive.statistics()
# Debug
console.log " #{globalStatistics}"
# console.log globalStatistics.json()
# Aggregate the statistics
globalStatistics.add stats
#
# Check if there’s a tracker we’re searching for
#
if args.find?
for request in Array.from archive.parsedArchive.thirdPartyRequests.uniqueURLs.set
if (request.indexOf args.find) > -1
# Display statistics
console.log " ┊ "
console.log " ├─✅ #{domain}"
console.log " ┊ ├─ (#{domainsInspected} of #{domainsToInspect})"
console.log " ┊ ├─ #{stats.requestsBefore}#{stats.requestsAfter} (#{stats.deltaRequests})"
console.log " ┊ ├─ #{stats.sizeBefore}#{stats.sizeAfter} (#{stats.deltaSize})"
console.log " ┊ ╰─ #{stats.timeBefore}#{stats.timeAfter} (#{stats.deltaTime})"
if not foundThirdPartyRequests[domain]?
foundThirdPartyRequests[domain] = []
# Debug
console.log " #{globalStatistics}"
# console.log globalStatistics.json()
foundThirdPartyRequests[domain].push request
#
# Check if there’s a tracker we’re searching for
#
if args.find?
for request in Array.from archive.parsedArchive.thirdPartyRequests.uniqueURLs.set
if (request.indexOf args.find) > -1
callback null, domain
.catch (error) ->
#
# Failure
#
spinner.stop(true)
if not foundThirdPartyRequests[domain]?
foundThirdPartyRequests[domain] = []
domainsInspected++
foundThirdPartyRequests[domain].push request
# Calculate how long it took.
endTime = new Date()
taskDuration = endTime - startTime
# We don’t want to trigger the error conditions, so remove them.
handleBrowserEvents = false
browser.disconnect()
browser.close()
.then ->
callback null, domain
.catch (e) ->
console.log ("B. Suppressing Puppeteer failure to close browser.")
callback null, domain
.catch (error) ->
#
# Failure
#
spinner.stop(true)
# Update global stats
inquiryStatistics.taskDurations.push taskDuration
taskDurationInSeconds = (moment.duration taskDuration).asSeconds().toFixed(2)
domainsInspected++
# Calculate how long it took.
endTime = new Date()
taskDuration = endTime - startTime
# Update global stats
inquiryStatistics.taskDurations.push taskDuration
taskDurationInSeconds = (moment.duration taskDuration).asSeconds().toFixed(2)
inquiryStatistics.failures++
inquiryStatistics.failures++
errorMessage = "#{domain} (#{error})"
errorMessage = "#{domain} (#{error})"
console.log " ┊ "
console.log " ├─❌ #{errorMessage} (#{domainsInspected} of #{domainsToInspect})\n#{error.stack}"
# We don’t want to trigger the error conditions, so remove them.
handleBrowserEvents = false
browser.disconnect()
browser.close()
.then ->
callback new Error errorMessage
.catch (e) ->
console.log ("B. Suppressing Puppeteer failure to close browser.")
callback new Error errorMessage
.catch (error) ->
console.log ("Could not launch browser " + error)
callback new Error "Could not launch browser"
console.log " ┊ "
console.log " ├─❌ #{errorMessage} (#{domainsInspected} of #{domainsToInspect})\n#{error.stack}"
callback new Error errorMessage
# Promisify our function.
inspect = Promise.promisify inspect
@@ -314,69 +360,61 @@ prepareInspections = if isLocalInquiry then prepareInspectionsForLocalInquiry()
# reporting incorrect values when concurrency > 1, however (looks like ~double for 2, triple for 3)…
# TODO: look into this.
#
(puppeteer.launch args: ['--remote-debugging-port=9222']).then (browser) ->
browser.on "disconnected", ->
console.log "\n\n\n\n\n 💀 Browser disconnected. Exiting.\n\n"
process.exit 1
prepareInspections.then (inspections) ->
Promise.mapSeries(inspections, ((inspection) -> return inspection().reflect()), {concurrency: 1})
.then (inspection) ->
_ " ┴"
_ "\nResults:\n"
inspection
.each (inspection) ->
if inspection.isFulfilled()
_ " ✅ #{inspection.value()}"
else
_ " ❌ #{inspection.reason().message}"
.then ->
#
# Save the global statistics (which will be used by Builder)
#
prepareInspections.then (inspections) ->
Promise.mapSeries(inspections, ((inspection) -> return inspection().reflect()), {concurrency: 1})
.then (inspection) ->
_ " ┴"
_ "\nResults:\n"
inspection
.each (inspection) ->
if inspection.isFulfilled()
_ " ✅ #{inspection.value()}"
else
_ " ❌ #{inspection.reason().message}"
.then ->
#
# Save the global statistics (which will be used by Builder)
#
statisticsFilePath = path.join path.homedir(), 'better.fyi', '.private', 'statistics.json'
fs.writeFileAsync statisticsFilePath, globalStatistics.json()
.then =>
#
# Display statistics
#
_ "\nStatistics"
_ "==========\n"
_ " · Domains analysed: #{inquiryStatistics.domainsAnalysed}"
_ " · Successes: #{inquiryStatistics.successes}"
_ " · Failures: #{inquiryStatistics.failures}"
_ " · Average task duration: #{moment.duration(inquiryStatistics.taskDurations.reduce((prev, current) -> (prev + current))/inquiryStatistics.taskDurations.length).asSeconds().toFixed(2)}s"
_ " · Total task duration: #{moment.duration(inquiryStatistics.taskDurations.reduce((prev, current) -> prev + current)).asSeconds().toFixed(2)}s"
_ "\n"
#
# Display found domains
#
if args.find?
domainsSearchTermWasFoundOn = Object.keys foundThirdPartyRequests
if domainsSearchTermWasFoundOn.length > 0
underline = Array(args.find.length+1).join '='
_ "Found requests containing ‘#{args.find}’"
_ "============================#{underline}\n"
for domain in domainsSearchTermWasFoundOn
matchingThirdPartyRequestsOnDomain = foundThirdPartyRequests[domain]
_ " · #{domain}"
for request in matchingThirdPartyRequestsOnDomain
_ " · #{request}"
_ "\n"
else
_ "Could not find requests containing ‘#{args.find}’."
statisticsFilePath = path.join path.homedir(), 'better.fyi', '.private', 'statistics.json'
fs.writeFileAsync statisticsFilePath, globalStatistics.json()
.then =>
#
# Display statistics
#
_ "\nStatistics"
_ "==========\n"
_ " · Domains analysed: #{inquiryStatistics.domainsAnalysed}"
_ " · Successes: #{inquiryStatistics.successes}"
_ " · Failures: #{inquiryStatistics.failures}"
_ " · Average task duration: #{moment.duration(inquiryStatistics.taskDurations.reduce((prev, current) -> (prev + current))/inquiryStatistics.taskDurations.length).asSeconds().toFixed(2)}s"
_ " · Total task duration: #{moment.duration(inquiryStatistics.taskDurations.reduce((prev, current) -> prev + current)).asSeconds().toFixed(2)}s"
_ "\n"
#
# Display found domains
#
if args.find?
domainsSearchTermWasFoundOn = Object.keys foundThirdPartyRequests
if domainsSearchTermWasFoundOn.length > 0
underline = Array(args.find.length+1).join '='
_ "Found requests containing ‘#{args.find}’"
_ "============================#{underline}\n"
for domain in domainsSearchTermWasFoundOn
matchingThirdPartyRequestsOnDomain = foundThirdPartyRequests[domain]
_ " · #{domain}"
for request in matchingThirdPartyRequestsOnDomain
_ " · #{request}"
_ "\n"
else
_ "Could not find requests containing ‘#{args.find}’."
.catch (error) =>
_ "\nError saving global statistics: #{error}\n"
.catch (error) ->
console.log ("Could not launch browser " + error)
\ No newline at end of file
.catch (error) =>
_ "\nError saving global statistics: #{error}\n"
@@ -41,6 +41,10 @@ Inquiry is a tool that runs inspections on [the domains currently being tracked
./inquiry --local : re-examine the existing HTTP Archives in the archive
```
## Known issues
[Issue 44](https://source.ind.ie/better/inspector/issues/44): To stop Chromium crashes from taking down the whole inquiry (see [issue 43](https://source.ind.ie/better/inspector/issues/43)), we now run a fresh Chromium instance for each inspection. This currently has the side effect that you cannot easily exit an inquiry. The easiest way at the moment is to close the terminal tab (and check your processes for a dangling Chromium instance to kill).
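
As a cleanup aid (a sketch only, not part of the tool), something along these lines can kill a Chromium instance left behind by an interrupted inquiry; the same `pkill` command can also be run directly in a terminal. It assumes a POSIX system with `pkill` available and matches on the `--remote-debugging-port=9222` flag that Inquiry passes to `puppeteer.launch`:

```coffeescript
# Sketch only: kill a Chromium instance left behind by an interrupted inquiry.
# Run this only after the inquiry itself has been stopped, or it will also
# kill the Chromium instance of the in-flight inspection.
{exec} = require 'child_process'

exec 'pkill -f "remote-debugging-port=9222"', (error) ->
  # pkill exits non-zero when nothing matched, i.e. there was nothing to clean up.
  console.log 'No dangling Chromium instance found.' if error?
```
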
## License
Inspector is released under GNU AGPLv3.
\ No newline at end of file