You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

258 lines
6.6 KiB
JavaScript

8 months ago
'use strict'
const {
mkdir,
readFile,
rm,
stat,
truncate,
writeFile,
} = require('fs/promises')
const pMap = require('p-map')
const contentPath = require('./content/path')
const fsm = require('fs-minipass')
const glob = require('./util/glob.js')
const index = require('./entry-index')
const path = require('path')
const ssri = require('ssri')
// Own-property test that borrows the method from Object.prototype, so it
// gives the right answer even when `obj` shadows `hasOwnProperty` or has
// no prototype at all.
const hasOwnProperty = (obj, key) => {
  const ownCheck = Object.prototype.hasOwnProperty
  return ownCheck.call(obj, key)
}
// Builds the effective options for a verify run.
//
// Caller-supplied keys are layered over the defaults (`concurrency: 20` and
// a logger whose `silly` method does nothing). The caller's object is never
// mutated; a fresh object is returned.
//
// A key that is present but nullish (e.g. `{ log: undefined }`, which is
// what forwarding an unset option produces) would otherwise overwrite its
// default through the spread and make the first `opts.log.silly(...)` call
// throw, so such keys are reset to their defaults afterwards.
const verifyOpts = (opts) => {
  const merged = {
    concurrency: 20,
    log: { silly () {} },
    ...opts,
  }
  if (merged.concurrency == null) {
    merged.concurrency = 20
  }
  if (merged.log == null) {
    merged.log = { silly () {} }
  }
  return merged
}
module.exports = verify
// Runs the full verification pipeline against the cache at `cache`.
//
// The steps run strictly one after another, each receiving `(cache, opts)`
// with defaults already applied. Whatever object a step resolves to is
// merged into the returned stats; a falsy result contributes nothing.
// `stats.runTime` maps each step's function name to its wall-clock duration
// in milliseconds, plus `total`, which is `endTime - startTime`.
async function verify (cache, opts) {
  const settings = verifyOpts(opts)
  settings.log.silly('verify', 'verifying cache at', cache)

  const pipeline = [
    markStartTime,
    fixPerms,
    garbageCollect,
    rebuildIndex,
    cleanTmp,
    writeVerifile,
    markEndTime,
  ]

  const stats = {}
  for (const step of pipeline) {
    const startedAt = new Date()
    const partial = await step(cache, settings)
    if (partial) {
      for (const key of Object.keys(partial)) {
        stats[key] = partial[key]
      }
    }
    const finishedAt = new Date()
    // Created lazily, after the first step's results have been merged.
    if (!stats.runTime) {
      stats.runTime = {}
    }
    stats.runTime[step.name] = finishedAt - startedAt
  }

  const total = stats.endTime - stats.startTime
  stats.runTime.total = total
  settings.log.silly(
    'verify',
    'verification finished for',
    cache,
    'in',
    `${stats.runTime.total}ms`
  )
  return stats
}
// First pipeline step: records the moment verification began.
// Both parameters are unused; they exist because every step is invoked
// as `step(cache, opts)`.
async function markStartTime (cache, opts) {
  const startTime = new Date()
  return { startTime }
}
// Last pipeline step: records the moment verification finished.
// Both parameters are unused; they exist because every step is invoked
// as `step(cache, opts)`.
async function markEndTime (cache, opts) {
  const endTime = new Date()
  return { endTime }
}
// Makes sure the cache directory exists, creating any missing parent
// directories along the way. Always resolves to null, so it adds nothing
// to the verification stats.
async function fixPerms (cache, opts) {
  const { log } = opts
  log.silly('verify', 'fixing cache permissions')
  await mkdir(cache, { recursive: true })
  return null
}
// A naive mark-and-sweep tracing garbage collector for cache content.
//
// Outline:
// 1. Stream every index entry ("pointer"), dropping those `opts.filter` rejects
// 2. Remember each integrity value those entries carry as "live"
// 3. List every file below the `content-vX/` directory
// 4. Live content has its checksum verified; content that fails is deleted
// 5. Content that nothing marked as live is rm'd
//
// Resolves to the counters verifiedContent, reclaimedCount, reclaimedSize,
// badContentCount and keptSize.
async function garbageCollect (cache, opts) {
  opts.log.silly('verify', 'garbage collecting content')

  // --- mark ---
  const indexStream = index.lsStream(cache)
  const liveContent = new Set()
  indexStream.on('data', (entry) => {
    const wanted = !opts.filter || opts.filter(entry)
    if (!wanted) {
      return
    }
    // The stored integrity is a string; parse it again to reach each hash.
    const parsed = ssri.parse(entry.integrity)
    for (const algo in parsed) {
      liveContent.add(parsed[algo].toString())
    }
  })
  await new Promise((resolve, reject) => {
    indexStream.on('end', resolve).on('error', reject)
  })

  // --- sweep ---
  const pattern = path.join(contentPath.contentDir(cache), '**')
  const files = await glob(pattern, {
    follow: false,
    nodir: true,
    nosort: true,
  })
  const stats = {
    verifiedContent: 0,
    reclaimedCount: 0,
    reclaimedSize: 0,
    badContentCount: 0,
    keptSize: 0,
  }

  const sweep = async (file) => {
    // The trailing three path segments concatenate to the hex digest; the
    // segment just before them names the hash algorithm.
    const segments = file.split(/[/\\]/)
    const digest = segments.slice(segments.length - 3).join('')
    const algo = segments[segments.length - 4]
    const integrity = ssri.fromHex(digest, algo)

    if (!liveContent.has(integrity.toString())) {
      // No entries refer to this content. We can delete.
      stats.reclaimedCount++
      const fileStat = await stat(file)
      await rm(file, { recursive: true, force: true })
      stats.reclaimedSize += fileStat.size
      return stats
    }

    const info = await verifyContent(file, integrity)
    if (info.valid) {
      stats.verifiedContent++
      stats.keptSize += info.size
    } else {
      stats.reclaimedCount++
      stats.badContentCount++
      stats.reclaimedSize += info.size
    }
    return stats
  }

  await pMap(files, sweep, { concurrency: opts.concurrency })
  return stats
}
// Checks one content file against the integrity value `sri`.
//
// Resolves to `{ size, valid }`:
//   - the file's stat size with `valid: true` when the integrity check passes
//   - `{ size: 0, valid: false }` when the file does not exist (ENOENT)
//   - the stat size with `valid: false` when the check fails with
//     EINTEGRITY, in which case the file is removed as well
// Any other error is rethrown unchanged.
async function verifyContent (filepath, sri) {
  const contentInfo = {}
  try {
    const fileStat = await stat(filepath)
    contentInfo.size = fileStat.size
    contentInfo.valid = true
    await ssri.checkStream(new fsm.ReadStream(filepath), sri)
    return contentInfo
  } catch (err) {
    const { code } = err
    if (code === 'ENOENT') {
      return { size: 0, valid: false }
    }
    if (code !== 'EINTEGRITY') {
      throw err
    }
  }
  // Only an EINTEGRITY failure reaches this point: drop the corrupt file.
  await rm(filepath, { recursive: true, force: true })
  contentInfo.valid = false
  return contentInfo
}
// Rewrites the index from the entries currently listed in it.
//
// Entries are grouped into buckets by their hashed key. An entry rejected by
// `opts.filter` is counted in `rejectedEntries` and left out of its bucket,
// but the bucket itself is still created (possibly empty) so that it gets
// rebuilt too. Every bucket is then handed to rebuildBucket, which updates
// the shared stats object.
//
// Resolves to `{ missingContent, rejectedEntries, totalEntries }`.
async function rebuildIndex (cache, opts) {
  opts.log.silly('verify', 'rebuilding index')
  const entries = await index.ls(cache)
  const stats = {
    missingContent: 0,
    rejectedEntries: 0,
    totalEntries: 0,
  }

  const buckets = {}
  for (const key in entries) {
    /* istanbul ignore if */
    if (!hasOwnProperty(entries, key)) {
      continue
    }
    const hashed = index.hashKey(key)
    const entry = entries[key]
    const excluded = opts.filter && !opts.filter(entry)
    if (excluded) {
      stats.rejectedEntries++
    }
    if (!buckets[hashed]) {
      // The bucket's file path rides along as a property on the array.
      const bucket = []
      bucket._path = index.bucketPath(cache, key)
      buckets[hashed] = bucket
    }
    if (!excluded) {
      buckets[hashed].push(entry)
    }
  }

  const rebuildOne = (hashed) => rebuildBucket(cache, buckets[hashed], stats, opts)
  await pMap(Object.keys(buckets), rebuildOne, { concurrency: opts.concurrency })
  return stats
}
// Empties one bucket file, then re-inserts each of its entries whose
// content file is still present.
//
// `bucket` is an array of entries carrying the bucket file's path in
// `_path`. `stats` is mutated in place: a re-inserted entry bumps
// `totalEntries`; an ENOENT failure bumps both `rejectedEntries` and
// `missingContent`. Any other error is rethrown.
async function rebuildBucket (cache, bucket, stats, opts) {
  await truncate(bucket._path)
  // Entries are processed one at a time on purpose: cacache explicitly
  // lets very racy bucket conflicts clobber each other.
  for (const entry of bucket) {
    const { key, integrity, metadata, size, time } = entry
    const content = contentPath(cache, integrity)
    try {
      await stat(content)
      await index.insert(cache, key, integrity, { metadata, size, time })
      stats.totalEntries++
    } catch (err) {
      if (err.code !== 'ENOENT') {
        throw err
      }
      stats.rejectedEntries++
      stats.missingContent++
    }
  }
}
// Recursively removes the cache's `tmp` directory; a missing directory is
// not an error. Returns the promise from `rm` directly.
function cleanTmp (cache, opts) {
  opts.log.silly('verify', 'cleaning tmp directory')
  const tmpDir = path.join(cache, 'tmp')
  return rm(tmpDir, { recursive: true, force: true })
}
// Stamps the cache with the time of this verification by writing the
// current epoch milliseconds, as text, to `<cache>/_lastverified`.
async function writeVerifile (cache, opts) {
  const verifile = path.join(cache, '_lastverified')
  opts.log.silly('verify', `writing verifile to ${verifile}`)
  const timestamp = `${Date.now()}`
  return writeFile(verifile, timestamp)
}
module.exports.lastRun = lastRun
// Reads `<cache>/_lastverified` and resolves to a Date built from its
// contents interpreted as a number. Rejects if the file cannot be read.
async function lastRun (cache) {
  const verifile = path.join(cache, '_lastverified')
  const contents = await readFile(verifile, { encoding: 'utf8' })
  return new Date(Number(contents))
}