From ccea1932e9212e8ee118904f9df7564e9ffc2099 Mon Sep 17 00:00:00 2001 From: Yarchik Date: Thu, 25 Jun 2026 19:03:34 +0100 Subject: [PATCH] fix: do not mistake an EOCD signature inside the comment for the real record loadAsync used the last "PK\x05\x06" signature in the file as the end of central directory record, without validating it. When the archive comment itself contains those bytes, the false positive is picked and reading the bogus comment-length field runs past the end of the buffer, throwing "Corrupted zip ... End of data reached" on otherwise valid archives (including JSZip's own generateAsync output, which libarchive reads fine). Per APPNOTE.TXT 4.3.16 the genuine EOCD satisfies `offset + 22 + commentLength === fileLength`. Scan the candidate signatures backward and prefer the one matching this invariant, falling back to the last occurrence so archives with appended/prepended bytes keep working. --- lib/reader/ArrayReader.js | 6 ++++-- lib/reader/DataReader.js | 1 + lib/reader/StringReader.js | 5 +++-- lib/zipEntries.js | 33 ++++++++++++++++++++++++++++++++- test/asserts/load.js | 22 ++++++++++++++++++++++ 5 files changed, 62 insertions(+), 5 deletions(-) diff --git a/lib/reader/ArrayReader.js b/lib/reader/ArrayReader.js index 32a9569b..58c6a321 100644 --- a/lib/reader/ArrayReader.js +++ b/lib/reader/ArrayReader.js @@ -18,12 +18,14 @@ ArrayReader.prototype.byteAt = function(i) { /** * @see DataReader.lastIndexOfSignature */ -ArrayReader.prototype.lastIndexOfSignature = function(sig) { +ArrayReader.prototype.lastIndexOfSignature = function(sig, endIndex) { var sig0 = sig.charCodeAt(0), sig1 = sig.charCodeAt(1), sig2 = sig.charCodeAt(2), sig3 = sig.charCodeAt(3); - for (var i = this.length - 4; i >= 0; --i) { + var start = typeof endIndex === "number" ? + Math.min(endIndex + this.zero, this.length - 4) : this.length - 4; + for (var i = start; i >= 0; --i) { if (this.data[i] === sig0 && this.data[i + 1] === sig1 && this.data[i + 2] === sig2 && this.data[i + 3] === sig3) { return i - this.zero; } diff --git a/lib/reader/DataReader.js b/lib/reader/DataReader.js index b61b839b..b5fba1c4 100644 --- a/lib/reader/DataReader.js +++ b/lib/reader/DataReader.js @@ -85,6 +85,7 @@ DataReader.prototype = { /** * Find the last occurrence of a zip signature (4 bytes). * @param {string} sig the signature to find. + * @param {number} [endIndex] if given, only search at or before this index. * @return {number} the index of the last occurrence, -1 if not found. */ lastIndexOfSignature: function() { diff --git a/lib/reader/StringReader.js b/lib/reader/StringReader.js index fc90784a..0338972a 100644 --- a/lib/reader/StringReader.js +++ b/lib/reader/StringReader.js @@ -15,8 +15,9 @@ StringReader.prototype.byteAt = function(i) { /** * @see DataReader.lastIndexOfSignature */ -StringReader.prototype.lastIndexOfSignature = function(sig) { - return this.data.lastIndexOf(sig) - this.zero; +StringReader.prototype.lastIndexOfSignature = function(sig, endIndex) { + var fromIndex = typeof endIndex === "number" ? endIndex + this.zero : undefined; + return this.data.lastIndexOf(sig, fromIndex) - this.zero; }; /** * @see DataReader.readAndCheckSignature diff --git a/lib/zipEntries.js b/lib/zipEntries.js index 13cb4777..4a5e21a9 100644 --- a/lib/zipEntries.js +++ b/lib/zipEntries.js @@ -150,11 +150,42 @@ ZipEntries.prototype = { } } }, + /** + * Find the offset of the genuine "end of central directory" record. + * + * The EOCD signature ("PK\x05\x06") can legitimately appear inside the + * archive comment, so the last occurrence in the file is not necessarily + * the real record. As described in APPNOTE.TXT §4.3.16, the genuine EOCD + * satisfies `offset + 22 + commentLength === fileLength` (22 being the + * fixed size of the record). We scan the candidates backward and prefer + * one that matches this invariant. If none does (e.g. a zip with trailing + * bytes appended), we fall back to the last occurrence, keeping the + * historical behavior. + * @return {number} the offset of the EOCD record, -1 if not found. + */ + findEndOfCentral: function() { + var fileLength = this.reader.length - this.reader.zero; + var lastOffset = this.reader.lastIndexOfSignature(sig.CENTRAL_DIRECTORY_END); + var offset = lastOffset; + while (offset >= 0) { + // the comment length is a 2 bytes little-endian field located 20 + // bytes after the signature. + var commentLength = this.reader.byteAt(offset + 20) + + (this.reader.byteAt(offset + 21) << 8); + if (offset + 22 + commentLength === fileLength) { + return offset; + } + // not the genuine record (the signature is likely part of the + // comment): keep scanning backward. + offset = this.reader.lastIndexOfSignature(sig.CENTRAL_DIRECTORY_END, offset - 1); + } + return lastOffset; + }, /** * Read the end of central directory. */ readEndOfCentral: function() { - var offset = this.reader.lastIndexOfSignature(sig.CENTRAL_DIRECTORY_END); + var offset = this.findEndOfCentral(); if (offset < 0) { // Check if the content is a truncated zip or complete garbage. // A "LOCAL_FILE_HEADER" is not required at the beginning (auto diff --git a/test/asserts/load.js b/test/asserts/load.js index 395ec720..578bfc06 100644 --- a/test/asserts/load.js +++ b/test/asserts/load.js @@ -212,6 +212,28 @@ QUnit.module("load", function () { })["catch"](JSZipTestUtils.assertNoError); }); + QUnit.test("load a zip whose comment contains the end of central directory signature", function (assert) { + var done = assert.async(); + // "PK\x05\x06" is the end of central directory signature: it can + // legitimately appear inside the archive comment and must not be + // mistaken for the real record (APPNOTE.TXT §4.3.16). + var comment = "\x50\x4b\x05\x06" + "Z".repeat(18); + var zip = new JSZip(); + zip.file("a.txt", "hi"); + zip.generateAsync({type: "binarystring", comment: comment}) + .then(function (generated) { + return JSZip.loadAsync(generated); + }) + .then(function (loaded) { + assert.equal(loaded.comment, comment, "the archive comment was correctly read."); + return loaded.file("a.txt").async("string"); + }) + .then(function (content) { + assert.equal(content, "hi", "the zip was correctly read."); + done(); + })["catch"](JSZipTestUtils.assertNoError); + }); + // zip -0 extra_attributes.zip Hello.txt JSZipTestUtils.testZipFile("zip with extra attributes", "ref/extra_attributes.zip", function(assert, file) { var done = assert.async();