Skip to content

Commit 747ff45

Browse files
committed
Save downloaded DB archives to disk before unzipping
This fixes two classes of DBs that can't be installed directly from downloading: 1. DBs whose central directories do not align with their file headers. We need to download and save the entire archive before we can read the central directory and use that to guide the unzipping. 2. Large DBs require too much memory so can't be downloaded and unzipped in a single stream. We also add proper progress notifications to the download progress monitor so users are aware of how many more MBs are left to download. It's not yet possible to do the same for unzipping using the current unzipper library, since unzipping using the central directory does not expose a stream.
1 parent 370dbcb commit 747ff45

5 files changed

Lines changed: 285 additions & 175 deletions

File tree

extensions/ql-vscode/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
- Fix bug when removing databases where sometimes the source folder would not be removed from the workspace or the database files would not be removed from the workspace storage location. [#692](https://github.com/github/vscode-codeql/pull/692)
77
- Query results with no string representation will now be displayed with placeholder text in query results. Previously, they were omitted. [#694](https://github.com/github/vscode-codeql/pull/694)
88
- Add a label for the language of a database in the databases view. This will only take effect for new databases created with the CodeQL CLI v2.4.1 or later. [#697](https://github.com/github/vscode-codeql/pull/697)
9+
- Fix a bug where it is not possible to download some database archives. This fix specifically addresses large archives and archives whose central directories do not align with file headers. [#700](https://github.com/github/vscode-codeql/pull/700)
910

1011
## 1.3.7 - 24 November 2020
1112

extensions/ql-vscode/src/databaseFetcher.ts

Lines changed: 50 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@ import * as path from 'path';
1212

1313
import { DatabaseManager, DatabaseItem } from './databases';
1414
import {
15+
reportStreamProgress,
1516
ProgressCallback,
1617
showAndLogInformationMessage,
1718
} from './helpers';
1819
import { logger } from './logging';
20+
import { tmpDir } from './run-queries';
1921

2022
/**
2123
* Prompts a user to fetch a database from a remote location. Database is assumed to be an archive file.
@@ -164,7 +166,7 @@ async function databaseArchiveFetcher(
164166
const unzipPath = await getStorageFolder(storagePath, databaseUrl);
165167

166168
if (isFile(databaseUrl)) {
167-
await readAndUnzip(databaseUrl, unzipPath);
169+
await readAndUnzip(databaseUrl, unzipPath, progress);
168170
} else {
169171
await fetchAndUnzip(databaseUrl, unzipPath, progress);
170172
}
@@ -237,48 +239,67 @@ function validateHttpsUrl(databaseUrl: string) {
237239
}
238240
}
239241

240-
async function readAndUnzip(databaseUrl: string, unzipPath: string) {
241-
const databaseFile = Uri.parse(databaseUrl).fsPath;
242-
const directory = await unzipper.Open.file(databaseFile);
242+
async function readAndUnzip(
243+
zipUrl: string,
244+
unzipPath: string,
245+
progress?: ProgressCallback
246+
) {
247+
// TODO: Providing progress as the file is unzipped is currently blocked
248+
// on https://github.com/ZJONSSON/node-unzipper/issues/222
249+
const zipFile = Uri.parse(zipUrl).fsPath;
250+
progress?.({
251+
maxStep: 10,
252+
step: 9,
253+
message: `Unzipping into ${path.basename(unzipPath)}`
254+
});
255+
// Must get the zip central directory since streaming the
256+
// zip contents may not have correct local file headers.
257+
// Instead, we can only rely on the central directory.
258+
const directory = await unzipper.Open.file(zipFile);
243259
await directory.extract({ path: unzipPath });
244260
}
245261

246262
async function fetchAndUnzip(
247263
databaseUrl: string,
248264
unzipPath: string,
249-
progressCallback?: ProgressCallback
265+
progress?: ProgressCallback
250266
) {
251-
const response = await fetch(databaseUrl);
252-
253-
await checkForFailingResponse(response);
267+
// Although it is possible to download and stream directly to an unzipped directory,
268+
// we need to avoid this for two reasons. The central directory is located at the
269+
// end of the zip file. It is the source of truth of the content locations. Individual
270+
// file headers may be incorrect. Additionally, saving to file first will reduce memory
271+
// pressure compared with unzipping while downloading the archive.
254272

255-
const unzipStream = unzipper.Extract({
256-
path: unzipPath,
257-
});
273+
const archivePath = path.join(tmpDir.name, `archive-${Date.now()}.zip`);
258274

259-
progressCallback?.({
275+
progress?.({
260276
maxStep: 3,
261-
message: 'Unzipping database',
262-
step: 2,
263-
});
264-
await new Promise((resolve, reject) => {
265-
const handler = (err: Error) => {
266-
if (err.message.startsWith('invalid signature')) {
267-
reject(new Error('Not a valid archive.'));
268-
} else {
269-
reject(err);
270-
}
271-
};
272-
response.body.on('error', handler);
273-
unzipStream.on('error', handler);
274-
unzipStream.on('close', resolve);
275-
response.body.pipe(unzipStream);
277+
message: 'Downloading database',
278+
step: 1,
276279
});
280+
281+
const response = await checkForFailingResponse(await fetch(databaseUrl));
282+
const archiveFileStream = fs.createWriteStream(archivePath);
283+
284+
const contentLength = response.headers.get('content-length');
285+
const totalNumBytes = contentLength ? parseInt(contentLength, 10) : undefined;
286+
reportStreamProgress(response.body, 'Downloading database', totalNumBytes, progress);
287+
288+
await new Promise((resolve, reject) =>
289+
response.body.pipe(archiveFileStream)
290+
.on('finish', resolve)
291+
.on('error', reject)
292+
);
293+
294+
await readAndUnzip(Uri.file(archivePath).toString(true), unzipPath, progress);
295+
296+
// remove archivePath eagerly since these archives can be large.
297+
await fs.remove(archivePath);
277298
}
278299

279-
async function checkForFailingResponse(response: Response): Promise<void | never> {
300+
async function checkForFailingResponse(response: Response): Promise<Response | never> {
280301
if (response.ok) {
281-
return;
302+
return response;
282303
}
283304

284305
// An error downloading the database. Attempt to extract the resaon behind it.

extensions/ql-vscode/src/distribution.ts

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -337,27 +337,8 @@ class ExtensionSpecificDistributionManager {
337337
const archiveFile = fs.createWriteStream(archivePath);
338338

339339
const contentLength = assetStream.headers.get('content-length');
340-
let numBytesDownloaded = 0;
341-
342-
if (progressCallback && contentLength !== null) {
343-
const totalNumBytes = parseInt(contentLength, 10);
344-
const bytesToDisplayMB = (numBytes: number): string => `${(numBytes / (1024 * 1024)).toFixed(1)} MB`;
345-
const updateProgress = (): void => {
346-
progressCallback({
347-
step: numBytesDownloaded,
348-
maxStep: totalNumBytes,
349-
message: `Downloading CodeQL CLI… [${bytesToDisplayMB(numBytesDownloaded)} of ${bytesToDisplayMB(totalNumBytes)}]`,
350-
});
351-
};
352-
353-
// Display the progress straight away rather than waiting for the first chunk.
354-
updateProgress();
355-
356-
assetStream.body.on('data', data => {
357-
numBytesDownloaded += data.length;
358-
updateProgress();
359-
});
360-
}
340+
const totalNumBytes = contentLength ? parseInt(contentLength, 10) : undefined;
341+
helpers.reportStreamProgress(assetStream.body, 'Downloading CodeQL CLI…', totalNumBytes, progressCallback);
361342

362343
await new Promise((resolve, reject) =>
363344
assetStream.body.pipe(archiveFile)

extensions/ql-vscode/src/helpers.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -530,3 +530,46 @@ export async function isLikelyDatabaseRoot(maybeRoot: string) {
530530
export function isLikelyDbLanguageFolder(dbPath: string) {
531531
return !!path.basename(dbPath).startsWith('db-');
532532
}
533+
534+
535+
/**
536+
* Displays a progress monitor that indicates how much progess has been made
537+
* reading from a stream.
538+
*
539+
* @param readable The stream to read progress from
540+
* @param messagePrefix A prefix for displaying the message
541+
* @param totalNumBytes Total number of bytes in this stream
542+
* @param progress The progress callback used to set messages
543+
*/
544+
export function reportStreamProgress(
545+
readable: NodeJS.ReadableStream,
546+
messagePrefix: string,
547+
totalNumBytes?: number,
548+
progress?: ProgressCallback
549+
) {
550+
if (progress && totalNumBytes) {
551+
let numBytesDownloaded = 0;
552+
const bytesToDisplayMB = (numBytes: number): string => `${(numBytes / (1024 * 1024)).toFixed(1)} MB`;
553+
const updateProgress = () => {
554+
progress({
555+
step: numBytesDownloaded,
556+
maxStep: totalNumBytes,
557+
message: `${messagePrefix} [${bytesToDisplayMB(numBytesDownloaded)} of ${bytesToDisplayMB(totalNumBytes)}]`,
558+
});
559+
};
560+
561+
// Display the progress straight away rather than waiting for the first chunk.
562+
updateProgress();
563+
564+
readable.on('data', data => {
565+
numBytesDownloaded += data.length;
566+
updateProgress();
567+
});
568+
} else if (progress) {
569+
progress({
570+
step: 1,
571+
maxStep: 2,
572+
message: `${messagePrefix} (Size unknown)`,
573+
});
574+
}
575+
}

0 commit comments

Comments
 (0)