-
Notifications
You must be signed in to change notification settings - Fork 226
Add LLM functionality using auto-model V2 #2633
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
84de8ad
Add creation of auto-model request V2
koesie10 de5dbea
Use promisify for gzip
koesie10 32c44cd
Rename Options to AutoModelQueriesOptions
koesie10 a79753d
Move all runAutoModelQuery arguments into the options object
koesie10 546f668
Move createMockUri to mocking helpers
koesie10 db06558
Merge remote-tracking branch 'origin/main' into koesie10/automodel-v2
koesie10 bebe130
Do not use mocked URI in locations test
koesie10 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| import { promisify } from "util"; | ||
| import { gzip, gunzip } from "zlib"; | ||
|
|
||
/**
 * Compresses data with gzip.
 *
 * This is `zlib.gzip` wrapped with `util.promisify`, so the result is delivered
 * through a promise rather than a completion callback.
 */
export const gzipEncode = promisify(gzip);
|
|
||
/**
 * Decompresses gzip-compressed data.
 *
 * This is `zlib.gunzip` wrapped with `util.promisify`, so the result is
 * delivered through a promise rather than a completion callback.
 */
export const gzipDecode = promisify(gunzip);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
34 changes: 34 additions & 0 deletions
34
extensions/ql-vscode/src/data-extensions-editor/auto-model-api-v2.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| import { Credentials } from "../common/authentication"; | ||
| import { OctokitResponse } from "@octokit/types"; | ||
|
|
||
/**
 * Values for the `mode` field of a {@link ModelRequest}.
 *
 * Each member's value is the literal string that is sent in the request body.
 */
export enum AutomodelMode {
  Unspecified = "AUTOMODEL_MODE_UNSPECIFIED",
  Framework = "AUTOMODEL_MODE_FRAMEWORK",
  Application = "AUTOMODEL_MODE_APPLICATION",
}
|
|
||
/**
 * Body of the request that `autoModelV2` posts to the auto-model endpoint.
 */
export interface ModelRequest {
  /** Mode the candidates were extracted in. */
  mode: AutomodelMode;
  /** Base64-encoded GZIP-compressed SARIF log */
  candidates: string;
}
|
|
||
/**
 * Body of the response returned by the auto-model endpoint.
 */
export interface ModelResponse {
  // NOTE(review): the encoding/format of this string is not visible here —
  // confirm against the API before decoding it.
  models: string;
}
|
|
||
/**
 * Sends an auto-model request to the code scanning auto-model endpoint.
 *
 * @param credentials Used to obtain the Octokit client that performs the request.
 * @param request The request body to post.
 * @returns The `data` payload of the endpoint's response.
 */
export async function autoModelV2(
  credentials: Credentials,
  request: ModelRequest,
): Promise<ModelResponse> {
  const client = await credentials.getOctokit();

  const { data }: OctokitResponse<ModelResponse> = await client.request(
    "POST /repos/github/codeql/code-scanning/codeql/auto-model",
    { data: request },
  );

  return data;
}
230 changes: 230 additions & 0 deletions
230
extensions/ql-vscode/src/data-extensions-editor/auto-model-codeml-queries.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,230 @@ | ||
| import { CodeQLCliServer, SourceInfo } from "../codeql-cli/cli"; | ||
| import { QueryRunner } from "../query-server"; | ||
| import { DatabaseItem } from "../databases/local-databases"; | ||
| import { ProgressCallback } from "../common/vscode/progress"; | ||
| import * as Sarif from "sarif"; | ||
| import { qlpackOfDatabase, resolveQueries } from "../local-queries"; | ||
| import { extLogger } from "../common/logging/vscode"; | ||
| import { Mode } from "./shared/mode"; | ||
| import { QlPacksForLanguage } from "../databases/qlpack"; | ||
| import { createLockFileForStandardQuery } from "../local-queries/standard-queries"; | ||
| import { CancellationToken, CancellationTokenSource } from "vscode"; | ||
| import { getOnDiskWorkspaceFolders } from "../common/vscode/workspace-folders"; | ||
| import { showAndLogExceptionWithTelemetry, TeeLogger } from "../common/logging"; | ||
| import { QueryResultType } from "../query-server/new-messages"; | ||
| import { telemetryListener } from "../common/vscode/telemetry"; | ||
| import { redactableError } from "../common/errors"; | ||
| import { interpretResultsSarif } from "../query-results"; | ||
| import { join } from "path"; | ||
| import { assertNever } from "../common/helpers-pure"; | ||
|
|
||
/**
 * Everything `runAutoModelQuery` needs to resolve, run and interpret one
 * auto-model query.
 */
type AutoModelQueryOptions = {
  /**
   * Space-separated tag words that, together with the mode tag, select the
   * query. Also used in the name of the interpreted results file.
   */
  queryTag: string;
  /** Mode whose tag the query must carry. */
  mode: Mode;
  /** CLI server used to resolve the query and its metadata and to interpret results. */
  cliServer: CodeQLCliServer;
  /** Runner that creates and evaluates the query run. */
  queryRunner: QueryRunner;
  /** Database the query is evaluated against. */
  databaseItem: DatabaseItem;
  /** Query packs in which the query is looked up. */
  qlpack: QlPacksForLanguage;
  /** Source information handed to SARIF interpretation; may be absent. */
  sourceInfo: SourceInfo | undefined;
  /** Extension packs passed to the query run. */
  extensionPacks: string[];
  /** Directory passed to the query run and used for the interpreted SARIF file. */
  queryStorageDir: string;

  /** Progress callback forwarded to the query evaluation. */
  progress: ProgressCallback;
  /** Cancellation token forwarded to the query evaluation. */
  token: CancellationToken;
};
|
|
||
/**
 * Maps a mode to the query tag that identifies queries written for that mode.
 *
 * @param mode The mode to translate.
 * @returns `"application-mode"` or `"framework-mode"`.
 */
function modeTag(mode: Mode): string {
  if (mode === Mode.Application) {
    return "application-mode";
  }
  if (mode === Mode.Framework) {
    return "framework-mode";
  }

  // Every known mode is handled above; any other value is rejected here.
  return assertNever(mode);
}
|
|
||
/**
 * Resolves, evaluates and interprets a single auto-model query.
 *
 * The query is looked up by tags, evaluated against the database, and its
 * results are interpreted as SARIF with snippets added.
 *
 * @returns The interpreted SARIF log, or `undefined` when the evaluation did
 * not succeed (the failure is reported through
 * `showAndLogExceptionWithTelemetry`).
 * @throws Error when no query, or more than one query, matches the tags.
 */
async function runAutoModelQuery({
  queryTag,
  mode,
  cliServer,
  queryRunner,
  databaseItem,
  qlpack,
  sourceInfo,
  extensionPacks,
  queryStorageDir,
  progress,
  token,
}: AutoModelQueryOptions): Promise<Sarif.Log | undefined> {
  // First, resolve the query that we want to run.
  // A query matches when it is of kind "problem" and carries all of these
  // tags: "automodel", the mode tag, and every word of queryTag.
  // Example tags: automodel framework-mode candidates
  const queries = await resolveQueries(
    cliServer,
    qlpack,
    `Extract automodel ${queryTag}`,
    {
      kind: "problem",
      "tags contain all": ["automodel", modeTag(mode), ...queryTag.split(" ")],
    },
  );
  if (queries.length > 1) {
    throw new Error(
      `Found multiple auto model queries for ${mode} ${queryTag}. Can't continue`,
    );
  }
  if (queries.length === 0) {
    throw new Error(
      `Did not found any auto model queries for ${mode} ${queryTag}. Can't continue`,
    );
  }

  const queryPath = queries[0];

  // Everything that happens while the lock file exists lives in this helper,
  // so the lock file is cleaned up even when resolving the metadata or
  // evaluating the query throws.
  const evaluateWithLockFile = async () => {
    const { cleanup: cleanupLockFile } = await createLockFileForStandardQuery(
      cliServer,
      queryPath,
    );

    try {
      // Get metadata for the query. This is required to interpret the results. We already know the kind is problem
      // (because of the constraint in resolveQueries), so we don't need any more checks on the metadata.
      const metadata = await cliServer.resolveMetadata(queryPath);

      const queryRun = queryRunner.createQueryRun(
        databaseItem.databaseUri.fsPath,
        {
          queryPath,
          quickEvalPosition: undefined,
          quickEvalCountOnly: false,
        },
        false,
        getOnDiskWorkspaceFolders(),
        extensionPacks,
        queryStorageDir,
        undefined,
        undefined,
      );

      const completedQuery = await queryRun.evaluate(
        progress,
        token,
        new TeeLogger(queryRunner.logger, queryRun.outputDir.logPath),
      );

      return { metadata, queryRun, completedQuery };
    } finally {
      await cleanupLockFile?.();
    }
  };

  const { metadata, queryRun, completedQuery } = await evaluateWithLockFile();

  if (completedQuery.resultType !== QueryResultType.SUCCESS) {
    void showAndLogExceptionWithTelemetry(
      extLogger,
      telemetryListener,
      redactableError`Auto-model query ${queryTag} failed: ${
        completedQuery.message ?? "No message"
      }`,
    );
    return;
  }

  // The interpreted results are written next to the other query output, with
  // the tag and run id in the file name.
  const interpretedResultsPath = join(
    queryStorageDir,
    `interpreted-results-${queryTag.replaceAll(" ", "-")}-${queryRun.id}.sarif`,
  );

  // eslint-disable-next-line @typescript-eslint/no-unused-vars -- We only need the actual SARIF data, not the extra fields added by SarifInterpretationData
  const { t, sortState, ...sarif } = await interpretResultsSarif(
    cliServer,
    metadata,
    {
      resultsPath: completedQuery.outputDir.bqrsPath,
      interpretedResultsPath,
    },
    sourceInfo,
    ["--sarif-add-snippets"],
  );

  return sarif;
}
|
|
||
/**
 * Options accepted by `runAutoModelQueries`.
 */
type AutoModelQueriesOptions = {
  /** Mode the auto-model queries are run in. */
  mode: Mode;
  /** CLI server used to look up packs and run/interpret the queries. */
  cliServer: CodeQLCliServer;
  /** Runner used to evaluate the queries. */
  queryRunner: QueryRunner;
  /** Database the queries are evaluated against. */
  databaseItem: DatabaseItem;
  /** Directory in which query output is stored. */
  queryStorageDir: string;

  /** Callback that receives progress updates. */
  progress: ProgressCallback;
};
|
|
||
/**
 * Result of `runAutoModelQueries`.
 */
export type AutoModelQueriesResult = {
  /** SARIF log produced by the "candidates" query. */
  candidates: Sarif.Log;
};
|
|
||
/**
 * Runs the auto-model queries for a database and collects their results.
 *
 * Currently this runs a single query, tagged "candidates", and reports its
 * progress through `progress` against a fixed maximum of 1500 steps.
 *
 * @returns The query results, or `undefined` when the candidates query did
 * not produce a SARIF log.
 */
export async function runAutoModelQueries({
  mode,
  cliServer,
  queryRunner,
  databaseItem,
  queryStorageDir,
  progress,
}: AutoModelQueriesOptions): Promise<AutoModelQueriesResult | undefined> {
  // maxStep for this part is 1500
  const maxStep = 1500;
  const progressMessage = "Finding candidates and examples";

  // The token source is local to this call and is never cancelled here; it
  // only exists to supply the token that the query evaluation requires.
  const cancellationTokenSource = new CancellationTokenSource();

  try {
    const qlpack = await qlpackOfDatabase(cliServer, databaseItem);

    // CodeQL needs to have access to the database to be able to retrieve the
    // snippets from it. The source location prefix is used to determine the
    // base path of the database.
    const sourceLocationPrefix = await databaseItem.getSourceLocationPrefix(
      cliServer,
    );
    const sourceArchiveUri = databaseItem.sourceArchive;
    const sourceInfo =
      sourceArchiveUri === undefined
        ? undefined
        : {
            sourceArchive: sourceArchiveUri.fsPath,
            sourceLocationPrefix,
          };

    const additionalPacks = getOnDiskWorkspaceFolders();
    const extensionPacks = Object.keys(
      await cliServer.resolveQlpacks(additionalPacks, true),
    );

    progress({
      step: 0,
      maxStep,
      message: progressMessage,
    });

    const candidates = await runAutoModelQuery({
      mode,
      queryTag: "candidates",
      cliServer,
      queryRunner,
      databaseItem,
      qlpack,
      sourceInfo,
      extensionPacks,
      queryStorageDir,
      // Re-report the query's own step count against this function's maxStep.
      progress: (update) => {
        progress({
          step: update.step,
          maxStep,
          message: progressMessage,
        });
      },
      token: cancellationTokenSource.token,
    });

    if (!candidates) {
      return undefined;
    }

    return {
      candidates,
    };
  } finally {
    // Release the token source on every exit path, including exceptions.
    cancellationTokenSource.dispose();
  }
}
40 changes: 40 additions & 0 deletions
40
extensions/ql-vscode/src/data-extensions-editor/auto-model-v2.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| import { AutomodelMode, ModelRequest } from "./auto-model-api-v2"; | ||
| import { Mode } from "./shared/mode"; | ||
| import { AutoModelQueriesResult } from "./auto-model-codeml-queries"; | ||
| import { assertNever } from "../common/helpers-pure"; | ||
| import * as Sarif from "sarif"; | ||
| import { gzipEncode } from "../common/zlib"; | ||
|
|
||
/**
 * Serializes a SARIF log into the wire format expected by the server.
 *
 * The log is converted to JSON, the UTF-8 bytes of that JSON are
 * GZIP-compressed, and the compressed bytes are base64-encoded.
 *
 * @param log SARIF log to encode
 * @returns base64-encoded GZIP-compressed SARIF log
 */
export async function encodeSarif(log: Sarif.Log): Promise<string> {
  const gzipped = await gzipEncode(Buffer.from(JSON.stringify(log), "utf-8"));
  return gzipped.toString("base64");
}
|
|
||
/**
 * Builds the request body for the auto-model V2 endpoint.
 *
 * @param mode Editor mode, translated to the corresponding request mode.
 * @param result Query results whose candidates are encoded into the request.
 * @returns The request with the translated mode and the encoded candidates.
 */
export async function createAutoModelV2Request(
  mode: Mode,
  result: AutoModelQueriesResult,
): Promise<ModelRequest> {
  // Translate the mode first so that an unknown mode is rejected before any
  // encoding work is done.
  let requestMode: AutomodelMode;
  if (mode === Mode.Application) {
    requestMode = AutomodelMode.Application;
  } else if (mode === Mode.Framework) {
    requestMode = AutomodelMode.Framework;
  } else {
    assertNever(mode);
  }

  const candidates = await encodeSarif(result.candidates);

  return {
    mode: requestMode,
    candidates,
  };
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.