/
editor-context.ts
315 lines (288 loc) · 11.1 KB
/
editor-context.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
import path from 'path'
import fuzzysort from 'fuzzysort'
import throttle from 'lodash/throttle'
import * as vscode from 'vscode'
import {
type ContextFileSource,
type ContextFileType,
type ContextItem,
type ContextItemFile,
type ContextItemSymbol,
type Editor,
MAX_CURRENT_FILE_TOKENS,
type SymbolKind,
displayPath,
fetchContentForURLContextItem,
isCodyIgnoredFile,
isDefined,
isURLContextItem,
isWindows,
} from '@sourcegraph/cody-shared'
import {
ContextItemSource,
type ContextItemWithContent,
} from '@sourcegraph/cody-shared/src/codebase-context/messages'
import { CHARS_PER_TOKEN } from '@sourcegraph/cody-shared/src/prompt/constants'
import { getOpenTabsUris } from '.'
import { isURLContextFeatureFlagEnabled } from '../../chat/context/chatContext'
import { toVSCodeRange } from '../../common/range'
import { findWorkspaceFiles } from './findWorkspaceFiles'
// Some matches we don't want to ignore because they might be valid code (for example `bin/` in Dart)
// but could also be junk (`bin/` in .NET). If a file path contains a segment matching any of these
// items it will be ranked low unless the user's query itself contains that segment's text.
const lowScoringPathSegments = ['bin']
/**
 * Throttled, zero-argument wrapper around {@link findWorkspaceFiles}.
 *
 * This is expensive for large repos (e.g. Chromium), so we only do it max once every 10 seconds
 * (the `10000` below is the throttle window in milliseconds).
 *
 * We do NOT allow passing a cancellation token because that is highly likely to result in buggy
 * behavior for a throttled function. If the first call to {@link findWorkspaceFiles} is cancelled,
 * we still want it to complete so that its results are cached for subsequent calls. If we cancel
 * and it throws an exception, then we lose all work we did until the cancellation and could
 * potentially swallow errors and return (and cache) incomplete data.
 */
const throttledFindFiles = throttle(() => findWorkspaceFiles(), 10000)
/**
 * Searches all workspaces for files matching the given string. VS Code doesn't
 * provide an API for fuzzy file searching, only precise globs, so we recreate
 * it by getting a list of all files across all workspaces and using fuzzysort.
 * Large files over 1MB are filtered.
 *
 * @param query Fuzzy search text. A blank (whitespace-only) query yields no results.
 * @param maxResults Passed to fuzzysort as the maximum number of matches to return.
 * @param charsLimit Optional size threshold forwarded to {@link filterLargeFiles}.
 * @returns Matching files ordered by descending (penalty-adjusted) score, then by URI path.
 */
export async function getFileContextFiles(
    query: string,
    maxResults: number,
    charsLimit?: number
): Promise<ContextItemFile[]> {
    if (!query.trim()) {
        return []
    }

    const uris = await throttledFindFiles()
    if (!uris) {
        return []
    }

    // On Windows, if the user has typed forward slashes, map them to backslashes before
    // running the search so they match the real paths. A local is used so the parameter
    // itself is never reassigned.
    const searchQuery = isWindows() ? query.replaceAll('/', '\\') : query

    // Add on the relative URIs for search, so we only search the visible part
    // of the path and not the full FS path.
    const urisWithRelative = uris.map(uri => ({ uri, relative: displayPath(uri) }))

    const results = fuzzysort.go(searchQuery, urisWithRelative, {
        key: 'relative',
        limit: maxResults,
        // We add a threshold for performance as per fuzzysort’s
        // recommendations. Testing with sg/sg path strings, somewhere over 10k
        // threshold is where it seems to return results that make no sense. VS
        // Code’s own fuzzy finder seems to cap out much higher. To be safer and
        // to account for longer paths from even deeper source trees we use
        // 100k. We may want to revisit this number if we get reports of missing
        // file results from very large repos.
        threshold: -100000,
    })

    // Apply a penalty for segments that are in the low scoring list, unless the
    // query text itself contains that segment.
    const adjustedResults = [...results].map(result => {
        const segments = result.obj.uri.fsPath.split(path.sep)
        const shouldPenalize = lowScoringPathSegments.some(
            segment => segments.includes(segment) && !searchQuery.includes(segment)
        )
        return shouldPenalize ? { ...result, score: result.score - 100000 } : result
    })

    // Built once rather than on every comparison: constructing a collator is
    // far more expensive than calling compare().
    const pathCollator = new Intl.Collator(undefined, { numeric: true })

    // fuzzysort can return results in different order for the same query if
    // they have the same score :( So we do this hacky post-limit sorting (first
    // by score, then by path) to ensure the order stays the same.
    const sortedResults = adjustedResults
        .sort((a, b) => b.score - a.score || pathCollator.compare(a.obj.uri.path, b.obj.uri.path))
        .flatMap(result => createContextFileFromUri(result.obj.uri, ContextItemSource.User, 'file'))

    // TODO(toolmantim): Add fuzzysort.highlight data to the result so we can show it in the UI

    return await filterLargeFiles(sortedResults, charsLimit)
}
/**
 * Searches workspace symbols through VS Code's workspace symbol provider command and
 * fuzzy-ranks the candidates by symbol name.
 *
 * Only functions, methods, classes, interfaces, enums, structs, constants and variables
 * are considered, and symbols located under a `node_modules/` path are skipped.
 *
 * @param query Fuzzy search text. A blank (whitespace-only) query yields no results.
 * @param maxResults Passed to fuzzysort as the maximum number of matches (default 20).
 * @returns One context item per matching symbol, in fuzzysort's ranking order.
 */
export async function getSymbolContextFiles(
    query: string,
    maxResults = 20
): Promise<ContextItemSymbol[]> {
    if (!query.trim()) {
        return []
    }

    // doesn't support cancellation tokens :(
    const queryResults = await vscode.commands.executeCommand<vscode.SymbolInformation[]>(
        'vscode.executeWorkspaceSymbolProvider',
        query
    )

    const relevantQueryResults = queryResults?.filter(
        symbol =>
            (symbol.kind === vscode.SymbolKind.Function ||
                symbol.kind === vscode.SymbolKind.Method ||
                symbol.kind === vscode.SymbolKind.Class ||
                symbol.kind === vscode.SymbolKind.Interface ||
                symbol.kind === vscode.SymbolKind.Enum ||
                symbol.kind === vscode.SymbolKind.Struct ||
                symbol.kind === vscode.SymbolKind.Constant ||
                // in TS an export const is considered a variable
                symbol.kind === vscode.SymbolKind.Variable) &&
            // TODO(toolmantim): Remove once https://github.com/microsoft/vscode/pull/192798 is in use (test: do a symbol search and check no symbols exist from node_modules)
            // Match against `uri.path` (always forward slashes) rather than `fsPath`,
            // whose separators are backslashes on Windows and would never contain
            // 'node_modules/'.
            !symbol.location?.uri?.path.includes('node_modules/')
    )

    // The command result is not guaranteed to be an array at runtime (hence the
    // optional chaining above); bail out before handing a missing or empty list
    // to fuzzysort.
    if (!relevantQueryResults?.length) {
        return []
    }

    const results = fuzzysort.go(query, relevantQueryResults, {
        key: 'name',
        limit: maxResults,
    })

    // TODO(toolmantim): Add fuzzysort.highlight data to the result so we can show it in the UI

    return results.flatMap(({ obj: symbol }) =>
        createContextFileFromUri(
            symbol.location.uri,
            ContextItemSource.User,
            'symbol',
            symbol.location.range,
            // TODO(toolmantim): Update the kinds to match above
            symbol.kind === vscode.SymbolKind.Class ? 'class' : 'function',
            symbol.name
        )
    )
}
/**
 * Builds file context items for the URIs of the currently open editor tabs.
 * URIs ignored by Cody are skipped, and the remaining items are passed through
 * {@link filterLargeFiles} (which drops files over 1MB) before being returned.
 *
 * @param charsLimit Optional size threshold forwarded to {@link filterLargeFiles}.
 */
export async function getOpenTabsContextFile(charsLimit?: number): Promise<ContextItemFile[]> {
    const openTabItems: ContextItemFile[] = []
    for (const tabUri of getOpenTabsUris()) {
        if (isCodyIgnoredFile(tabUri)) {
            continue
        }
        openTabItems.push(...createContextFileFromUri(tabUri, ContextItemSource.User, 'file'))
    }
    return await filterLargeFiles(openTabItems, charsLimit)
}
/**
 * Wraps a URI in a single-element array holding a file or symbol context item.
 * Returns an empty array when the URI is ignored by Cody, so callers can
 * `flatMap` over the result.
 *
 * For `type: 'symbol'` the overload requires `selectionRange`, `kind` and
 * `symbolName`; for `type: 'file'` only an optional `selectionRange` applies.
 */
function createContextFileFromUri(
    uri: vscode.Uri,
    source: ContextFileSource,
    type: 'symbol',
    selectionRange: vscode.Range,
    kind: SymbolKind,
    symbolName: string
): ContextItemSymbol[]
function createContextFileFromUri(
    uri: vscode.Uri,
    source: ContextFileSource,
    type: 'file',
    selectionRange?: vscode.Range
): ContextItemFile[]
function createContextFileFromUri(
    uri: vscode.Uri,
    source: ContextFileSource,
    type: ContextFileType,
    selectionRange?: vscode.Range,
    kind?: SymbolKind,
    symbolName?: string
): ContextItem[] {
    if (isCodyIgnoredFile(uri)) {
        return []
    }

    // Stays undefined when no selection range was supplied.
    const range = selectionRange && createContextFileRange(selectionRange)

    if (type === 'file') {
        return [{ type, uri, range, source }]
    }

    // The symbol overload guarantees that symbolName and kind are provided.
    return [{ type, symbolName: symbolName!, uri, range, source, kind: kind! }]
}
/**
 * Copies the start and end positions (line and character) of a VS Code range
 * into a fresh plain object.
 */
function createContextFileRange(selectionRange: vscode.Range): ContextItem['range'] {
    const { start, end } = selectionRange
    return {
        start: { line: start.line, character: start.character },
        end: { line: end.line, character: end.character },
    }
}
/**
 * Filters the given context files, keeping only items that are regular files of at
 * most 1MB. Items whose file stat cannot be obtained are dropped as well.
 *
 * Sets {@link ContextItemFile.isTooLarge} (mutating the item in place) on every kept
 * file whose size exceeds `charsLimit`, so the webview can display a size warning.
 *
 * @param contextFiles Candidate items; their input order is preserved in the result.
 * @param charsLimit Threshold compared against the file size reported by the file
 * system. Defaults to `CHARS_PER_TOKEN * MAX_CURRENT_FILE_TOKENS`.
 * @returns The items that passed the filter.
 */
export async function filterLargeFiles(
    contextFiles: ContextItemFile[],
    charsLimit = CHARS_PER_TOKEN * MAX_CURRENT_FILE_TOKENS
): Promise<ContextItemFile[]> {
    // NOTE: Sourcegraph search only includes files up to 1MB
    const maxFileSizeBytes = 1000000

    const filtered: ContextItemFile[] = []
    for (const cf of contextFiles) {
        // Cheap check first so no filesystem call is made for non-file items.
        if (cf.type !== 'file') {
            continue
        }

        // A failed stat (e.g. the file no longer exists) resolves to undefined
        // and the item is skipped below.
        const fileStat = await vscode.workspace.fs.stat(cf.uri)?.then(
            stat => stat,
            () => undefined
        )

        // NOTE(review): this is a strict equality check, so a stat whose type is a
        // bitmask such as File | SymbolicLink is excluded — confirm that is intended.
        if (fileStat?.type !== vscode.FileType.File || fileStat.size > maxFileSizeBytes) {
            continue
        }

        // Check if file contains more characters than the token limit based on fileStat.size
        // and set {@link ContextItemFile.isTooLarge} for webview to display file size
        // warning.
        if (fileStat.size > charsLimit) {
            cf.isTooLarge = true
        }
        filtered.push(cf)
    }
    return filtered
}
/**
 * Resolves the content of every context item that does not already carry some.
 *
 * URL items are fetched (only when the URL context feature flag is enabled); all
 * other items are read through the editor for the item's URI and range. When
 * resolving an item throws, an error message is shown and that item is omitted
 * from the result.
 *
 * @param editor Used to read the content of non-URL items.
 * @param items Context items, with or without content.
 * @returns The items with content, in input order, minus the ones that failed.
 */
export async function fillInContextItemContent(
    editor: Editor,
    items: ContextItem[]
): Promise<ContextItemWithContent[]> {
    const resolveItem = async (item: ContextItem): Promise<ContextItemWithContent | null> => {
        // Items that already carry non-empty content are passed through untouched.
        if (item.content) {
            return { ...item, content: item.content }
        }

        let resolved = item.content
        try {
            if (!isURLContextItem(item)) {
                resolved = await editor.getTextEditorContentForFile(
                    item.uri,
                    toVSCodeRange(item.range)
                )
            } else if (await isURLContextFeatureFlagEnabled()) {
                resolved = (await fetchContentForURLContextItem(item.uri.toString())) ?? ''
            }
        } catch (error) {
            void vscode.window.showErrorMessage(
                `Cody could not include context from ${item.uri}. (Reason: ${error})`
            )
            return null
        }
        return { ...item, content: resolved! }
    }

    const resolvedItems = await Promise.all(items.map(resolveItem))
    return resolvedItems.filter(isDefined)
}