Skip to content

Commit 39e0b95

Browse files
authored
Fix push/pull directory from DVC tracked tree (#1781)
* use cached outs from experiments data to identify tracked data * rename internal variables * rename transform function * add integration test for using the exp show data
1 parent 22ac84a commit 39e0b95

File tree

9 files changed

+276
-92
lines changed

9 files changed

+276
-92
lines changed

extension/src/cli/reader.ts

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,12 @@ export interface ValueTreeOrError {
5656
error?: { type: string; msg: string }
5757
}
5858

59-
export interface ValueTreeRoot {
60-
[filename: string]: ValueTreeOrError
59+
type RelPathObject<T> = {
60+
[relPath: string]: T
6161
}
6262

63+
export type ValueTreeRoot = RelPathObject<ValueTreeOrError>
64+
6365
interface ValueTreeNode {
6466
[key: string]: Value | ValueTree
6567
}
@@ -76,18 +78,14 @@ export interface BaseExperimentFields {
7678
checkpoint_parent?: string
7779
}
7880

79-
type DepsDetails = { hash: string; size: number; nfiles: null | number }
80-
type OutsDetails = DepsDetails & { use_cache: boolean; is_data_source: boolean }
81-
82-
interface OutsOrDepsDetails {
83-
[filename: string]: DepsDetails | OutsDetails
84-
}
81+
type Dep = { hash: string; size: number; nfiles: null | number }
82+
type Out = Dep & { use_cache: boolean; is_data_source: boolean }
8583

8684
export interface ExperimentFields extends BaseExperimentFields {
8785
params?: ValueTreeRoot
8886
metrics?: ValueTreeRoot
89-
deps?: OutsOrDepsDetails
90-
outs?: OutsOrDepsDetails
87+
deps?: RelPathObject<Dep>
88+
outs?: RelPathObject<Out>
9189
}
9290

9391
export interface ExperimentFieldsOrError {

extension/src/experiments/workspace.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { BaseWorkspaceWebviews } from '../webview/workspace'
99
import { WorkspacePlots } from '../plots/workspace'
1010
import { Title } from '../vscode/title'
1111
import { setContextValue } from '../vscode/context'
12+
import { WorkspaceRepositories } from '../repository/workspace'
1213

1314
export class WorkspaceExperiments extends BaseWorkspaceWebviews<
1415
Experiments,
@@ -54,9 +55,9 @@ export class WorkspaceExperiments extends BaseWorkspaceWebviews<
5455
)
5556
}
5657

57-
public linkRepositories(workspacePlots: WorkspacePlots) {
58+
public linkRepositories(workspace: WorkspacePlots | WorkspaceRepositories) {
5859
for (const [dvcRoot, repository] of Object.entries(this.repositories)) {
59-
workspacePlots.getRepository(dvcRoot).setExperiments(repository)
60+
workspace.getRepository(dvcRoot).setExperiments(repository)
6061
}
6162
}
6263

extension/src/extension.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,7 @@ export class Extension extends Disposable implements IExtension {
359359
])
360360

361361
this.experiments.linkRepositories(this.plots)
362+
this.experiments.linkRepositories(this.repositories)
362363

363364
return Promise.all([
364365
this.repositories.isReady(),

extension/src/fileSystem/tree.ts

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ import {
1717
InternalCommands
1818
} from '../commands/internal'
1919
import { tryThenMaybeForce } from '../cli/actions'
20-
import { Flag } from '../cli/constants'
2120
import { RegisteredCliCommands, RegisteredCommands } from '../commands/external'
2221
import { sendViewOpenedTelemetryEvent } from '../telemetry'
2322
import { EventName } from '../telemetry/constants'
@@ -27,7 +26,7 @@ import { warnOfConsequences } from '../vscode/modal'
2726
import { Response } from '../vscode/response'
2827
import { Resource } from '../repository/commands'
2928
import { WorkspaceRepositories } from '../repository/workspace'
30-
import { PathItem } from '../repository/model/collect'
29+
import { collectTrackedPaths, PathItem } from '../repository/model/collect'
3130
import { Title } from '../vscode/title'
3231
import { Disposable } from '../class/dispose'
3332

@@ -242,12 +241,13 @@ export class TrackedExplorerTree
242241
}
243242

244243
private tryThenForce(commandId: CommandId) {
245-
return ({ dvcRoot, resourceUri, isTracked }: PathItem) => {
246-
const relPath = relativeWithUri(dvcRoot, resourceUri)
247-
const args = [dvcRoot, relPath]
248-
if (!isTracked) {
249-
args.push(Flag.RECURSIVE)
250-
}
244+
return async (pathItem: PathItem) => {
245+
const { dvcRoot } = pathItem
246+
const tracked = await collectTrackedPaths(pathItem, (path: string) =>
247+
this.getRepoChildren(dvcRoot, path)
248+
)
249+
const args = [dvcRoot, ...tracked.sort()]
250+
251251
return tryThenMaybeForce(this.internalCommands, commandId, ...args)
252252
}
253253
}

extension/src/repository/index.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { RepositoryModel } from './model'
55
import { SourceControlManagement } from './sourceControlManagement'
66
import { InternalCommands } from '../commands/internal'
77
import { DeferredDisposable } from '../class/deferred'
8+
import { Experiments } from '../experiments'
89

910
export const RepositoryScale = {
1011
TRACKED: 'tracked'
@@ -63,6 +64,16 @@ export class Repository extends DeferredDisposable {
6364
return { tracked: this.getState().tracked.size }
6465
}
6566

67+
public setExperiments(experiments: Experiments) {
68+
this.dispose.track(
69+
experiments.onDidChangeExperiments(data => {
70+
if (data) {
71+
this.model.transformAndSetExperiments(data)
72+
}
73+
})
74+
)
75+
}
76+
6677
private async initialize() {
6778
this.dispose.track(
6879
this.data.onDidUpdate(data => {

extension/src/repository/model/collect.test.ts

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,25 @@ describe('collectTree', () => {
99
const makeAbsPath = (...paths: string[]): string => makeUri(...paths).fsPath
1010

1111
it('should transform recursive list output into a tree', () => {
12-
const paths = [
13-
join('data', 'MNIST', 'raw', 't10k-images-idx3-ubyte'),
14-
join('data', 'MNIST', 'raw', 't10k-images-idx3-ubyte.gz'),
15-
join('data', 'MNIST', 'raw', 't10k-labels-idx1-ubyte'),
16-
join('data', 'MNIST', 'raw', 't10k-labels-idx1-ubyte.gz'),
17-
join('data', 'MNIST', 'raw', 'train-images-idx3-ubyte'),
18-
join('data', 'MNIST', 'raw', 'train-images-idx3-ubyte.gz'),
19-
join('data', 'MNIST', 'raw', 'train-labels-idx1-ubyte'),
20-
join('data', 'MNIST', 'raw', 'train-labels-idx1-ubyte.gz'),
21-
join('logs', 'acc.tsv'),
22-
join('logs', 'loss.tsv'),
23-
'model.pt'
24-
]
12+
const paths = new Set([
13+
makeAbsPath('data', 'MNIST', 'raw', 't10k-images-idx3-ubyte'),
14+
makeAbsPath('data', 'MNIST', 'raw', 't10k-images-idx3-ubyte.gz'),
15+
makeAbsPath('data', 'MNIST', 'raw', 't10k-labels-idx1-ubyte'),
16+
makeAbsPath('data', 'MNIST', 'raw', 't10k-labels-idx1-ubyte.gz'),
17+
makeAbsPath('data', 'MNIST', 'raw', 'train-images-idx3-ubyte'),
18+
makeAbsPath('data', 'MNIST', 'raw', 'train-images-idx3-ubyte.gz'),
19+
makeAbsPath('data', 'MNIST', 'raw', 'train-labels-idx1-ubyte'),
20+
makeAbsPath('data', 'MNIST', 'raw', 'train-labels-idx1-ubyte.gz'),
21+
makeAbsPath('logs', 'acc.tsv'),
22+
makeAbsPath('logs', 'loss.tsv'),
23+
makeAbsPath('model.pt')
24+
])
2525

26-
const treeData = collectTree(dvcDemoPath, paths)
26+
const treeData = collectTree(
27+
dvcDemoPath,
28+
paths,
29+
new Set([join('data', 'MNIST', 'raw')])
30+
)
2731

2832
expect(treeData).toStrictEqual(
2933
new Map([
@@ -39,7 +43,7 @@ describe('collectTree', () => {
3943
{
4044
dvcRoot: dvcDemoPath,
4145
isDirectory: true,
42-
isTracked: true,
46+
isTracked: false,
4347
resourceUri: makeUri('logs')
4448
},
4549
{
Lines changed: 68 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,57 @@
1-
import { dirname, join, resolve } from 'path'
1+
import { dirname, join, relative, resolve } from 'path'
22
import { Uri } from 'vscode'
33
import { Resource } from '../commands'
44
import { addToMapSet } from '../../util/map'
5-
import { PathOutput } from '../../cli/reader'
6-
import { isSameOrChild } from '../../fileSystem'
5+
import { ExperimentsOutput, PathOutput } from '../../cli/reader'
6+
import { isSameOrChild, relativeWithUri } from '../../fileSystem'
77
import { getDirectChild, getPath, getPathArray } from '../../fileSystem/util'
88

99
export type PathItem = Resource & {
1010
isDirectory: boolean
1111
isTracked: boolean
1212
}
1313

14-
const transform = (
14+
const transformToAbsTree = (
1515
dvcRoot: string,
1616
acc: Map<string, Set<string>>,
17-
isTracked: Set<string>
17+
trackedRelPaths: Set<string>
1818
): Map<string, PathItem[]> => {
19-
const treeMap = new Map<string, PathItem[]>()
19+
const absTree = new Map<string, PathItem[]>()
2020

21-
for (const [path, paths] of acc.entries()) {
22-
const items = [...paths].map(path => ({
21+
for (const [path, childPaths] of acc.entries()) {
22+
const items = [...childPaths].map(childPath => ({
2323
dvcRoot,
24-
isDirectory: !!acc.get(path),
25-
isTracked: isTracked.has(path),
26-
resourceUri: Uri.file(join(dvcRoot, path))
24+
isDirectory: !!acc.get(childPath),
25+
isTracked: trackedRelPaths.has(childPath),
26+
resourceUri: Uri.file(join(dvcRoot, childPath))
2727
}))
2828
const absPath = Uri.file(join(dvcRoot, path)).fsPath
29-
treeMap.set(absPath, items)
29+
absTree.set(absPath, items)
3030
}
3131

32-
return treeMap
32+
return absTree
3333
}
3434

3535
export const collectTree = (
3636
dvcRoot: string,
37-
paths: string[]
37+
absLeafs: Set<string>,
38+
trackedRelPaths = new Set<string>()
3839
): Map<string, PathItem[]> => {
39-
const acc = new Map<string, Set<string>>()
40-
const isTracked = new Set<string>()
40+
const relTree = new Map<string, Set<string>>()
4141

42-
for (const path of paths) {
43-
const pathArray = getPathArray(path)
42+
for (const absLeaf of absLeafs) {
43+
const relPath = relative(dvcRoot, absLeaf)
44+
const relPathArray = getPathArray(relPath)
4445

45-
isTracked.add(path)
46-
const dir = dirname(path)
47-
if (dir !== '.') {
48-
isTracked.add(dir)
49-
}
46+
trackedRelPaths.add(relPath)
5047

51-
for (let idx = 0; idx < pathArray.length; idx++) {
52-
const path = getPath(pathArray, idx)
53-
addToMapSet(acc, path, getDirectChild(pathArray, idx))
48+
for (let idx = 0; idx < relPathArray.length; idx++) {
49+
const path = getPath(relPathArray, idx)
50+
addToMapSet(relTree, path, getDirectChild(relPathArray, idx))
5451
}
5552
}
5653

57-
return transform(dvcRoot, acc, isTracked)
54+
return transformToAbsTree(dvcRoot, relTree, trackedRelPaths)
5855
}
5956

6057
const collectMissingParents = (acc: string[], absPath: string) => {
@@ -94,28 +91,62 @@ export const collectModifiedAgainstHead = (
9491
return acc
9592
}
9693

97-
const collectPath = (acc: Set<string>, dvcRoot: string, path: string) => {
98-
const pathArray = getPathArray(path)
94+
const collectAbsPath = (
95+
acc: Set<string>,
96+
absLeafs: Set<string>,
97+
dvcRoot: string,
98+
absPath: string
99+
) => {
100+
const relPathArray = getPathArray(relative(dvcRoot, absPath))
99101

100-
for (let reverseIdx = pathArray.length; reverseIdx > 0; reverseIdx--) {
101-
const path = join(dvcRoot, getPath(pathArray, reverseIdx))
102-
if (acc.has(path)) {
102+
for (let reverseIdx = relPathArray.length; reverseIdx > 0; reverseIdx--) {
103+
const absPath = join(dvcRoot, getPath(relPathArray, reverseIdx))
104+
if (acc.has(absPath) || absLeafs.has(absPath)) {
103105
continue
104106
}
105107

106-
acc.add(path)
108+
acc.add(absPath)
107109
}
108110
}
109111

110-
export const collectTracked = (
112+
export const collectTrackedNonLeafs = (
111113
dvcRoot: string,
112-
paths: string[] = []
114+
absLeafs = new Set<string>()
113115
): Set<string> => {
114116
const acc = new Set<string>()
115117

116-
for (const path of paths) {
117-
collectPath(acc, dvcRoot, path)
118+
for (const absPath of absLeafs) {
119+
collectAbsPath(acc, absLeafs, dvcRoot, absPath)
120+
}
121+
122+
return acc
123+
}
124+
125+
export const collectTrackedOuts = (data: ExperimentsOutput): Set<string> => {
126+
const acc = new Set<string>()
127+
for (const [relPath, { use_cache }] of Object.entries(
128+
data.workspace.baseline.data?.outs || {}
129+
)) {
130+
if (use_cache) {
131+
acc.add(relPath)
132+
}
118133
}
134+
return acc
135+
}
119136

137+
export const collectTrackedPaths = async (
138+
{ dvcRoot, resourceUri, isTracked }: PathItem,
139+
getChildren: (path: string) => Promise<PathItem[]>
140+
): Promise<string[]> => {
141+
const acc = []
142+
143+
if (isTracked) {
144+
acc.push(relativeWithUri(dvcRoot, resourceUri))
145+
return acc
146+
}
147+
const children = await getChildren(resourceUri.fsPath)
148+
for (const child of children) {
149+
acc.push(...(await collectTrackedPaths(child, getChildren)))
150+
}
120151
return acc
121152
}

0 commit comments

Comments
 (0)