// NERDDISCO · feat: record (#14) · b7ce6b9 (unverified)
import * as hub from "@huggingface/hub";
import { S3Client, HeadBucketCommand } from "@aws-sdk/client-s3";
import { Upload } from "@aws-sdk/lib-storage";
/** List of files to upload; each entry pairs a destination path with its content. */
type FileArray = { path: string; content: Blob | Uint8Array }[];
/**
 * Uploads a leRobot dataset to Hugging Face
 *
 * The upload itself runs in the background: the returned promise resolves with
 * the EventTarget without waiting for the upload, and it does not reject when
 * the upload fails. A failure is reported through exactly one 'error' event
 * whose `detail` is the thrown value.
 *
 * @param files Array of files to upload
 * @param accessToken Hugging Face access token
 * @param repoName Repository name (will be created if it doesn't exist)
 * @param privateRepo Whether the repo should be private (default: false)
 * @returns EventTarget that emits 'repoCreated', 'progress', 'finished', and 'error' events
 */
export async function uploadToHuggingFace(
  files: FileArray,
  accessToken: string,
  repoName: string,
  privateRepo: boolean = false
): Promise<EventTarget> {
  const eventTarget = new EventTarget();

  // Best-effort, lower-cased text for whatever value was thrown.
  const messageOf = (error: unknown): string => {
    const message = (error as { message?: unknown } | null | undefined)
      ?.message;
    const text =
      typeof message === "string" && message !== "" ? message : `${error}`;
    return text.toLowerCase();
  };

  // Run upload asynchronously so UI can subscribe to events immediately
  void (async () => {
    try {
      // Get username from token
      const { name: username } = await hub.whoAmI({ accessToken });
      const repoDesignation = {
        name: `${username}/${repoName}`,
        type: "dataset" as const,
      };

      // Try to create repo; if it already exists (409), continue and upload
      try {
        await hub.createRepo({
          repo: repoDesignation,
          accessToken,
          license: "mit",
          private: privateRepo,
        });
      } catch (error: unknown) {
        const message = messageOf(error);
        const statusCode = (error as { statusCode?: unknown } | null | undefined)
          ?.statusCode;
        const isConflict =
          statusCode === 409 ||
          message.includes("409") ||
          message.includes("already created") ||
          message.includes("already exists");
        if (!isConflict) {
          // Only rethrow: the outer handler emits the 'error' event, so
          // dispatching here as well would notify listeners twice.
          throw error;
        }
      }
      // Emitted both for a freshly created repo and for one that already existed
      eventTarget.dispatchEvent(
        new CustomEvent("repoCreated", { detail: repoDesignation })
      );

      // Upload files to v2.1 branch, fallback to main if branch doesn't exist
      let uploadedBranch = "v2.1";
      try {
        await uploadFilesWithProgress(
          files,
          accessToken,
          repoDesignation,
          uploadedBranch,
          eventTarget
        );
      } catch (error: unknown) {
        if (!messageOf(error).includes("invalid rev id")) {
          throw error;
        }
        console.warn(
          "v2.1 branch not available. Falling back to main branch."
        );
        uploadedBranch = "main";
        await uploadFilesWithProgress(
          files,
          accessToken,
          repoDesignation,
          uploadedBranch,
          eventTarget
        );
      }

      console.log(
        `Successfully uploaded dataset to ${username}/${repoName} (${uploadedBranch})`
      );
      eventTarget.dispatchEvent(
        new CustomEvent("finished", { detail: { branch: uploadedBranch } })
      );
    } catch (error) {
      console.error("Error uploading to Hugging Face:", error);
      eventTarget.dispatchEvent(new CustomEvent("error", { detail: error }));
    }
  })();

  return eventTarget;
}
/**
 * Uploads a leRobot dataset to Amazon S3
 *
 * The upload itself runs in the background: the returned promise resolves with
 * the EventTarget without waiting for the upload, and it does not reject when
 * the upload fails. A failure is reported through an 'error' event whose
 * `detail` is the thrown value.
 *
 * @param files Array of files to upload
 * @param bucketName S3 bucket name
 * @param accessKeyId AWS access key ID
 * @param secretAccessKey AWS secret access key
 * @param region AWS region (default: us-east-1)
 * @param prefix Optional prefix/folder for uploaded files; trailing slashes are ignored
 * @returns EventTarget that emits 'bucketVerified', 'progress', 'finished', and 'error' events
 */
export async function uploadToS3(
  files: FileArray,
  bucketName: string,
  accessKeyId: string,
  secretAccessKey: string,
  region: string = "us-east-1",
  prefix: string = ""
): Promise<EventTarget> {
  const eventTarget = new EventTarget();
  // Drop trailing slashes so "data/" and "data" both yield "data/<path>" keys
  // instead of "data//<path>".
  const keyPrefix = prefix.replace(/\/+$/, "");

  // Run upload asynchronously
  void (async () => {
    try {
      const s3Client = new S3Client({
        region,
        credentials: {
          accessKeyId,
          secretAccessKey,
        },
      });

      // Verify bucket exists
      try {
        await s3Client.send(new HeadBucketCommand({ Bucket: bucketName }));
      } catch (error: unknown) {
        const info = error as
          | {
              name?: unknown;
              message?: unknown;
              $metadata?: { httpStatusCode?: unknown };
            }
          | null
          | undefined;
        const message =
          typeof info?.message === "string" && info.message !== ""
            ? info.message
            : `${error}`;
        // HEAD responses carry no body, so the SDK typically reports a missing
        // bucket via the error name / HTTP status rather than the message text.
        const notFound =
          info?.name === "NotFound" ||
          info?.name === "NoSuchBucket" ||
          info?.$metadata?.httpStatusCode === 404 ||
          message.includes("404") ||
          message.includes("NotFound");
        if (notFound) {
          throw new Error(
            `S3 bucket "${bucketName}" not found in region "${region}"`
          );
        }
        throw error;
      }
      eventTarget.dispatchEvent(
        new CustomEvent("bucketVerified", {
          detail: { bucketName, region },
        })
      );

      // Upload files one after another
      for (const file of files) {
        const key = keyPrefix ? `${keyPrefix}/${file.path}` : file.path;
        const upload = new Upload({
          client: s3Client,
          params: {
            Bucket: bucketName,
            Key: key,
            // Blob and Uint8Array are accepted as-is; no Node-only Buffer copy needed
            Body: file.content,
          },
        });
        upload.on("httpUploadProgress", (progress) => {
          eventTarget.dispatchEvent(
            new CustomEvent("progress", {
              detail: { file: file.path, progress },
            })
          );
        });
        await upload.done();
        console.log(`Uploaded ${key}`);
      }

      console.log(
        `Successfully uploaded dataset to S3 bucket: ${bucketName}${keyPrefix ? `/${keyPrefix}` : ""}`
      );
      eventTarget.dispatchEvent(
        new CustomEvent("finished", {
          detail: { bucketName, prefix, filesCount: files.length },
        })
      );
    } catch (error) {
      console.error("Error uploading to S3:", error);
      eventTarget.dispatchEvent(new CustomEvent("error", { detail: error }));
    }
  })();

  return eventTarget;
}
/**
 * Helper function to upload files to Hugging Face with progress tracking
 *
 * Files are uploaded one at a time, in array order, each through its own
 * `hub.uploadFile` call. After every successful upload a 'progress' event is
 * dispatched on `eventTarget` with `detail: { file, referenceId }`, where
 * `referenceId` is shared by all events of this call. The first failing upload
 * rejects the returned promise and stops the loop.
 *
 * @param files Files to upload
 * @param accessToken Hugging Face access token
 * @param repoDesignation Target dataset repository
 * @param branch Revision (branch) to upload to
 * @param eventTarget Target that receives the 'progress' events
 */
async function uploadFilesWithProgress(
  files: FileArray,
  accessToken: string,
  repoDesignation: { name: string; type: "dataset" },
  branch: string,
  eventTarget: EventTarget
): Promise<void> {
  const referenceId = `lerobot-upload-${Date.now()}`;

  for (const file of files) {
    // uploadFile is given a Blob, so wrap raw bytes when necessary
    const content =
      file.content instanceof Blob ? file.content : new Blob([file.content]);

    await hub.uploadFile({
      repo: repoDesignation,
      // Same auth option as the other hub calls in this file
      // (replaces the deprecated `credentials: { accessToken }` form)
      accessToken,
      file: { content, path: file.path },
      revision: branch,
    });

    eventTarget.dispatchEvent(
      new CustomEvent("progress", {
        detail: { file: file.path, referenceId },
      })
    );
    console.log(`Uploaded ${file.path}`);
  }
}