import { python } from "pythonia"
const torch = await python('torch')
const sys = await python('sys')
const { AutoTokenizer, AutoModelForCausalLM } = await python('transformers')
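// Pick an accelerator: CUDA first, then Apple-silicon MPS, else CPU.
// torch.backends.mps does not exist on older PyTorch builds, hence the
// try/catch around the probe below.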
let device = await torch.cuda.is_available() ? 'cuda' : 'cpu'
try {
if (await torch.backends.mps.is_available()) {
device = 'mps'
}
} catch (_err) {
}
console.log('device: ' + device)
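// WizardCoder-15B is roughly 30 GB of weights in float16 (15B params x 2 bytes);
// setting load8bit to true roughly halves that via bitsandbytes (CUDA only).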
const baseModel = "WizardLM/WizardCoder-15B-V1.0"
const load8bit = false
let model = null
console.log("loading tokenizer..")
const tokenizer = await AutoTokenizer.from_pretrained(baseModel)
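// pythonia convention: a `$`-suffixed call passes its trailing object as
// Python keyword arguments, e.g. from_pretrained$(name, { kwargs }).
console.log("loading model..")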
if (device == "cuda") {
model = await AutoModelForCausalLM.from_pretrained$(
baseModel, {
load_in_8bit: load8bit,
torch_dtype: torch.float16,
device_map: "auto",
})
} else if (device == "mps") {
model = await AutoModelForCausalLM.from_pretrained$(
baseModel, {
device_map: {"": device},
torch_dtype: torch.float16,
})
} else {
// CPU fallback so `model` is never left null for the config below.
model = await AutoModelForCausalLM.from_pretrained(baseModel)
}
console.log("loaded tokenizer")
model.config.pad_token_id = tokenizer.pad_token_id
// Skip half() on CPU, where fp16 ops are slow or unimplemented.
if (!load8bit && device != "cpu") {
await model.half()
}
console.log("calling model.eval()")
await model.eval()
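// torch.compile arrived in PyTorch 2.0 and is not supported on Windows,
// hence the version/platform gate below.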
if (await torch.__version__ >= "2" && await sys.platform != "win32") {
console.log("calling torch.compile(model)")
model = await torch.compile(model)
}
console.log("calling evaluate..")
const prompt = `Below is an instruction that describes a task.
Write a response that appropriately completes the request.

### Instruction:
Write a short summary about AI.

### Response:`
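// Tokenize the prompt. A bare proxy call cannot take the `$` kwargs suffix,
// so one way to pass keyword arguments across the bridge is via __call__$.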
const inputs = await tokenizer.__call__$(
prompt, {
return_tensors: "pt",
truncation: true,
padding: true
})
const input_ids = await inputs["input_ids"].to(device)
// torch.no_grad() would only build an unused context manager across the
// bridge; disabling autograd globally has the intended effect.
await torch.set_grad_enabled(false)
// max_new_tokens is an assumed cap; the default generation config
// stops after only 20 tokens.
const generated = await model.generate$(input_ids, { max_new_tokens: 256 })
const output = await tokenizer.decode$(await generated[0], { skip_special_tokens: true })
console.log('output: ', output)
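// Shut down the bridged Python process so the Node process can exit.
python.exit()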