Spaces:

jbilcke-hf
/

webapp-factory-wizardcoder-transformers

Paused

Julian Bilcke committed on Jun 27, 2023

Commit

e0464d7

1 Parent(s): 16386fc

fix for test.py

Files changed (4) hide show

Dockerfile CHANGED Viewed

@@ -59,7 +59,7 @@ RUN pip install -r requirements.txt
 COPY --chown=user . .
 # temporary skip model download, to make things faster
-# RUN git clone https://huggingface.co/WizardLM/WizardCoder-15B-V1.0
 # help Pythonia by giving it the path to Python
 ENV PYTHON_BIN /usr/bin/python3

 COPY --chown=user . .
 # temporary skip model download, to make things faster
+RUN git clone https://huggingface.co/WizardLM/WizardCoder-15B-V1.0
 # help Pythonia by giving it the path to Python
 ENV PYTHON_BIN /usr/bin/python3

README.md CHANGED Viewed

@@ -46,8 +46,11 @@ To install those dependencies, first you should create and activate a new virtua
 python -m venv .venv
 source .venv/bin/activate
 pip install --upgrade pip
 ```
 Then install the dependencies in it:
 ```bash
 pip install -r requirements.txt

 python -m venv .venv
 source .venv/bin/activate
 pip install --upgrade pip
+pip install torch
 ```
+Note: the Dockerfile will install pytorch itself
 Then install the dependencies in it:
 ```bash
 pip install -r requirements.txt

requirements.txt CHANGED Viewed

test.py CHANGED Viewed

@@ -1,12 +1,5 @@
-import sys
-import os
-import fire
 import torch
-import transformers
-import json
-import jsonlines
-from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
 if torch.cuda.is_available():
     device = "cuda"
@@ -21,9 +14,27 @@ except:
 print("device: " + device)
 base_model = "./models/WizardCoder-15B-V1.0"
 load_8bit = False
 tokenizer = AutoTokenizer.from_pretrained(base_model)
 if device == "cuda":
     model = AutoModelForCausalLM.from_pretrained(
@@ -38,10 +49,21 @@ elif device == "mps":
         device_map={"": device},
         torch_dtype=torch.float16,
     )
 model.config.pad_token_id = tokenizer.pad_token_id
 if not load_8bit:
     model.half()
 model.eval()
 if torch.__version__ >= "2" and sys.platform != "win32":
-  model = torch.compile(model)

 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
 if torch.cuda.is_available():
     device = "cuda"
 print("device: " + device)
+def evaluate(instruction, tokenizer, model):
+    prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
+### Instruction:
+{instruction}
+### Response:"""
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
+    input_ids = inputs["input_ids"].to(device)
+    with torch.no_grad():
+        generation_output = model.generate(input_ids)
+    s = generation_output
+    output = tokenizer.decode(s[0], skip_special_tokens=True)
+    return output.split("### Response:")[1].strip()
 base_model = "./models/WizardCoder-15B-V1.0"
 load_8bit = False
+print("loading tokenizer..")
 tokenizer = AutoTokenizer.from_pretrained(base_model)
 if device == "cuda":
     model = AutoModelForCausalLM.from_pretrained(
         device_map={"": device},
         torch_dtype=torch.float16,
     )
+print("loaded tokenizer")
 model.config.pad_token_id = tokenizer.pad_token_id
 if not load_8bit:
     model.half()
+print("calling model.eval()")
 model.eval()
 if torch.__version__ >= "2" and sys.platform != "win32":
+    print("calling torch.compile(model)")
+    model = torch.compile(model)
+instruction = "Write a short summary about AI."
+print("calling evaluate..")
+result = evaluate(instruction, tokenizer, model)
+print("result: ")
+print(result)