update llama model
- app.py +2 -2
- llama2_response_mail_generator.py +10 -0
app.py
CHANGED
@@ -72,14 +72,14 @@ model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
 model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin" # The model is in bin format
 
 # Download the model file
+print('downloading llama model...')
 model_path_llama = hf_hub_download(repo_id=model_name_or_path, filename=model_basename, force_download=True)
-
+print('finished download...')
 # Initialize the Llama model with appropriate settings for GPU
 lcpp_llm = Llama(
     model_path=model_path_llama,
     n_threads=2, # CPU cores to use
     n_batch=512, # Batch size for processing; adjust as per your VRAM capacity
-    n_gpu_layers=32 # Number of layers to run on GPU, dependent on your GPU's VRAM
 )
 
 def generate_email_response(email_prompt):
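Taken together, this hunk downloads the GGML checkpoint from the Hugging Face Hub with hf_hub_download, logs progress around the download, and constructs the llama-cpp-python model without GPU offload (n_gpu_layers is removed). A minimal, self-contained sketch of that flow is below; the final generation call and its parameters (max_tokens, temperature, stop) are illustrative assumptions and are not part of this commit.

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin"  # GGML-format checkpoint

# Download the model file from the Hub
# (force_download=True, as in the commit, re-fetches it on every start)
print('downloading llama model...')
model_path_llama = hf_hub_download(repo_id=model_name_or_path, filename=model_basename, force_download=True)
print('finished download...')

# Initialize the model; with n_gpu_layers dropped in this commit, inference runs on CPU
lcpp_llm = Llama(
    model_path=model_path_llama,
    n_threads=2,  # CPU cores to use
    n_batch=512,  # Batch size for prompt processing; adjust to available memory
)

# Illustrative generation call (assumed, not from the commit)
response = lcpp_llm(
    "Write a short reply thanking the sender for their email.",
    max_tokens=256,
    temperature=0.7,
    stop=["</s>"],
)
print(response["choices"][0]["text"])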
llama2_response_mail_generator.py
CHANGED
@@ -2,6 +2,16 @@ from huggingface_hub import hf_hub_download
 
 from llama_cpp import Llama
 
+
+
+# Initialize the Llama model with appropriate settings for GPU
+lcpp_llm = Llama(
+    model_path=model_path,
+    n_threads=2, # CPU cores to use
+    n_batch=512, # Batch size for processing; adjust as per your VRAM capacity
+    n_gpu_layers=32 # Number of layers to run on GPU, dependent on your GPU's VRAM
+)
+
 def generate_email_response(email_prompt):
     # Check input received by the function
     print("Received prompt:", email_prompt)
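One detail worth noting in this hunk: the new module-level Llama(...) call reads a model_path variable, but no line in the fragment shown defines it, so the module would need its own download step (or to receive the path from app.py). The sketch below shows one way the module could be made self-contained under that assumption; the repo id and filename are mirrored from app.py, and the body of generate_email_response beyond the existing print is illustrative, not from the commit.

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the checkpoint so model_path is defined in this module as well
# (repo id and filename assumed to mirror the values used in app.py)
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-13B-chat-GGML",
    filename="llama-2-13b-chat.ggmlv3.q5_1.bin",
)

# Initialize the Llama model with appropriate settings for GPU
lcpp_llm = Llama(
    model_path=model_path,
    n_threads=2,      # CPU cores to use
    n_batch=512,      # Batch size for processing; adjust as per your VRAM capacity
    n_gpu_layers=32,  # Layers to offload to GPU, dependent on available VRAM
)

def generate_email_response(email_prompt):
    # Check input received by the function
    print("Received prompt:", email_prompt)
    # Illustrative completion call; parameters are assumptions, not from the commit
    response = lcpp_llm(email_prompt, max_tokens=512, temperature=0.7)
    return response["choices"][0]["text"]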