rabiyulfahim committed on
Commit
6412a86
·
verified ·
1 Parent(s): ef9e4f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -7,25 +7,26 @@ from fastapi.staticfiles import StaticFiles
7
  import os
8
  import torch
9
 
10
- # ✅ Hugging Face cache directory
11
- os.environ["HF_HOME"] = "/tmp"
12
- os.environ["TRANSFORMERS_CACHE"] = "/tmp"
 
 
13
 
14
  # -----------------------
15
  # Model Setup
16
  # -----------------------
17
  model_id = "LLM360/K2-Think"
18
 
19
- # Load tokenizer and model
20
  print("Loading tokenizer and model...")
21
  tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir="/tmp")
22
  model = AutoModelForCausalLM.from_pretrained(
23
  model_id,
24
- cache_dir="/tmp",
25
- device_map="auto", # Automatically select GPU/CPU
26
- torch_dtype=torch.float16
27
  )
28
- print("Model loaded successfully!")
29
 
30
  # -----------------------
31
  # FastAPI Setup
@@ -40,7 +41,6 @@ app.add_middleware(
40
  allow_headers=["*"],
41
  )
42
 
43
- # Mount static folder
44
  app.mount("/static", StaticFiles(directory="static"), name="static")
45
 
46
  # -----------------------
 
7
  import os
8
  import torch
9
 
10
+ # -----------------------
11
+ # Hugging Face cache
12
+ # -----------------------
13
+ os.environ["HF_HOME"] = "/tmp" # writable cache
14
+ os.environ["TRANSFORMERS_CACHE"] = "/tmp" # optional
15
 
16
  # -----------------------
17
  # Model Setup
18
  # -----------------------
19
  model_id = "LLM360/K2-Think"
20
 
 
21
  print("Loading tokenizer and model...")
22
  tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir="/tmp")
23
  model = AutoModelForCausalLM.from_pretrained(
24
  model_id,
25
+ device_map="auto", # auto assign to GPU/CPU
26
+ load_in_8bit=True, # 8-bit quantization for low memory
27
+ cache_dir="/tmp"
28
  )
29
+ print("Model loaded!")
30
 
31
  # -----------------------
32
  # FastAPI Setup
 
41
  allow_headers=["*"],
42
  )
43
 
 
44
  app.mount("/static", StaticFiles(directory="static"), name="static")
45
 
46
  # -----------------------