Spaces · Running on A10G
MekkCyber committed · Commit 0d12afd · Parent(s): 886605d
add size
app.py CHANGED
@@ -8,6 +8,7 @@ from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from bitsandbytes.nn import Linear4bit
 from packaging import version
 import os
+from tqdm import tqdm


 def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
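Note: `tqdm` is imported here but none of the visible hunks use it; presumably it backs console progress reporting elsewhere in the file. A minimal sketch of typical tqdm usage, an assumed pattern rather than code from this commit:

    from tqdm import tqdm
    import time

    # Hypothetical usage: tqdm wraps any iterable and renders a console progress bar.
    for _ in tqdm(range(10), desc="Quantizing"):
        time.sleep(0.01)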
@@ -108,6 +109,9 @@ def quantize_model(
     progress(0.33, desc="Quantizing")

     # Quantize model
+    # Calculate original model size
+    original_size_gb = get_model_size(model)
+
     modules = list(model.named_modules())
     for idx, (_, module) in enumerate(modules):
         if isinstance(module, Linear4bit):
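`original_size_gb` is captured before the per-module pass below runs, so the "Original (bf16)" figure reported later assumes the weights are still at load precision at this point. A self-contained sketch of the same state-dict measurement on a toy bf16 model (`get_model_size` itself is added later in this commit; the toy layer sizes are illustrative only):

    import torch
    import torch.nn as nn

    # Toy stand-in for the downloaded model.
    toy = nn.Sequential(nn.Linear(4096, 4096), nn.Linear(4096, 4096)).to(torch.bfloat16)

    # Same computation get_model_size performs: total bytes across all state-dict tensors.
    total_bytes = sum(p.nelement() * p.element_size() for p in toy.state_dict().values())
    print(f"{total_bytes / (1024 ** 3):.4f} GB")  # ~0.0625 GB: 2*(4096*4096+4096) params * 2 bytes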
@@ -116,12 +120,13 @@ def quantize_model(
             progress(0.33 + (0.33 * idx / len(modules)), desc="Quantizing")

     progress(0.66, desc="Quantized successfully")
-    return model
+    return model, original_size_gb


 def save_model(
     model,
     model_name,
+    original_size_gb,
     quant_type_4,
     double_quant_4,
     compute_type_4,
@@ -189,7 +194,7 @@ def save_model(
         <div style="line-height: 1.2; font-size: 0.75em;">{model_architecture_str_html}</div>
     </div>
     """
-    return f'
+    return f'🚀 Quantized Model <br/><h1> 🎉 DONE</h1><br/>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank" style="text-decoration:underline">{repo_name}</a><br/><br/>📋 Model Architecture<br/>{model_architecture_info}<br/><br/>📦 Model Size <br/><br/> Original (bf16) ≈ {original_size_gb} GB → Quantized ≈ {get_model_size(model)} GB'
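A back-of-envelope check of the numbers this message reports, using an illustrative 7B-parameter model (not a figure from the Space): bf16 spends 2 bytes per weight, while NF4 packs weights at 4 bits each, so the quantized figure lands near a quarter of the original, before overhead from quantization constants and any layers left unquantized:

    params = 7_000_000_000
    bf16_gb = params * 2 / (1024 ** 3)    # 2 bytes per bf16 weight  -> ~13.04 GB
    nf4_gb = params * 0.5 / (1024 ** 3)   # 4 bits per packed weight -> ~3.26 GB
    print(f"Original (bf16) ≈ {bf16_gb:.2f} GB → Quantized ≈ {nf4_gb:.2f} GB")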
@@ -231,7 +236,7 @@ def quantize_and_save(
     try:
         # Download phase
         progress(0, desc="Starting quantization process")
-        quantized_model = quantize_model(
+        quantized_model, original_size_gb = quantize_model(
             model_name,
             quant_type_4,
             double_quant_4,
@@ -243,6 +248,7 @@ def quantize_and_save(
         final_message = save_model(
             quantized_model,
             model_name,
+            original_size_gb,
             quant_type_4,
             double_quant_4,
             compute_type_4,
@@ -264,7 +270,30 @@ def quantize_and_save(
             <p>{error_message}</p>
         </div>
         """
-
+def get_model_size(model):
+    """
+    Calculate the size of a PyTorch model in gigabytes.
+
+    Args:
+        model: PyTorch model
+
+    Returns:
+        float: Size of the model in GB
+    """
+    # Get model state dict
+    state_dict = model.state_dict()
+
+    # Calculate total size in bytes
+    total_size = 0
+    for param in state_dict.values():
+        # Calculate bytes for each parameter
+        total_size += param.nelement() * param.element_size()
+
+    # Convert bytes to gigabytes (1 GB = 1,073,741,824 bytes)
+    size_gb = total_size / (1024 ** 3)
+    size_gb = round(size_gb, 2)
+
+    return size_gb

 css = """/* Custom CSS to allow scrolling */
 .gradio-container {overflow-y: auto;}
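Run on a quantized model, the same arithmetic yields a much smaller figure because bitsandbytes stores 4-bit weights packed two per byte in uint8 tensors (`element_size()` of 1 and roughly half the element count), while unquantized tensors keep their full size. A self-contained usage sketch mirroring the function just added (toy sizes are assumptions, not the Space's defaults):

    import torch
    import torch.nn as nn

    def get_model_size(model):
        # Same logic as the function added in this commit: total bytes over the state dict.
        total = sum(p.nelement() * p.element_size() for p in model.state_dict().values())
        return round(total / (1024 ** 3), 2)

    # Toy bf16 model; prints 0.5 (16 * (4096*4096 + 4096) params * 2 bytes ≈ 0.50 GB).
    toy = nn.Sequential(*[nn.Linear(4096, 4096) for _ in range(16)]).to(torch.bfloat16)
    print(get_model_size(toy))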
@@ -477,7 +506,8 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
     gr.Markdown(
         """
         # 🤗 BitsAndBytes Quantizer : Create your own BNB Quants ! ✨
-
+        <br/>
+        <br/>
         """
     )
