Spaces:
Runtime error
Runtime error
removing more trailing s from units
Browse files
app.py
CHANGED
|
@@ -333,9 +333,9 @@ def filter_by_extension_month(_df, _extension):
|
|
| 333 |
|
| 334 |
# Update layout
|
| 335 |
fig.update_layout(
|
| 336 |
-
title="Monthly Additions of LFS Files by Extension (in TBs)",
|
| 337 |
xaxis_title="Date",
|
| 338 |
-
yaxis_title="Size (TBs)",
|
| 339 |
legend_title="Type",
|
| 340 |
yaxis=dict(tickformat=".2f"), # Format y-axis labels to 2 decimal places
|
| 341 |
)
|
|
@@ -431,7 +431,7 @@ with gr.Blocks(theme="citrus") as demo:
|
|
| 431 |
with gr.Column(scale=2):
|
| 432 |
gr.Markdown("### Current Storage Usage")
|
| 433 |
gr.Markdown(
|
| 434 |
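-
"As of September 20, 2024, total files stored in Git LFS summed to almost 29 PB. To put this into perspective, the last [Common Crawl](https://commoncrawl.org/) download was [451 TBs](https://github.com/commoncrawl/cc-crawl-statistics/blob/master/stats/crawler/CC-MAIN-2024-38.json#L31) - the Hub stores the equivalent of more than **64 Common Crawls** 🤯."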
|
| 435 |
)
|
| 436 |
with gr.Column(scale=3):
|
| 437 |
# Convert the total size to petabytes and format to two decimal places
|
|
@@ -457,14 +457,14 @@ with gr.Blocks(theme="citrus") as demo:
|
|
| 457 |
# drop the unnamed: 0 column
|
| 458 |
by_extension_size = by_extension_size.drop(columns=["Unnamed: 0"])
|
| 459 |
# average size
|
| 460 |
-
by_extension_size["Average File Size (MBs)"] = (
|
| 461 |
by_extension_size["size"].astype(float) / by_extension_size["count"]
|
| 462 |
)
|
| 463 |
-
by_extension_size["Average File Size (MBs)"] = (
|
| 464 |
-
by_extension_size["Average File Size (MBs)"] / 1e6
|
| 465 |
)
|
| 466 |
-
by_extension_size["Average File Size (MBs)"] = by_extension_size[
|
| 467 |
-
"Average File Size (MBs)"
|
| 468 |
].map("{:.2f}".format)
|
| 469 |
# format the size column
|
| 470 |
by_extension_size = format_dataframe_size_column(by_extension_size, ["size"])
|
|
@@ -487,7 +487,7 @@ with gr.Blocks(theme="citrus") as demo:
|
|
| 487 |
"File Extension",
|
| 488 |
"Total Size (PB)",
|
| 489 |
"Number of Files",
|
| 490 |
-
"Average File Size (MBs)",
|
| 491 |
]
|
| 492 |
]
|
| 493 |
)
|
|
@@ -501,7 +501,7 @@ with gr.Blocks(theme="citrus") as demo:
|
|
| 501 |
|
| 502 |
gr.HTML(div_px(5))
|
| 503 |
gr.Markdown(
|
| 504 |
-
"To dig deeper, use the dropdown to filter by file extension and see the bytes added (in TBs) each month for specific file types."
|
| 505 |
)
|
| 506 |
|
| 507 |
# get the unique values in the extension column and remove any empty strings
|
|
|
|
| 333 |
|
| 334 |
# Update layout
|
| 335 |
fig.update_layout(
|
| 336 |
+
title="Monthly Additions of LFS Files by Extension (in TB)",
|
| 337 |
xaxis_title="Date",
|
| 338 |
+
yaxis_title="Size (TB)",
|
| 339 |
legend_title="Type",
|
| 340 |
yaxis=dict(tickformat=".2f"), # Format y-axis labels to 2 decimal places
|
| 341 |
)
|
|
|
|
| 431 |
with gr.Column(scale=2):
|
| 432 |
gr.Markdown("### Current Storage Usage")
|
| 433 |
gr.Markdown(
|
| 434 |
+
"As of September 20, 2024, total files stored in Git LFS summed to almost 29 PB. To put this into perspective, the last [Common Crawl](https://commoncrawl.org/) download was [451 TB](https://github.com/commoncrawl/cc-crawl-statistics/blob/master/stats/crawler/CC-MAIN-2024-38.json#L31) - the Hub stores the equivalent of more than **64 Common Crawls** 🤯."
|
| 435 |
)
|
| 436 |
with gr.Column(scale=3):
|
| 437 |
# Convert the total size to petabytes and format to two decimal places
|
|
|
|
| 457 |
# drop the unnamed: 0 column
|
| 458 |
by_extension_size = by_extension_size.drop(columns=["Unnamed: 0"])
|
| 459 |
# average size
|
| 460 |
+
by_extension_size["Average File Size (MB)"] = (
|
| 461 |
by_extension_size["size"].astype(float) / by_extension_size["count"]
|
| 462 |
)
|
| 463 |
+
by_extension_size["Average File Size (MB)"] = (
|
| 464 |
+
by_extension_size["Average File Size (MB)"] / 1e6
|
| 465 |
)
|
| 466 |
+
by_extension_size["Average File Size (MB)"] = by_extension_size[
|
| 467 |
+
"Average File Size (MB)"
|
| 468 |
].map("{:.2f}".format)
|
| 469 |
# format the size column
|
| 470 |
by_extension_size = format_dataframe_size_column(by_extension_size, ["size"])
|
|
|
|
| 487 |
"File Extension",
|
| 488 |
"Total Size (PB)",
|
| 489 |
"Number of Files",
|
| 490 |
+
"Average File Size (MB)",
|
| 491 |
]
|
| 492 |
]
|
| 493 |
)
|
|
|
|
| 501 |
|
| 502 |
gr.HTML(div_px(5))
|
| 503 |
gr.Markdown(
|
| 504 |
+
"To dig deeper, use the dropdown to filter by file extension and see the bytes added (in TB) each month for specific file types."
|
| 505 |
)
|
| 506 |
|
| 507 |
# get the unique values in the extension column and remove any empty strings
|