Update app.py
app.py
CHANGED
@@ -34,29 +34,29 @@ with open('config.json', 'r') as config_file:
 num_args = len(config)
 
 
-
-
-
-
-
-
-
-if num_args ==
+arg1 = config.get('arg1', '1')
+arg2 = config.get('arg2', 'train_fleet')
+arg3 = config.get('arg3', 'test_fleet')
+arg4 = config.get('arg4', '1')
+arg5 = config.get('arg5', '1')
+arg6 = config.get('arg6', 'saved_fleet_model')
+
+if num_args == 6:
     # cmd args
     # sys.argv[0] is the script name, sys.argv[1] is the first argument, etc.
-    should_train_model =
-    train_file =
-    test_file =
-    batch_size_for_trainer = int(
-    should_produce_eval_matrix = int(
-    path_to_save_trained_model_to =
-
-    print(f"should train model? : {
-    print (f"file to train on : {
-    print (f"file to evaluate on : {
-    print (f"batch size : {
-    print (f"should produce eval matrix : {
-    print (f"path to save trained model : {
+    should_train_model = arg1  # should train model?
+    train_file = arg2  # training file name
+    test_file = arg3  # eval file name
+    batch_size_for_trainer = int(arg4)  # batch sizes to send to trainer
+    should_produce_eval_matrix = int(arg5)  # should produce matrix?
+    path_to_save_trained_model_to = arg6
+
+    print(f"should train model? : {arg1}")
+    print (f"file to train on : {arg2}")
+    print (f"file to evaluate on : {arg3}")
+    print (f"batch size : {arg4}")
+    print (f"should produce eval matrix : {arg5}")
+    print (f"path to save trained model : {arg6}")
 
 print(f"should train model? : {should_train_model}")
 print (f"file to train on : {train_file}")
@@ -331,27 +331,27 @@ if (should_train_model=='1'): #train model
     # Upload the model and tokenizer to the Hugging Face repository
 
     upload_folder(
-        folder_path=f"{model_save_path}",
-        path_in_repo=f"{model_save_path}",
+        folder_path=f"{model_save_path}_model",
+        path_in_repo=f"{model_save_path}_model",
         repo_id=repo_name,
         token=api_token,
-        commit_message="Push
+        commit_message="Push model",
         #overwrite=True # Force overwrite existing files
     )
 
     upload_folder(
-        folder_path="
-        path_in_repo="
+        folder_path=f"{model_save_path}_tokenizer",
+        path_in_repo=f"{model_save_path}_tokenizer",
         repo_id=repo_name,
         token=api_token,
-        commit_message="Push
+        commit_message="Push tokenizer",
         #overwrite=True # Force overwrite existing files
     )
 
 else:
     print('Load Pre-trained')
-    model_save_path = "./
-    tokenizer_save_path = "./
+    model_save_path = f"./{model_save_path}_model"
+    tokenizer_save_path = f"./{model_save_path}_tokenizer"
     # RobertaTokenizer.from_pretrained(model_save_path)
     model = AutoModelForSequenceClassification.from_pretrained(model_save_path).to('cpu')
     tokenizer = AutoTokenizer.from_pretrained(tokenizer_save_path)
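The first hunk replaces the old argument handling (the surviving comments suggest sys.argv) with config.get lookups that fall back to defaults, so app.py now takes its settings from config.json; the num_args == 6 gate still expects exactly six keys in that file. A minimal sketch of a matching config.json follows, assuming only the key names and defaults shown in the diff; the concrete values are illustrative.

import json

# Sketch of a config.json that the updated app.py would read; key names and
# defaults mirror the diff, the values are illustrative only.
example_config = {
    "arg1": "1",                  # should train model? ("1" = yes)
    "arg2": "train_fleet",        # training file name
    "arg3": "test_fleet",         # evaluation file name
    "arg4": "4",                  # batch size passed to the trainer
    "arg5": "1",                  # produce the eval matrix? ("1" = yes)
    "arg6": "saved_fleet_model",  # path to save the trained model to
}

with open("config.json", "w") as f:
    json.dump(example_config, f, indent=2)

# Same pattern as the diff: load the file, then fall back to a default
# whenever a key is missing.
with open("config.json", "r") as config_file:
    config = json.load(config_file)

num_args = len(config)                                 # 6 keys, so the if-branch runs
batch_size_for_trainer = int(config.get("arg4", "1"))  # -> 4
print(num_args, batch_size_for_trainer)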