Spaces:
Running
Running
Update pipeline.py
Browse files- pipeline.py +7 -2
pipeline.py
CHANGED
|
@@ -45,11 +45,16 @@ def get_or_create_drive_folder(name, parent_id=None):
|
|
| 45 |
file_metadata["parents"] = [parent_id]
|
| 46 |
file = drive_service.files().create(body=file_metadata, fields="id").execute()
|
| 47 |
return file["id"]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
def find_drive_file(filename, parent_id):
|
| 49 |
"""
|
| 50 |
Checks if a file with the given name exists inside the specified Google Drive folder.
|
| 51 |
Returns the file ID if found, else None.
|
| 52 |
"""
|
|
|
|
| 53 |
try:
|
| 54 |
print(f"🔍 Searching for '{filename}' in folder: {parent_id}")
|
| 55 |
query = f"'{parent_id}' in parents and name = '{filename}' and trashed = false"
|
|
@@ -262,8 +267,8 @@ async def process_link_chunk_allOutput(link, iso, acc, saveLinkFolder, out_links
|
|
| 262 |
if len(final_input_link) > 1000000:
|
| 263 |
final_input_link = final_input_link[:1000000]
|
| 264 |
|
| 265 |
-
all_output
|
| 266 |
-
|
| 267 |
return context, all_output, chunk
|
| 268 |
|
| 269 |
# Main execution
|
|
|
|
| 45 |
file_metadata["parents"] = [parent_id]
|
| 46 |
file = drive_service.files().create(body=file_metadata, fields="id").execute()
|
| 47 |
return file["id"]
|
| 48 |
+
|
| 49 |
+
def build_fresh_drive():
|
| 50 |
+
return build("drive", "v3", credentials=Credentials.from_authorized_user_file("token.json"))
|
| 51 |
+
|
| 52 |
def find_drive_file(filename, parent_id):
|
| 53 |
"""
|
| 54 |
Checks if a file with the given name exists inside the specified Google Drive folder.
|
| 55 |
Returns the file ID if found, else None.
|
| 56 |
"""
|
| 57 |
+
drive = build_fresh_drive()
|
| 58 |
try:
|
| 59 |
print(f"🔍 Searching for '{filename}' in folder: {parent_id}")
|
| 60 |
query = f"'{parent_id}' in parents and name = '{filename}' and trashed = false"
|
|
|
|
| 267 |
if len(final_input_link) > 1000000:
|
| 268 |
final_input_link = final_input_link[:1000000]
|
| 269 |
|
| 270 |
+
all_output = all_output+ data_preprocess.normalize_for_overlap(all_output) + final_input_link
|
| 271 |
+
print("done process link chunk_alloutput")
|
| 272 |
return context, all_output, chunk
|
| 273 |
|
| 274 |
# Main execution
|