Spaces:

awacke1
/

CB-GR-Chatbot-Blenderbot

Runtime error

App Files Files Community

awacke1 committed on Jan 29, 2023

Commit

fc9c564

1 Parent(s): c93920f

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -82

app.py CHANGED Viewed

@@ -19,66 +19,61 @@ import httpx
 import pandas as pd
 import datasets as ds
-# -------------------------------------------- For Memory - you will need to set up a dataset and HF_TOKEN ---------
-#UseMemory=False
 UseMemory=True
-DATASET_REPO_URL="https://huggingface.co/datasets/awacke1/ChatbotMemory.csv"
-DATASET_REPO_ID="awacke1/ChatbotMemory.csv"
-DATA_FILENAME="ChatbotMemory.csv"
-DATA_FILE=os.path.join("data", DATA_FILENAME)
 HF_TOKEN=os.environ.get("HF_TOKEN")
-if UseMemory:
-   try:
-      hf_hub_download(
-      repo_id=DATASET_REPO_ID,
-      filename=DATA_FILENAME,
-      cache_dir=DATA_DIRNAME,
-      force_filename=DATA_FILENAME
-      )
-   except:
-      print("file not found")
-      repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
-      #repo.git_pull(rebase=True)
-      #repo.git_pull()
-def get_df(name: str):
-    dataset = load_dataset(str, split="train")
-    return dataset
-#def store_message(name: str, message: str) -> str:
-def store_message(name: str, message: str):
-    if name and message:
-        #repo.git_pull() # test repull to avoid out of sync rrepo error due to others commits
-        #repo = repo.git_pull(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN) # test repull to avoid out of sync rrepo error due to others commits
-        #repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
-        #
-        with open(DATA_FILE, "a") as csvfile:
-            writer = csv.DictWriter(csvfile, fieldnames=[ "time", "message", "name", ])
-            writer.writerow(
-                {"time": str(datetime.now()), "message": message.strip(), "name": name.strip()  }
-            )
-        #repo.git_pull(rebase=True)
-        commit_url = repo.push_to_hub()
-        # test api retrieval of any dataset that is saved, then return it...
-        # app = FastAPI()
-        # see: https://gradio.app/sharing_your_app/#api-page
-        # f=get_df(DATASET_REPO_ID)
-        # print(f)
-    #return commit_url
-    return ""
-# ----------------------------------------------- For Memory
 mname = "facebook/blenderbot-400M-distill"
 model = BlenderbotForConditionalGeneration.from_pretrained(mname)
 tokenizer = BlenderbotTokenizer.from_pretrained(mname)
 def take_last_tokens(inputs, note_history, history):
-    """Filter the last 128 tokens"""
     if inputs['input_ids'].shape[1] > 128:
         inputs['input_ids'] = torch.tensor([inputs['input_ids'][0][-128:].tolist()])
         inputs['attention_mask'] = torch.tensor([inputs['attention_mask'][0][-128:].tolist()])
@@ -87,7 +82,6 @@ def take_last_tokens(inputs, note_history, history):
     return inputs, note_history, history
 def add_note_to_history(note, note_history):# good example of non async since we wait around til we know it went okay.
-    """Add a note to the historical information"""
     note_history.append(note)
     note_history = '</s> <s>'.join(note_history)
     return [note_history]
@@ -96,12 +90,21 @@ title = "💬ChatBack🧠💾"
 description = """Chatbot With persistent memory dataset allowing multiagent system AI to access a shared dataset as memory pool with stored interactions.
  Current Best SOTA Chatbot:  https://huggingface.co/facebook/blenderbot-400M-distill?text=Hey+my+name+is+ChatBack%21+Are+you+ready+to+rock%3F  """
 def chat(message, history):
     history = history or []
     if history:
         history_useful = ['</s> <s>'.join([str(a[0])+'</s> <s>'+str(a[1]) for a in history])]
     else:
         history_useful = []
     history_useful = add_note_to_history(message, history_useful)
     inputs = tokenizer(history_useful, return_tensors="pt")
     inputs, history_useful, history = take_last_tokens(inputs, history_useful, history)
@@ -114,46 +117,28 @@ def chat(message, history):
     df=pd.DataFrame()
     if UseMemory:
-        store_message(message, response) # Save to dataset  -- uncomment with code above, create a dataset to store and add your HF_TOKEN from profile to this repo to use.
-        df = ds.load_dataset("awacke1/ChatbotMemory.csv")
-        df = df["train"].to_pandas()
-        df = df.sort_values(by="time",ascending=False)
-        #df.sort_index(axis=1, ascending=False)
-    return history, df
-    #return df
-    #return history,  df
-#gr.Interface(
-#    fn=chat,
-#    theme="huggingface",
-#    css=".footer {display:none !important}",
-#    inputs=["text", "state"],
-#    #outputs=["chatbot", "state", "text"],
-#    outputs=["chatbot", "state", "dataframe"],
-#    title=title,
-#    allow_flagging="never",
-#    description=f"Gradio chatbot backed by memory in a dataset repository.",
-#    article=f"The memory dataset for saves is [{DATASET_REPO_URL}]({DATASET_REPO_URL}) And here: https://huggingface.co/spaces/awacke1/DatasetAnalyzer  Code and datasets on chat are here  hf  tk: https://paperswithcode.com/datasets?q=chat&v=lst&o=newest"
-#    ).launch(debug=True)
 with gr.Blocks() as demo:
-  gr.Markdown("<h1><center>🍰Gradio chatbot backed by memory in a dataset repository.🎨</center></h1>")
-  #gr.Markdown("The memory dataset for saves is [{DATASET_REPO_URL}]({DATASET_REPO_URL}) And here: https://huggingface.co/spaces/awacke1/DatasetAnalyzer  Code and datasets on chat are here  hf  tk: https://paperswithcode.com/datasets?q=chat&v=lst&o=newest")
   with gr.Row():
     t1 = gr.Textbox(lines=1, default="", label="Chat Text:")
-    b1 = gr.Button("Send Message")
   with gr.Row(): # inputs and buttons
     s1 = gr.State([])
-    s2 = gr.Markdown()
-  with gr.Row():
     df1 = gr.Dataframe(wrap=True, max_rows=1000, overflow_row_behaviour= "paginate")
-    #chatoutput = gr.Dataframe(wrap=True, max_rows=1000, overflow_row_behaviour= "paginate", datatype = ["markdown", "markdown"], headers=['url', 'prompt'])
-  b1.click(fn=chat, inputs=[t1, s1], outputs=[s1, df1])
-demo.launch(debug=True, show_error=True)

 import pandas as pd
 import datasets as ds
 # When True, chat() saves each exchange through store_message() and returns the stored rows.
 UseMemory=True
 # Value of the HF_TOKEN environment variable (None when it is unset).
 # NOTE(review): HF_TOKEN is not referenced anywhere in the visible code - confirm it is still needed.
 HF_TOKEN=os.environ.get("HF_TOKEN")
def SaveResult(text, outputfileName):
    """Append ``text`` as a single line to ``outputfileName``.

    Newlines inside ``text`` are replaced with two spaces so that every call
    adds exactly one line. If the file does not exist yet it is created and
    the header line ``time, message, text`` is written before the first entry.

    Args:
        text: The string to record.
        outputfileName: Path of the file to append to.

    Returns:
        None.
    """
    print("Saving: " + text + " to " + outputfileName)
    # Check before opening: append mode creates the file, which would hide
    # the fact that the header still has to be written.
    is_new_file = not os.path.exists(outputfileName)
    # Explicit UTF-8 so emoji and other non-ASCII chat text can be written
    # regardless of the platform's default encoding.
    with open(outputfileName, "a", encoding="utf-8") as f:
        if is_new_file:
            f.write("time, message, text\n")  # column header, written once
        f.write(text.replace("\n", "  "))
        f.write("\n")
def store_message(name: str, message: str, outputfileName: str) -> pd.DataFrame:
    """Append one chat record to a CSV file and return the stored records.

    A row ``(time, message, name)`` is appended when both ``name`` and
    ``message`` are non-empty; both values are stripped of surrounding
    whitespace and ``time`` is ``str(datetime.now())``. If the file is
    missing or empty, a header row is written first so the file can always
    be parsed.

    Args:
        name: First value of the record (stored in the ``name`` column).
        message: Second value of the record (stored in the ``message`` column).
        outputfileName: Path of the CSV file used as storage.

    Returns:
        The full file content as a DataFrame, sorted by its first column
        (the timestamp) in descending order.
    """
    fieldnames = ["time", "message", "name"]
    has_row = bool(name and message)
    # An existing but empty file would make pd.read_csv raise, so it needs
    # the header just like a missing file does.
    needs_header = (
        not os.path.exists(outputfileName)
        or os.path.getsize(outputfileName) == 0
    )

    if needs_header or has_row:
        # newline="" is what the csv module expects; without it extra blank
        # lines appear on platforms that translate line endings.
        with open(outputfileName, "a", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            if needs_header:
                # Header comes from the same field list as the rows, so the
                # column names always match the data written below.
                writer.writeheader()
            if has_row:
                writer.writerow(
                    {
                        "time": str(datetime.now()),
                        "message": message.strip(),
                        "name": name.strip(),
                    }
                )

    df = pd.read_csv(outputfileName)
    return df.sort_values(df.columns[0], ascending=False)
 # Checkpoint name shared by the model and the tokenizer below.
 mname = "facebook/blenderbot-400M-distill"
 # Both objects are created once at module level; chat() tokenizes its input with `tokenizer`.
 model = BlenderbotForConditionalGeneration.from_pretrained(mname)
 tokenizer = BlenderbotTokenizer.from_pretrained(mname)
 def take_last_tokens(inputs, note_history, history):
     if inputs['input_ids'].shape[1] > 128:
         inputs['input_ids'] = torch.tensor([inputs['input_ids'][0][-128:].tolist()])
         inputs['attention_mask'] = torch.tensor([inputs['attention_mask'][0][-128:].tolist()])
     return inputs, note_history, history
 def add_note_to_history(note, note_history):
     """Append ``note`` to ``note_history`` in place and return a one-element
     list holding all entries joined with ``'</s> <s>'``."""
     # Plain synchronous call: the caller waits for the updated history.
     note_history.append(note)
     joined = '</s> <s>'.join(note_history)
     return [joined]
 description = """Chatbot With persistent memory dataset allowing multiagent system AI to access a shared dataset as memory pool with stored interactions.
  Current Best SOTA Chatbot:  https://huggingface.co/facebook/blenderbot-400M-distill?text=Hey+my+name+is+ChatBack%21+Are+you+ready+to+rock%3F  """
def get_base(filename):
    """Return the path of ``filename`` inside the directory of this script.

    The directory is taken from ``__file__`` and combined with ``filename``
    using ``os.path.join`` so that a path separator is inserted between the
    two. Plain string concatenation produced paths such as
    ``/home/user/appChatbotMemory3.csv``.

    Args:
        filename: File name (or relative path) to locate next to the script.

    Returns:
        The combined path as a string. If the script directory is empty
        (``__file__`` has no directory part) the result is ``filename``.
    """
    basedir = os.path.dirname(__file__)
    print(basedir)
    loadPath = os.path.join(basedir, filename)
    print(loadPath)
    return loadPath
 def chat(message, history):
     history = history or []
     if history:
         history_useful = ['</s> <s>'.join([str(a[0])+'</s> <s>'+str(a[1]) for a in history])]
     else:
         history_useful = []
     history_useful = add_note_to_history(message, history_useful)
     inputs = tokenizer(history_useful, return_tensors="pt")
     inputs, history_useful, history = take_last_tokens(inputs, history_useful, history)
     df=pd.DataFrame()
     if UseMemory:
+        #outputfileName = 'ChatbotMemory.csv'
+        outputfileName = 'ChatbotMemory3.csv' # Test first time file create
+        df = store_message(message, response, outputfileName) # Save to dataset
+        basedir = get_base(outputfileName)
+    return history, df, basedir
 # Build the Gradio page; the components created here are wired to chat() at the bottom.
 with gr.Blocks() as demo:
+  gr.Markdown("<h1><center>🍰Gradio chatbot backed by dataframe CSV memory🎨</center></h1>")
   with gr.Row():
     t1 = gr.Textbox(lines=1, default="", label="Chat Text:")
+    b1 = gr.Button("Respond and Retrieve Messages")
   with gr.Row(): # inputs and buttons
     s1 = gr.State([])
     df1 = gr.Dataframe(wrap=True, max_rows=1000, overflow_row_behaviour= "paginate")
+  with gr.Row(): # inputs and buttons
+    file = gr.File(label="File")
     # NOTE(review): s2 is not referenced again in the visible code - confirm it is still needed.
+    s2 = gr.Markdown()
   # Clicking the button calls chat(t1, s1); its three return values go to s1, df1 and file, in that order.
+  b1.click(fn=chat, inputs=[t1, s1], outputs=[s1, df1, file])
 # Start the app with debug mode and error display enabled.
+demo.launch(debug=True, show_error=True)