transformers-chat

Runtime error

App Files Files Community

enoreyes committed on Jan 30, 2023

Commit

a3c3fcf

1 Parent(s): 17305b3

Update ingest_examples.py

Browse files

Files changed (1) hide show

ingest_examples.py +39 -199

ingest_examples.py CHANGED Viewed

@@ -1,219 +1,59 @@
-"""Ingest examples into Weaviate."""
 import os
 from pathlib import Path
-import weaviate
-WEAVIATE_URL = os.environ["WEAVIATE_URL"]
-client = weaviate.Client(
-    url=WEAVIATE_URL,
-    additional_headers={"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]},
-)
-client.schema.delete_class("Rephrase")
-client.schema.delete_class("QA")
-client.schema.get()
-schema = {
-    "classes": [
-        {
-            "class": "Rephrase",
-            "description": "Rephrase Examples",
-            "vectorizer": "text2vec-openai",
-            "moduleConfig": {
-                "text2vec-openai": {
-                    "model": "ada",
-                    "modelVersion": "002",
-                    "type": "text",
-                }
-            },
-            "properties": [
-                {
-                    "dataType": ["text"],
-                    "moduleConfig": {
-                        "text2vec-openai": {
-                            "skip": False,
-                            "vectorizePropertyName": False,
-                        }
-                    },
-                    "name": "content",
-                },
-                {
-                    "dataType": ["text"],
-                    "description": "The link",
-                    "moduleConfig": {
-                        "text2vec-openai": {
-                            "skip": True,
-                            "vectorizePropertyName": False,
-                        }
-                    },
-                    "name": "question",
-                },
-                {
-                    "dataType": ["text"],
-                    "description": "The link",
-                    "moduleConfig": {
-                        "text2vec-openai": {
-                            "skip": True,
-                            "vectorizePropertyName": False,
-                        }
-                    },
-                    "name": "answer",
-                },
-                {
-                    "dataType": ["text"],
-                    "description": "The link",
-                    "moduleConfig": {
-                        "text2vec-openai": {
-                            "skip": True,
-                            "vectorizePropertyName": False,
-                        }
-                    },
-                    "name": "chat_history",
-                },
-            ],
-        },
-    ]
-}
-client.schema.create(schema)
-documents = [
     {
         "question": "how do i load those?",
-        "chat_history": "Human: What types of memory exist?\nAssistant: \n\nThere are a few different types of memory: Buffer, Summary, and Conversational Memory.",
-        "answer": "How do I load Buffer, Summary, and Conversational Memory",
     },
     {
         "question": "how do i install this package?",
         "chat_history": "",
-        "answer": "How do I install langchain?",
     },
     {
-        "question": "how do I set serpapi_api_key?",
-        "chat_history": "Human: can you write me a code snippet for that?\nAssistant: \n\nYes, you can create an Agent with a custom LLMChain in LangChain. Here is a [link](https://langchain.readthedocs.io/en/latest/modules/agents/examples/custom_agent.html) to the documentation that provides a code snippet for creating a custom Agent.",
-        "answer": "How do I set the serpapi_api_key?",
     },
     {
-        "question": "What are some methods for data augmented generation?",
-        "chat_history": "Human: List all methods of an Agent class please\nAssistant: \n\nTo answer your question, you can find a list of all the methods of the Agent class in the [API reference documentation](https://langchain.readthedocs.io/en/latest/modules/agents/reference.html).",
-        "answer": "What are some methods for data augmented generation?",
     },
     {
         "question": "can you write me a code snippet for that?",
-        "chat_history": "Human: how do I create an agent with custom LLMChain?\nAssistant: \n\nTo create an Agent with a custom LLMChain in LangChain, you can use the [Custom Agent example](https://langchain.readthedocs.io/en/latest/modules/agents/examples/custom_agent.html). This example shows how to create a custom LLMChain and use an existing Agent class to parse the output. For more information on Agents and Tools, check out the [Key Concepts](https://langchain.readthedocs.io/en/latest/modules/agents/key_concepts.html) documentation.",
-        "answer": "Can you provide a code snippet for creating an Agent with a custom LLMChain?",
     },
 ]
-from langchain.prompts.example_selector.semantic_similarity import \
-    sorted_values
-for d in documents:
-    d["content"] = " ".join(sorted_values(d))
-with client.batch as batch:
-    for text in documents:
-        batch.add_data_object(
-            text,
-            "Rephrase",
-        )
-client.schema.get()
-schema = {
-    "classes": [
-        {
-            "class": "QA",
-            "description": "Rephrase Examples",
-            "vectorizer": "text2vec-openai",
-            "moduleConfig": {
-                "text2vec-openai": {
-                    "model": "ada",
-                    "modelVersion": "002",
-                    "type": "text",
-                }
-            },
-            "properties": [
-                {
-                    "dataType": ["text"],
-                    "moduleConfig": {
-                        "text2vec-openai": {
-                            "skip": False,
-                            "vectorizePropertyName": False,
-                        }
-                    },
-                    "name": "content",
-                },
-                {
-                    "dataType": ["text"],
-                    "description": "The link",
-                    "moduleConfig": {
-                        "text2vec-openai": {
-                            "skip": True,
-                            "vectorizePropertyName": False,
-                        }
-                    },
-                    "name": "question",
-                },
-                {
-                    "dataType": ["text"],
-                    "description": "The link",
-                    "moduleConfig": {
-                        "text2vec-openai": {
-                            "skip": True,
-                            "vectorizePropertyName": False,
-                        }
-                    },
-                    "name": "answer",
-                },
-                {
-                    "dataType": ["text"],
-                    "description": "The link",
-                    "moduleConfig": {
-                        "text2vec-openai": {
-                            "skip": True,
-                            "vectorizePropertyName": False,
-                        }
-                    },
-                    "name": "summaries",
-                },
-                {
-                    "dataType": ["text"],
-                    "description": "The link",
-                    "moduleConfig": {
-                        "text2vec-openai": {
-                            "skip": True,
-                            "vectorizePropertyName": False,
-                        }
-                    },
-                    "name": "sources",
-                },
-            ],
-        },
-    ]
-}
-client.schema.create(schema)
-documents = [
-    {
-        "question": "how do i install langchain?",
-        "answer": "```pip install langchain```",
-        "summaries": ">Example:\nContent:\n---------\nYou can pip install langchain package by running 'pip install langchain'\n----------\nSource: foo.html",
-        "sources": "foo.html",
-    },
-    {
-        "question": "how do i import an openai LLM?",
-        "answer": "```from langchain.llm import OpenAI```",
-        "summaries": ">Example:\nContent:\n---------\nyou can import the open ai wrapper (OpenAI) from the langchain.llm module\n----------\nSource: bar.html",
-        "sources": "bar.html",
-    },
-]
-from langchain.prompts.example_selector.semantic_similarity import \
-    sorted_values
-for d in documents:
-    d["content"] = " ".join(sorted_values(d))
-with client.batch as batch:
-    for text in documents:
-        batch.add_data_object(
-            text,
-            "QA",
-        )

+"""Ingest examples into FAISS."""
 import os
 from pathlib import Path
+import pickle
+from langchain.vectorstores import FAISS
+from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.prompts.example_selector import \
+    SemanticSimilarityExampleSelector
+rephrase_documents = [  # Few-shot rephrase examples: each pairs a follow-up "question" and its "chat_history" with a standalone "answer" question.
     {
         "question": "how do i load those?",
+        "chat_history": "Human: What types of tasks can I do with Pipelines?\nAssistant: \n\nThere are a few different types of tasks pipelines can do. Some examples: Text classification, Text generation, name entity recognition, question answering, summarization, translation, image classification, image segmentation, object detection, audio classification, and visual question answering.",
+        "answer": "How do I load a pipeline for a specific task",
     },
     {
         "question": "how do i install this package?",
         "chat_history": "",
+        "answer": "How do I install transformers?",
     },
     {
+        "question": "where do i find the models?",
+        "chat_history": "Human: can you write me a code snippet for that?\nAssistant: \n\nYes, you can load a pretained model with the from_pretrained() method. Here is a [link](https://huggingface.co/docs/transformers/autoclass_tutorial) to the documentation that provides a code snippet for loading a pretrained model with AutoClass.",
+        "answer": "Where do I find the models that can be loaded into an autoclass?",
     },
     {
+        "question": "how do I finetune a pre-trained model?",
+        "chat_history": "Human: List all methods of a pipeline please\nAssistant: \n\nTo answer your question, you can find a list of all the methods of the Pipeline class in the [API reference documentation](https://huggingface.co/docs/transformers/main_classes/pipelines).",
+        "answer": "What are some methods for finetuning a pre-trained model?",
     },
     {
         "question": "can you write me a code snippet for that?",
+        "chat_history": "Human: how do I do train on multiple gpus?\nAssistant: \n\nTo perform distributed training, you can use the [Accelerate](https://huggingface.co/docs/transformers/accelerate) library. This example shows how to perform distributed training on multiple GPUs with accelerate. For more information on distributed training, check out the [Full Accelerate Documentation](https://huggingface.co/docs/accelerate/).",
+        "answer": "Can you provide a code snippet for training on multiple GPUs with accelerate?",
     },
+    {
+        "question": "show me how to do it with trainer",
+        "chat_history": "Human: How do I finetune a pre-trained model?\nAssistant: \n\nYou can fine-tune a pretrained model with 🤗 Transformers Trainer, in TensorFlow with Keras, and in native PyTorch. For more information on how to do this, visit our [training tutorial](https://huggingface.co/docs/transformers/training)",
+        "answer": "How do I finetune a pre-trained model with Transformers trainer?",
+    }
 ]
+example_selector = SemanticSimilarityExampleSelector.from_examples(
+    # Pool of examples the selector can choose from.
+    rephrase_documents,
+    # Embeddings instance (constructed here with no arguments) used to produce the embeddings for measuring semantic similarity.
+    HuggingFaceEmbeddings(),
+    # VectorStore class (passed uninstantiated) used to store the embeddings and run the similarity search.
+    FAISS,
+    # Number of examples to produce.
+    k=4
+)
+print("beginning pickle")
+with open("rephrase_eg.pkl", 'wb') as f:
+    pickle.dump(example_selector, f)  # NOTE(review): the resulting pickle should only ever be loaded from a trusted source.
+print("Rephrase pickle complete")