Spaces:

p3rc03
/

2B

Sleeping

App Files Community

37-AN committed on May 12

Commit

207d24c

1 Parent(s): 31cd25b

Initial commit for Hugging Face Space deployment

Browse files

Files changed (2) hide show

Dockerfile +2 -1
app/core/llm.py +51 -28

Dockerfile CHANGED Viewed

@@ -36,7 +36,8 @@ ENV HF_HOME=/app/.cache
 ENV XDG_CACHE_HOME=/app/.cache
 ENV HUGGINGFACEHUB_API_TOKEN=""
 ENV HF_API_KEY=""
-ENV LLM_MODEL="google/flan-t5-small"
 ENV EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"
 # Expose the port required by Hugging Face Spaces

 ENV XDG_CACHE_HOME=/app/.cache
 ENV HUGGINGFACEHUB_API_TOKEN=""
 ENV HF_API_KEY=""
+# Use completely open models that don't require API keys
+ENV LLM_MODEL="distilgpt2"
 ENV EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"
 # Expose the port required by Hugging Face Spaces

app/core/llm.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from langchain.llms import HuggingFaceHub
-from langchain_community.llms import HuggingFaceEndpoint
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
@@ -25,39 +25,62 @@ def get_llm():
     # Set environment variable for Hugging Face Hub
     os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_API_KEY
-    # For Hugging Face Spaces, we'll use a simpler model approach
-    # that doesn't require authentication for free models
     try:
-        if HF_API_KEY:
-            # If we have an API key, use the HuggingFaceHub
-            llm = HuggingFaceHub(
-                huggingfacehub_api_token=HF_API_KEY,
-                repo_id=LLM_MODEL,
-                model_kwargs={
-                    "temperature": DEFAULT_TEMPERATURE,
-                    "max_length": MAX_TOKENS
-                }
-            )
-        else:
-            # If no API key, inform the user
-            print("No Hugging Face API key found. Using a simpler approach with HuggingFaceEndpoint.")
-            llm = HuggingFaceEndpoint(
-                endpoint_url=f"https://api-inference.huggingface.co/models/{LLM_MODEL}",
-                task="text-generation",
-                model_kwargs={
-                    "temperature": DEFAULT_TEMPERATURE,
-                    "max_length": MAX_TOKENS
-                }
             )
-        return llm
     except Exception as e:
-        print(f"Error initializing Hugging Face LLM: {e}")
-        print("Using a fallback approach with a mock LLM.")
         # Create a very simple mock LLM for fallback
         from langchain.llms.fake import FakeListLLM
         return FakeListLLM(
-            responses=["I'm a simple AI assistant. I can't access external knowledge right now, but I'll try to help with basic questions."]
         )
 def get_embeddings():
@@ -72,7 +95,7 @@ def get_embeddings():
             print(f"Warning: Could not create cache directory: {e}")
             cache_dir = None
-    # SentenceTransformers can be used locally without an API key
     try:
         return HuggingFaceEmbeddings(
             model_name=EMBEDDING_MODEL,

 from langchain.llms import HuggingFaceHub
+from langchain_community.llms import HuggingFaceEndpoint, HuggingFacePipeline
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
     # Set environment variable for Hugging Face Hub
     os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_API_KEY
+    # Try different approaches to load a model, from most to least sophisticated
     try:
+        print(f"Attempting to load model {LLM_MODEL} using local pipeline...")
+        # Try using Hugging Face pipeline locally
+        try:
+            from transformers import pipeline
+            # Use a simple pipeline with a small model
+            pipe = pipeline(
+                "text-generation",
+                model=LLM_MODEL,
+                max_length=MAX_TOKENS,
+                temperature=DEFAULT_TEMPERATURE
             )
+            return HuggingFacePipeline(pipeline=pipe)
+        except Exception as pipe_error:
+            print(f"Error loading pipeline: {pipe_error}")
+            # Try using the API if we have a token
+            if HF_API_KEY:
+                print("Falling back to API with auth token...")
+                return HuggingFaceHub(
+                    huggingfacehub_api_token=HF_API_KEY,
+                    repo_id=LLM_MODEL,
+                    model_kwargs={
+                        "temperature": DEFAULT_TEMPERATURE,
+                        "max_length": MAX_TOKENS
+                    }
+                )
+            else:
+                print("No API key, using endpoint without auth...")
+                # Try a simple endpoint without auth
+                return HuggingFaceEndpoint(
+                    endpoint_url=f"https://api-inference.huggingface.co/models/{LLM_MODEL}",
+                    task="text-generation",
+                    model_kwargs={
+                        "temperature": DEFAULT_TEMPERATURE,
+                        "max_length": MAX_TOKENS
+                    }
+                )
     except Exception as e:
+        print(f"All LLM approaches failed: {e}")
+        print("Using a fallback mock LLM.")
         # Create a very simple mock LLM for fallback
         from langchain.llms.fake import FakeListLLM
         return FakeListLLM(
+            responses=[
+                "I'm a simple AI assistant. I can't access external knowledge right now, but I'll try to help with basic questions.",
+                "I'm currently operating in a limited mode. How else can I assist you?",
+                "I'm sorry, but I don't have access to that information at the moment.",
+                "I'm a basic AI assistant running in fallback mode. Let me try to help.",
+                "I'm operating with limited capabilities right now. Could you ask something simpler?"
+            ]
         )
 def get_embeddings():
             print(f"Warning: Could not create cache directory: {e}")
             cache_dir = None
+    # Try to use local embeddings first (most reliable)
     try:
         return HuggingFaceEmbeddings(
             model_name=EMBEDDING_MODEL,