37-AN committed on
Commit 207d24c · 1 Parent(s): 31cd25b

Initial commit for Hugging Face Space deployment

Files changed (2)
  1. Dockerfile +2 -1
  2. app/core/llm.py +51 -28
Dockerfile CHANGED
@@ -36,7 +36,8 @@ ENV HF_HOME=/app/.cache
 ENV XDG_CACHE_HOME=/app/.cache
 ENV HUGGINGFACEHUB_API_TOKEN=""
 ENV HF_API_KEY=""
-ENV LLM_MODEL="google/flan-t5-small"
+# Use completely open models that don't require API keys
+ENV LLM_MODEL="distilgpt2"
 ENV EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"

 # Expose the port required by Hugging Face Spaces
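For context, these are plain container environment variables; a minimal sketch of how the app side would typically pick them up (the os.getenv pattern and defaults below are assumptions mirroring the Dockerfile, not code from this commit):

import os

# Assumed pattern (not part of this diff): read the Dockerfile's ENV values
# at startup, with defaults matching the values set above.
LLM_MODEL = os.getenv("LLM_MODEL", "distilgpt2")
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
HF_API_KEY = os.getenv("HF_API_KEY", "")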
app/core/llm.py CHANGED
@@ -1,5 +1,5 @@
 from langchain.llms import HuggingFaceHub
-from langchain_community.llms import HuggingFaceEndpoint
+from langchain_community.llms import HuggingFaceEndpoint, HuggingFacePipeline
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
@@ -25,39 +25,62 @@ def get_llm():
     # Set environment variable for Hugging Face Hub
     os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_API_KEY

-    # For Hugging Face Spaces, we'll use a simpler model approach
-    # that doesn't require authentication for free models
+    # Try different approaches to load a model, from most to least sophisticated
     try:
-        if HF_API_KEY:
-            # If we have an API key, use the HuggingFaceHub
-            llm = HuggingFaceHub(
-                huggingfacehub_api_token=HF_API_KEY,
-                repo_id=LLM_MODEL,
-                model_kwargs={
-                    "temperature": DEFAULT_TEMPERATURE,
-                    "max_length": MAX_TOKENS
-                }
-            )
-        else:
-            # If no API key, inform the user
-            print("No Hugging Face API key found. Using a simpler approach with HuggingFaceEndpoint.")
-            llm = HuggingFaceEndpoint(
-                endpoint_url=f"https://api-inference.huggingface.co/models/{LLM_MODEL}",
-                task="text-generation",
-                model_kwargs={
-                    "temperature": DEFAULT_TEMPERATURE,
-                    "max_length": MAX_TOKENS
-                }
+        print(f"Attempting to load model {LLM_MODEL} using local pipeline...")
+
+        # Try using Hugging Face pipeline locally
+        try:
+            from transformers import pipeline
+
+            # Use a simple pipeline with a small model
+            pipe = pipeline(
+                "text-generation",
+                model=LLM_MODEL,
+                max_length=MAX_TOKENS,
+                temperature=DEFAULT_TEMPERATURE
             )
-        return llm
+
+            return HuggingFacePipeline(pipeline=pipe)
+        except Exception as pipe_error:
+            print(f"Error loading pipeline: {pipe_error}")
+
+            # Try using the API if we have a token
+            if HF_API_KEY:
+                print("Falling back to API with auth token...")
+                return HuggingFaceHub(
+                    huggingfacehub_api_token=HF_API_KEY,
+                    repo_id=LLM_MODEL,
+                    model_kwargs={
+                        "temperature": DEFAULT_TEMPERATURE,
+                        "max_length": MAX_TOKENS
+                    }
+                )
+            else:
+                print("No API key, using endpoint without auth...")
+                # Try a simple endpoint without auth
+                return HuggingFaceEndpoint(
+                    endpoint_url=f"https://api-inference.huggingface.co/models/{LLM_MODEL}",
+                    task="text-generation",
+                    model_kwargs={
+                        "temperature": DEFAULT_TEMPERATURE,
+                        "max_length": MAX_TOKENS
+                    }
+                )
     except Exception as e:
-        print(f"Error initializing Hugging Face LLM: {e}")
-        print("Using a fallback approach with a mock LLM.")
+        print(f"All LLM approaches failed: {e}")
+        print("Using a fallback mock LLM.")

         # Create a very simple mock LLM for fallback
         from langchain.llms.fake import FakeListLLM
         return FakeListLLM(
-            responses=["I'm a simple AI assistant. I can't access external knowledge right now, but I'll try to help with basic questions."]
+            responses=[
+                "I'm a simple AI assistant. I can't access external knowledge right now, but I'll try to help with basic questions.",
+                "I'm currently operating in a limited mode. How else can I assist you?",
+                "I'm sorry, but I don't have access to that information at the moment.",
+                "I'm a basic AI assistant running in fallback mode. Let me try to help.",
+                "I'm operating with limited capabilities right now. Could you ask something simpler?"
+            ]
         )

 def get_embeddings():
@@ -72,7 +95,7 @@ def get_embeddings():
         print(f"Warning: Could not create cache directory: {e}")
         cache_dir = None

-    # SentenceTransformers can be used locally without an API key
+    # Try to use local embeddings first (most reliable)
     try:
         return HuggingFaceEmbeddings(
             model_name=EMBEDDING_MODEL,
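For reference, a minimal usage sketch of the code above. This is a hypothetical caller, not part of the commit: it assumes app/core/llm.py is importable from the project root and reuses the LLMChain/PromptTemplate imports already present in that module; the prompt text is illustrative only.

from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from app.core.llm import get_llm, get_embeddings

# get_llm() resolves through the fallback chain in the diff above:
# local transformers pipeline -> HuggingFaceHub (if HF_API_KEY is set) ->
# unauthenticated HuggingFaceEndpoint -> FakeListLLM mock.
llm = get_llm()
prompt = PromptTemplate(
    input_variables=["question"],
    template="Answer briefly: {question}",
)
chain = LLMChain(llm=llm, prompt=prompt)
print(chain.run(question="What does this Space do?"))

# Embeddings load locally; all-MiniLM-L6-v2 produces 384-dimensional vectors.
embeddings = get_embeddings()
print(len(embeddings.embed_query("hello world")))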