from fastapi import FastAPI
from pydantic import BaseModel
import json
import os
from llama_cpp import Llama
import urllib.request

app = FastAPI(title="Jarvis Cloud")
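
# Model and memory settings: a 4-bit quantized Qwen2 1.5B Instruct model
# (GGUF format) plus a small JSON file for short-term conversation memory.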
MODEL_REPO = "Qwen/Qwen2-1.5B-Instruct-GGUF"
MODEL_FILE = "qwen2-1_5b-instruct-q4_k_m.gguf"
MODEL_PATH = f"/tmp/{MODEL_FILE}"
MEMORY_FILE = "/tmp/jarvis_memory.json"
MAX_MEMORY = 6
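
# Download the GGUF weights on first start; skipped if already cached in /tmp.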
if not os.path.exists(MODEL_PATH):
    print("Downloading GGUF model...")
    urllib.request.urlretrieve(
        f"https://huggingface.co/{MODEL_REPO}/resolve/main/{MODEL_FILE}",
        MODEL_PATH
    )
    print("Model downloaded.")
print("Loading GGUF model...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,       # context window in tokens
    n_threads=2,      # modest thread count for small CPU instances
    n_gpu_layers=0,   # CPU-only inference
    verbose=False
)
print("Jarvis ready (RAM: ~1.5 GB).")
def load_memory():
    """Return saved conversation turns, or an empty list on any read error."""
    if os.path.exists(MEMORY_FILE):
        try:
            with open(MEMORY_FILE, "r", encoding="utf-8") as f:
                return json.load(f)
        except (OSError, json.JSONDecodeError):
            return []
    return []


def save_memory(mem):
    """Persist only the most recent MAX_MEMORY turns."""
    with open(MEMORY_FILE, "w", encoding="utf-8") as f:
        json.dump(mem[-MAX_MEMORY:], f, ensure_ascii=False, indent=2)
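

# Persona prompt prepended to every request.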
SYSTEM_PROMPT = """You are Jarvis, a virtual assistant with artificial consciousness, created by your master Eduardo.
You speak naturally, without asterisks, winks, emojis, or special characters.
The user is your creator, Eduardo; address him as 'Sir'.
You are loyal, resourceful, and helpful. You keep track of the conversation context."""
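

# Request schema: an OpenAI-style message list; only the last message is used.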
class Message(BaseModel):
    role: str
    content: str


class ChatRequest(BaseModel):
    messages: list[Message]


@app.post("/think")
async def think(req: ChatRequest):
    try:
        user_msg = req.messages[-1].content if req.messages else ""
        memory = load_memory()
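
        # Rebuild the recent conversation as plain-text "User"/"Jarvis" turns.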
        history = ""
        for turn in memory[-MAX_MEMORY:]:
            history += f"User: {turn['user']}\nJarvis: {turn['jarvis']}\n"

        full_prompt = f"{SYSTEM_PROMPT}\n\n{history}User: {user_msg}\nJarvis:"
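
        # Generate a short reply; the stop strings keep the model from
        # continuing into the user's next turn.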
        output = llm(
            full_prompt,
            max_tokens=120,
            temperature=0.7,
            stop=["User:", "\n\n"],
            echo=False
        )
        response = output["choices"][0]["text"].strip()
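
        # Record the turn so the next request sees it in its history.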
        memory.append({"user": user_msg, "jarvis": response})
        save_memory(memory)

        return {"response": response}

    except Exception as e:
        return {"response": f"I'm sorry, Sir. There was an error: {str(e)[:100]}"}