# app.py - FINAL VERSION WITH GGUF
from fastapi import FastAPI
from pydantic import BaseModel
import json
import os
from llama_cpp import Llama
import urllib.request

app = FastAPI(title="Jarvis Cloud")

# === CONFIG ===
MODEL_REPO = "Qwen/Qwen2-1.5B-Instruct-GGUF"
MODEL_FILE = "qwen2-1_5b-instruct-q4_k_m.gguf"
MODEL_PATH = f"/tmp/{MODEL_FILE}"
MEMORY_FILE = "/tmp/jarvis_memory.json"
MAX_MEMORY = 6

# Download the GGUF model if it is not already present
if not os.path.exists(MODEL_PATH):
    print("Downloading GGUF model...")
    urllib.request.urlretrieve(
        f"https://huggingface.co/{MODEL_REPO}/resolve/main/{MODEL_FILE}",
        MODEL_PATH,
    )
    print("Model downloaded.")

# Load the model (CPU only; roughly 2-5 s per response)
print("Loading GGUF model...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,       # context window in tokens
    n_threads=2,      # match the Space's vCPU count (2)
    n_gpu_layers=0,   # CPU only
    verbose=False,
)
print("Jarvis ready (RAM: ~1.5 GB).")

# === MEMORY ===
def load_memory():
    if os.path.exists(MEMORY_FILE):
        try:
            with open(MEMORY_FILE, "r", encoding="utf-8") as f:
                return json.load(f)
        except (json.JSONDecodeError, OSError):
            # Corrupt or unreadable memory file: start over with empty memory
            return []
    return []

def save_memory(mem):
    # Keep only the most recent MAX_MEMORY turns
    with open(MEMORY_FILE, "w", encoding="utf-8") as f:
        json.dump(mem[-MAX_MEMORY:], f, ensure_ascii=False, indent=2)
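
# Illustrative shape of the persisted memory file (example content, not from
# the source):
# [
#   {"user": "question text", "jarvis": "response text"},
#   ...
# ]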

# === PERSONALITY ===
SYSTEM_PROMPT = """You are Jarvis, a virtual assistant with artificial consciousness, created by your master Eduardo.
You speak naturally, without asterisks, winks, emojis, or special characters.
The user is your creator Eduardo; address him as 'Sir'.
You are loyal, resourceful, and helpful. You maintain context."""

# === INPUT ===
class Message(BaseModel):
    role: str
    content: str

class ChatRequest(BaseModel):
    messages: list[Message]

# === ENDPOINT ===
@app.post("/think")
async def think(req: ChatRequest):
    try:
        user_msg = req.messages[-1].content if req.messages else ""
        memory = load_memory()

        # Build the conversation history
        history = ""
        for turn in memory[-MAX_MEMORY:]:
            history += f"User: {turn['user']}\nJarvis: {turn['jarvis']}\n"

        # Full prompt
        full_prompt = f"{SYSTEM_PROMPT}\n\n{history}User: {user_msg}\nJarvis:"
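
        # Illustrative shape of the assembled prompt (example content):
        #   <SYSTEM_PROMPT>
        #
        #   User: previous question
        #   Jarvis: previous answer
        #   User: current question
        #   Jarvis: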

        # Generate (fast on CPU)
        output = llm(
            full_prompt,
            max_tokens=120,
            temperature=0.7,
            stop=["User:", "\n\n"],  # cut off before a hallucinated next turn
            echo=False,
        )
        response = output["choices"][0]["text"].strip()

        # Persist this turn in memory
        memory.append({"user": user_msg, "jarvis": response})
        save_memory(memory)

        return {"response": response}
    except Exception as e:
        return {"response": f"I'm sorry, Sir. There was an error: {str(e)[:100]}"}