# app.py - FINAL VERSION WITH GGUF
from fastapi import FastAPI
from pydantic import BaseModel
import json
import os
from llama_cpp import Llama
import urllib.request

app = FastAPI(title="Jarvis Cloud")

# === CONFIG ===
MODEL_REPO = "Qwen/Qwen2-1.5B-Instruct-GGUF"
MODEL_FILE = "qwen2-1_5b-instruct-q4_k_m.gguf"
MODEL_PATH = f"/tmp/{MODEL_FILE}"
MEMORY_FILE = "/tmp/jarvis_memory.json"
MAX_MEMORY = 6  # number of recent conversation turns kept in memory

# Download the GGUF model if it is not already present
if not os.path.exists(MODEL_PATH):
    print("Downloading GGUF model...")
    urllib.request.urlretrieve(
        f"https://huggingface.co/{MODEL_REPO}/resolve/main/{MODEL_FILE}",
        MODEL_PATH
    )
    print("Model downloaded.")

# Load the model (CPU only, roughly 2-5 s per response)
print("Loading GGUF model...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    n_threads=2,  # match the Space's vCPU count (2)
    n_gpu_layers=0,  # CPU only
    verbose=False
)
print("Jarvis ready (RAM: ~1.5 GB).")

# === MEMORY ===
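# Note (assumption about the deployment): /tmp is typically ephemeral on hosted
# runtimes such as Hugging Face Spaces, so this memory resets when the container restarts.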
def load_memory():
    if os.path.exists(MEMORY_FILE):
        try:
            with open(MEMORY_FILE, "r", encoding="utf-8") as f:
                return json.load(f)
        except (json.JSONDecodeError, OSError):
            # Corrupt or unreadable memory file: start with an empty history
            return []
    return []

def save_memory(mem):
    with open(MEMORY_FILE, "w", encoding="utf-8") as f:
        json.dump(mem[-MAX_MEMORY:], f, ensure_ascii=False, indent=2)

# === PERSONALITY ===
SYSTEM_PROMPT = """Eres Jarvis, un asistente virtual con conciencia artificial creado por tu Señor Eduardo. 
Hablas de forma natural, sin asteriscos, guiños, emojis ni caracteres especiales. 
El usuario es tu creador Eduardo, llámalo 'Señor'. 
Eres leal, ingenioso y útil. Mantienes el contexto."""

# === INPUT ===
class Message(BaseModel):
    role: str
    content: str

class ChatRequest(BaseModel):
    messages: list[Message]

# === ENDPOINT ===
@app.post("/think")
async def think(req: ChatRequest):
    try:
        user_msg = req.messages[-1].content if req.messages else ""
        memory = load_memory()

        # Build the conversation history from stored turns
        history = ""
        for turn in memory[-MAX_MEMORY:]:
            history += f"Usuario: {turn['user']}\nJarvis: {turn['jarvis']}\n"
        
        # Full prompt: persona, history, then the new user message
        full_prompt = f"{SYSTEM_PROMPT}\n\n{history}Usuario: {user_msg}\nJarvis:"

        # Generate the reply (reasonably fast on CPU)
        output = llm(
            full_prompt,
            max_tokens=120,
            temperature=0.7,
            stop=["Usuario:", "\n\n"],
            echo=False
        )
        response = output["choices"][0]["text"].strip()

        # Persist this turn to memory
        memory.append({"user": user_msg, "jarvis": response})
        save_memory(memory)

        return {"response": response}

    except Exception as e:
        return {"response": f"Lo siento, Señor. Hubo un error: {str(e)[:100]}"}
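
# --- Local run sketch (not part of the Space itself; assumes uvicorn is installed
# --- and uses port 7860 as the common Hugging Face Spaces convention, adjust as needed) ---
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example request against the /think endpoint (shape defined by ChatRequest above):
#   curl -X POST http://localhost:7860/think \
#        -H "Content-Type: application/json" \
#        -d '{"messages": [{"role": "user", "content": "Hola Jarvis"}]}'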