likhonsheikhdev committed
Commit 09b5534 · verified · 1 Parent(s): d69e848

Upload folder using huggingface_hub

Files changed (4)
  1. Dockerfile +25 -0
  2. README.md +39 -4
  3. main.py +231 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,25 @@
+ FROM python:3.11-slim
+
+ WORKDIR /app
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     build-essential \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy requirements and install Python dependencies
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy application code
+ COPY . .
+
+ # Create non-root user for security
+ RUN useradd -m -u 1000 user
+ USER user
+
+ # Expose port
+ EXPOSE 7860
+
+ # Run the application
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
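The Dockerfile above can also be exercised outside of Spaces. A minimal sketch, assuming Docker is installed locally; the image tag `model-runner` is only a hypothetical name for this example:

```bash
# Build the image from the repository root (the directory containing the Dockerfile)
docker build -t model-runner .

# Run the container, mapping the exposed port 7860 to the host
docker run --rm -p 7860:7860 model-runner

# The API should then answer on the health endpoint
curl http://localhost:7860/health
```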
README.md CHANGED
@@ -1,10 +1,45 @@
  ---
  title: Docker Model Runner
- emoji: 🏢
- colorFrom: pink
- colorTo: red
+ emoji: 🐳
+ colorFrom: blue
+ colorTo: purple
  sdk: docker
+ app_port: 7860
  pinned: false
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Docker Model Runner
+
+ A Docker-based Hugging Face Space with named API endpoints for model inference.
+
+ ## Endpoints
+
+ | Endpoint | Method | Description |
+ |----------|--------|-------------|
+ | `/` | GET | Welcome message |
+ | `/health` | GET | Health check |
+ | `/info` | GET | Model information |
+ | `/predict` | POST | Run model prediction |
+ | `/generate` | POST | Text generation |
+ | `/embed` | POST | Get text embeddings |
+
+ ## Usage
+
+ ### Health Check
+ ```bash
+ curl https://YOUR-SPACE.hf.space/health
+ ```
+
+ ### Prediction
+ ```bash
+ curl -X POST https://YOUR-SPACE.hf.space/predict \
+   -H "Content-Type: application/json" \
+   -d '{"text": "Hello, world!"}'
+ ```
+
+ ### Text Generation
+ ```bash
+ curl -X POST https://YOUR-SPACE.hf.space/generate \
+   -H "Content-Type: application/json" \
+   -d '{"prompt": "Once upon a time", "max_length": 50}'
+ ```
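The README's usage examples stop at `/generate`; a request against `/embed` follows the same pattern. A sketch using the same `YOUR-SPACE` placeholder, with the `texts` field matching the `EmbedRequest` model defined in main.py below:

```bash
curl -X POST https://YOUR-SPACE.hf.space/embed \
  -H "Content-Type: application/json" \
  -d '{"texts": ["Hello, world!", "Docker Model Runner"]}'
```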
main.py ADDED
@@ -0,0 +1,231 @@
+ """
+ Docker Model Runner - FastAPI application with named endpoints
+ """
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from typing import Optional, List
+ import torch
+ from transformers import pipeline, AutoTokenizer, AutoModel
+ import os
+ from datetime import datetime
+
+ app = FastAPI(
+     title="Docker Model Runner",
+     description="HuggingFace Space with named endpoints for model inference",
+     version="1.0.0"
+ )
+
+ # Model configurations
+ MODEL_NAME = os.getenv("MODEL_NAME", "distilbert-base-uncased")
+ GENERATOR_MODEL = os.getenv("GENERATOR_MODEL", "gpt2")
+
+ # Lazy-loaded pipelines
+ _classifier = None
+ _generator = None
+ _embedder = None
+
+
+ def get_classifier():
+     global _classifier
+     if _classifier is None:
+         _classifier = pipeline("text-classification", model=MODEL_NAME)
+     return _classifier
+
+
+ def get_generator():
+     global _generator
+     if _generator is None:
+         _generator = pipeline("text-generation", model=GENERATOR_MODEL)
+     return _generator
+
+
+ def get_embedder():
+     global _embedder
+     if _embedder is None:
+         _embedder = {
+             "tokenizer": AutoTokenizer.from_pretrained(MODEL_NAME),
+             "model": AutoModel.from_pretrained(MODEL_NAME)
+         }
+     return _embedder
+
+
+ # Request/Response Models
+ class PredictRequest(BaseModel):
+     text: str
+     top_k: Optional[int] = 1
+
+
+ class PredictResponse(BaseModel):
+     predictions: List[dict]
+     model: str
+     latency_ms: float
+
+
+ class GenerateRequest(BaseModel):
+     prompt: str
+     max_length: Optional[int] = 50
+     num_return_sequences: Optional[int] = 1
+     temperature: Optional[float] = 1.0
+
+
+ class GenerateResponse(BaseModel):
+     generated_text: List[str]
+     model: str
+     latency_ms: float
+
+
+ class EmbedRequest(BaseModel):
+     texts: List[str]
+
+
+ class EmbedResponse(BaseModel):
+     embeddings: List[List[float]]
+     model: str
+     dimensions: int
+     latency_ms: float
+
+
+ class HealthResponse(BaseModel):
+     status: str
+     timestamp: str
+     gpu_available: bool
+
+
+ class InfoResponse(BaseModel):
+     name: str
+     version: str
+     models: dict
+     endpoints: List[str]
+
+
+ # Named Endpoints
+ @app.get("/")
+ async def root():
+     """Welcome endpoint"""
+     return {
+         "message": "Docker Model Runner API",
+         "docs": "/docs",
+         "endpoints": ["/health", "/info", "/predict", "/generate", "/embed"]
+     }
+
+
+ @app.get("/health", response_model=HealthResponse)
+ async def health():
+     """Health check endpoint"""
+     return HealthResponse(
+         status="healthy",
+         timestamp=datetime.utcnow().isoformat(),
+         gpu_available=torch.cuda.is_available()
+     )
+
+
+ @app.get("/info", response_model=InfoResponse)
+ async def info():
+     """Model and API information"""
+     return InfoResponse(
+         name="Docker Model Runner",
+         version="1.0.0",
+         models={
+             "classifier": MODEL_NAME,
+             "generator": GENERATOR_MODEL,
+             "embedder": MODEL_NAME
+         },
+         endpoints=["/", "/health", "/info", "/predict", "/generate", "/embed"]
+     )
+
+
+ @app.post("/predict", response_model=PredictResponse)
+ async def predict(request: PredictRequest):
+     """
+     Run text classification prediction
+
+     - **text**: Input text to classify
+     - **top_k**: Number of top predictions to return
+     """
+     try:
+         start_time = datetime.now()
+         classifier = get_classifier()
+         results = classifier(request.text, top_k=request.top_k)
+         latency = (datetime.now() - start_time).total_seconds() * 1000
+
+         return PredictResponse(
+             predictions=results,
+             model=MODEL_NAME,
+             latency_ms=round(latency, 2)
+         )
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+
+ @app.post("/generate", response_model=GenerateResponse)
+ async def generate(request: GenerateRequest):
+     """
+     Generate text from a prompt
+
+     - **prompt**: Input prompt for generation
+     - **max_length**: Maximum length of generated text
+     - **num_return_sequences**: Number of sequences to generate
+     - **temperature**: Sampling temperature
+     """
+     try:
+         start_time = datetime.now()
+         generator = get_generator()
+         results = generator(
+             request.prompt,
+             max_length=request.max_length,
+             num_return_sequences=request.num_return_sequences,
+             temperature=request.temperature,
+             do_sample=True
+         )
+         latency = (datetime.now() - start_time).total_seconds() * 1000
+
+         generated_texts = [r["generated_text"] for r in results]
+
+         return GenerateResponse(
+             generated_text=generated_texts,
+             model=GENERATOR_MODEL,
+             latency_ms=round(latency, 2)
+         )
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+
+ @app.post("/embed", response_model=EmbedResponse)
+ async def embed(request: EmbedRequest):
+     """
+     Get text embeddings
+
+     - **texts**: List of texts to embed
+     """
+     try:
+         start_time = datetime.now()
+         embedder = get_embedder()
+
+         # Tokenize and get embeddings
+         inputs = embedder["tokenizer"](
+             request.texts,
+             padding=True,
+             truncation=True,
+             return_tensors="pt"
+         )
+
+         with torch.no_grad():
+             outputs = embedder["model"](**inputs)
+             # Use mean pooling
+             embeddings = outputs.last_hidden_state.mean(dim=1)
+
+         latency = (datetime.now() - start_time).total_seconds() * 1000
+
+         return EmbedResponse(
+             embeddings=embeddings.tolist(),
+             model=MODEL_NAME,
+             dimensions=embeddings.shape[1],
+             latency_ms=round(latency, 2)
+         )
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=7860)
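main.py reads its model IDs from the environment (`MODEL_NAME` and `GENERATOR_MODEL`, defaulting to `distilbert-base-uncased` and `gpt2`), so the models can be swapped without code changes. A sketch for a local container run, reusing the hypothetical `model-runner` image tag from the build example above; the classifier ID shown is only an illustration:

```bash
# Override the defaults that main.py reads via os.getenv()
docker run --rm -p 7860:7860 \
  -e MODEL_NAME=distilbert-base-uncased-finetuned-sst-2-english \
  -e GENERATOR_MODEL=gpt2 \
  model-runner
```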
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ fastapi==0.115.0
+ uvicorn[standard]==0.30.6
+ transformers==4.45.0
+ torch==2.4.1
+ pydantic==2.9.2
+ huggingface-hub==0.25.1
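For quick iteration outside Docker, the same pinned dependencies can be installed directly and the app started with uvicorn, mirroring the Dockerfile's CMD. A sketch, assuming Python 3.11 and a fresh virtual environment; note that the first request to each endpoint downloads the corresponding model from the Hub because the pipelines are lazily loaded:

```bash
pip install -r requirements.txt

# Same invocation as the Dockerfile CMD; `python main.py` also works via the __main__ block
uvicorn main:app --host 0.0.0.0 --port 7860
```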