# Module2Space2 / app.py
# NOTE(review): the lines below were Hugging Face file-viewer artifacts
# ("ZENLLC's picture", "Create app.py", commit hash, raw/history/blame, size)
# accidentally captured with the source; commented out so the file parses.
# ZENLLC's picture
# Create app.py
# 0ae8b34 verified
# raw / history / blame / 18 kB
import base64
import io
import os
from typing import List, Tuple, Optional
import gradio as gr
from PIL import Image
# -----------------------
# Optional imports guarded at call-time
# -----------------------
def _get_openai_client(api_key: str):
    """Build and return an OpenAI client for *api_key*.

    The SDK import is deferred to call time so this module still loads
    when the ``openai`` package is not installed.
    """
    from openai import OpenAI

    client = OpenAI(api_key=api_key)
    return client
def _configure_google(api_key: str):
    """Configure ``google.generativeai`` with *api_key* and return the module.

    Deferred import: the module stays usable even when the Google SDK
    is not installed and only OpenAI features are exercised.
    """
    import google.generativeai as genai

    genai.configure(api_key=api_key)
    return genai
# -----------------------
# Helper: apply visual presets
# -----------------------
def apply_preset_to_prompt(
    base_prompt: str,
    preset: str,
    style: str,
    content_type: str,
) -> str:
    """Augment the prompt with preset, style, and content-type language.

    Args:
        base_prompt: The user's raw prompt; may be empty.
        preset: Key into the visual-preset table (unknown keys add nothing).
        style: Key into the style-accent table (unknown keys add nothing).
        content_type: "Image" or "Infographic Spec" add extra phrasing;
            any other value adds none.

    Returns:
        The combined prompt string, or "high quality image" when both the
        prompt and all add-ons are empty.
    """
    base_prompt = base_prompt.strip()
    preset_addons = {
        "None": "",
        "ZEN Glass Dashboard": (
            " ultra-detailed UI, glassmorphism, prismatic alloy panels, "
            "neon cyan and magenta HUD overlays, high-end enterprise dashboard"
        ),
        "Palantir / Anduril Infographic": (
            " dark enterprise command-center aesthetic, clean vector infographics, "
            "military-grade analytics overlays, sharp typography, high contrast, "
            "minimal but dense information layout"
        ),
        "Youth AI Literacy Poster": (
            " vibrant educational poster for teens, clean icons, diverse students, "
            "friendly but serious tone, clear typography, classroom-ready layout"
        ),
        "ZEN AI Arena Card": (
            " holographic trading card style, quantum glass edges, subtle glow, "
            "sharp logo lockup, futuristic typography, dramatic lighting"
        ),
        "Blueprint / Systems Diagram": (
            " technical blueprint, white lines on deep navy background, callout labels, "
            "flow arrows, system nodes, engineering drawing style"
        ),
    }
    style_addons = {
        "Default": "",
        "Photoreal": " hyper-realistic photography, physically based lighting",
        "Illustration": " clean vector illustration style, flat colors, crisp lines",
        "Futuristic UI": " futuristic interface design, HUD, holographic widgets",
        "Blueprint": " blueprint drawing, schematic lines, engineering grid",
        "Cinematic": " cinematic lighting, dramatic composition, filmic contrast",
    }
    ct_addon = ""
    if content_type == "Image":
        ct_addon = " high-resolution concept art,"
    elif content_type == "Infographic Spec":
        ct_addon = (
            " detailed infographic design specification, including layout regions, "
            "sections, labels, and visual hierarchy,"
        )
    # Normalize each fragment before joining: the table values carry leading
    # spaces and some end with a trailing comma, so the previous " ".join
    # produced double spaces and stray ", ," sequences in the final prompt.
    fragments = [
        piece.strip().rstrip(",")
        for piece in (
            ct_addon,
            preset_addons.get(preset, ""),
            style_addons.get(style, ""),
        )
        if piece.strip()
    ]
    extra = ", ".join(fragments)
    if extra:
        return f"{base_prompt}, {extra}" if base_prompt else extra
    return base_prompt or "high quality image"
# -----------------------
# OpenAI: Text + Image
# -----------------------
def generate_text_openai(
    api_key: str,
    prompt: str,
    mode: str,
) -> str:
    """Generate text (or an infographic spec) via OpenAI Chat Completions.

    Args:
        api_key: OpenAI API key.
        prompt: The (preset-augmented) user prompt.
        mode: "Infographic Spec" wraps the prompt in a structured request;
            any other value sends the prompt as-is.

    Returns:
        The assistant message content from the first choice.
    """
    system_msg = (
        "You are an expert creator for the ZEN AI ecosystem. "
        "Write clear, concise, high-leverage content. "
        "If mode is 'Infographic Spec', output a structured outline with sections, "
        "titles, short captions, and suggested visual elements."
    )
    if mode == "Infographic Spec":
        user_prompt = (
            f"Create a Palantir/Anduril-level infographic specification based on:\n\n{prompt}\n\n"
            "Return:\n"
            "1) Title options\n"
            "2) 3–5 main sections\n"
            "3) Bullet points for each section\n"
            "4) Suggested charts/visuals\n"
            "5) Color and typography recommendations."
        )
    else:
        user_prompt = prompt
    client = _get_openai_client(api_key)
    # System message primes the persona; user message carries the request.
    response = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.7,
    )
    return response.choices[0].message.content
def decode_b64_images(b64_list: List[str]) -> List[Image.Image]:
    """Decode a list of base64-encoded image payloads into RGB PIL images."""
    return [
        Image.open(io.BytesIO(base64.b64decode(payload))).convert("RGB")
        for payload in b64_list
    ]
def generate_image_openai(
    api_key: str,
    model: str,
    prompt: str,
    size: str,
    quality: str,
    n_images: int,
    seed: Optional[int],
) -> List[Image.Image]:
    """Generate images via the OpenAI Images API and return PIL images.

    Args:
        api_key: OpenAI API key.
        model: "gpt-image-1" or a DALL·E model id (e.g. "dall-e-3").
        prompt: Final, preset-augmented prompt.
        size: UI size label; unknown labels fall back to 1024x1024.
        quality: UI quality value ("standard" or "hd"); translated per model.
        n_images: Number of images to produce.
        seed: Accepted for interface compatibility but ignored — the Images
            API does not expose a seed parameter, and passing one errors.

    Returns:
        Decoded RGB PIL images (one per generated image).
    """
    client = _get_openai_client(api_key)
    # Map UI size labels to API size strings.
    size_map = {
        "Square (1024x1024)": "1024x1024",
        "Portrait (1024x1792)": "1024x1792",
        "Landscape (1792x1024)": "1792x1024",
    }
    size_param = size_map.get(size, "1024x1024")
    is_dalle = model.startswith("dall-e")
    kwargs = {
        "model": model,
        "prompt": prompt,
    }
    if is_dalle:
        # DALL·E models default to URL responses; request base64 so
        # decode_b64_images works. They accept "standard"/"hd" directly
        # and support the 1792px sizes as-is.
        kwargs["response_format"] = "b64_json"
        kwargs["quality"] = quality
        kwargs["size"] = size_param
    else:
        # gpt-image-1 always returns base64 (no response_format param),
        # uses low/medium/high/auto quality, and 1536px non-square sizes.
        kwargs["quality"] = {"standard": "medium", "hd": "high"}.get(quality, "auto")
        kwargs["size"] = {
            "1024x1792": "1024x1536",
            "1792x1024": "1536x1024",
        }.get(size_param, size_param)
    b64_list: List[str] = []
    if model == "dall-e-3":
        # dall-e-3 only supports n=1 per request, so loop for multiples.
        for _ in range(n_images):
            resp = client.images.generate(n=1, **kwargs)
            b64_list.extend(d.b64_json for d in resp.data)
    else:
        resp = client.images.generate(n=n_images, **kwargs)
        b64_list.extend(d.b64_json for d in resp.data)
    return decode_b64_images(b64_list)
# -----------------------
# Google (Gemini / Nano-Banana)
# -----------------------
def generate_text_google(
    api_key: str,
    prompt: str,
    mode: str,
) -> str:
    """Generate text (or an infographic spec) via Gemini.

    Args:
        api_key: Google AI Studio API key.
        prompt: The (preset-augmented) user prompt.
        mode: "Infographic Spec" wraps the prompt in a structured request;
            any other value sends the prompt unchanged.

    Returns:
        The response text from the model.
    """
    genai = _configure_google(api_key)
    if mode == "Infographic Spec":
        content = (
            "You are an expert enterprise communicator. "
            "Create a Palantir/Anduril-grade infographic spec.\n\n"
            f"Topic / prompt:\n{prompt}\n\n"
            "Return:\n"
            "1) Title options\n"
            "2) Main sections with bullet points\n"
            "3) Visual layout ideas\n"
            "4) Chart/visualization suggestions\n"
            "5) Palette & typography notes."
        )
    else:
        content = prompt
    # gemini-1.5-pro is the hard default here; the UI's
    # google_text_model_hint field is informational only.
    model = genai.GenerativeModel("gemini-1.5-pro")
    return model.generate_content(content).text
def generate_image_google(
    api_key: str,
    google_image_model: str,
    prompt: str,
    n_images: int,
    seed: Optional[int],
) -> List[Image.Image]:
    """Generate images via a Google GenerativeModel that returns inline data.

    NOTE: Model & output handling may need adjustment depending on the exact
    Nano-Banana / Nano-Banana-Pro API used in Google AI Studio; this assumes
    a GenerativeModel whose response parts carry base64 ``inline_data``.
    """
    genai = _configure_google(api_key)
    model = genai.GenerativeModel(google_image_model)
    images: List[Image.Image] = []
    for offset in range(n_images):
        # Offset the seed per request so locked seeds still vary across the
        # batch; pass None when no config is set so SDK defaults apply.
        config = {"seed": seed + offset} if seed is not None else {}
        resp = model.generate_content(
            prompt,
            generation_config=config or None,
        )
        # Collect any inline image bytes from the response parts.
        for candidate in resp.candidates:
            for part in candidate.content.parts:
                data = getattr(getattr(part, "inline_data", None), "data", None)
                if data:
                    raw = base64.b64decode(data)
                    images.append(Image.open(io.BytesIO(raw)).convert("RGB"))
    return images
# -----------------------
# Core Gradio callback
# -----------------------
def run_generation(
    openai_key: str,
    google_key: str,
    task_type: str,
    provider: str,
    base_prompt: str,
    negative_prompt: str,
    preset: str,
    style: str,
    size: str,
    quality: str,
    n_images: int,
    seed: int,
    use_seed: bool,
    google_image_model: str,
    google_text_model_hint: str,
) -> Tuple[str, List[Image.Image], str]:
    """Main Gradio callback: dispatch the request to the selected provider.

    The parameters mirror, in order, the components wired in
    ``generate_btn.click``.

    Returns:
        (text_output, images, debug_info): markdown text for the output
        panel, PIL images for the gallery, and newline-joined debug lines.
    """
    text_output = ""
    images: List[Image.Image] = []
    debug_lines = []
    if not base_prompt.strip():
        return "Please enter a prompt.", [], "No prompt provided."
    # Gradio sliders can deliver floats (e.g. 2.0); provider APIs expect ints.
    n_images = int(n_images)
    seed = int(seed)
    # Build the full prompt. The original content_type ternary was a no-op
    # ("Image" if task_type == "Image" else task_type == task_type always),
    # so task_type is passed directly.
    full_prompt = apply_preset_to_prompt(
        base_prompt=base_prompt,
        preset=preset,
        style=style,
        content_type=task_type,
    )
    if negative_prompt.strip():
        full_prompt += f". Avoid: {negative_prompt.strip()}"
    debug_lines.append(f"Task: {task_type}")
    debug_lines.append(f"Provider: {provider}")
    debug_lines.append(f"Preset: {preset}, Style: {style}")
    debug_lines.append(f"OpenAI model size: {size}, quality: {quality}")
    debug_lines.append(f"Google image model: {google_image_model}")
    debug_lines.append(f"Google text model hint: {google_text_model_hint}")
    debug_lines.append(f"Seed enabled: {use_seed}, seed: {seed if use_seed else 'None'}")
    seed_val: Optional[int] = seed if use_seed else None
    try:
        # TEXT or INFOGRAPHIC SPEC
        if task_type in ["Text", "Infographic Spec"]:
            if provider == "OpenAI":
                if not openai_key.strip():
                    return "Missing OpenAI API key.", [], "OpenAI key not provided."
                text_output = generate_text_openai(
                    api_key=openai_key.strip(),
                    prompt=full_prompt,
                    mode=task_type,
                )
            else:
                if not google_key.strip():
                    return "Missing Google API key.", [], "Google key not provided."
                text_output = generate_text_google(
                    api_key=google_key.strip(),
                    prompt=full_prompt,
                    mode=task_type,
                )
        # IMAGE
        if task_type == "Image":
            if provider == "OpenAI":
                if not openai_key.strip():
                    return "Missing OpenAI API key.", [], "OpenAI key not provided."
                # gpt-image-1 by default; DALL·E 3 for the Palantir preset
                # (the original author preferred it for that aesthetic).
                image_model = "gpt-image-1"
                if "Palantir" in preset:
                    image_model = "dall-e-3"
                images = generate_image_openai(
                    api_key=openai_key.strip(),
                    model=image_model,
                    prompt=full_prompt,
                    size=size,
                    quality=quality,
                    n_images=n_images,
                    seed=seed_val,
                )
                debug_lines.append(f"OpenAI image model: {image_model}")
            else:
                if not google_key.strip():
                    return "Missing Google API key.", [], "Google key not provided."
                images = generate_image_google(
                    api_key=google_key.strip(),
                    google_image_model=google_image_model.strip(),
                    prompt=full_prompt,
                    n_images=n_images,
                    seed=seed_val,
                )
        if not text_output and task_type == "Image":
            text_output = (
                "Image(s) generated successfully. Add 'Text' or 'Infographic Spec' mode "
                "to generate descriptive copy or specs."
            )
        if not images and task_type == "Image":
            debug_lines.append("No images returned from provider.")
        return text_output, images, "\n".join(debug_lines)
    except Exception as e:
        # Surface the error in the UI instead of crashing the callback.
        return f"Error: {e}", [], "\n".join(debug_lines + [f"Exception: {e}"])
# -----------------------
# UI
# -----------------------
# Top-level Gradio UI: left column collects keys, task settings, prompts and
# controls; right column shows text, image, and debug outputs.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Header / feature overview shown at the top of the page.
    gr.Markdown(
        """
# 🧬 ZEN Omni Studio — Text • Images • Infographics
A unified creator for the ZEN ecosystem.
- 🔑 **Bring your own keys** for OpenAI + Google (Gemini / Nano-Banana).
- 🧠 Generate **text** and **infographic specs** for ZEN, AI literacy, dashboards, and more.
- 🎨 Generate **high-quality images** with advanced controls and presets.
"""
    )
    with gr.Row():
        with gr.Column():
            # --- API keys: forwarded to run_generation per call, not stored ---
            gr.Markdown("### 🔐 API Keys (kept only in your browser session)")
            openai_key = gr.Textbox(
                label="OPENAI_API_KEY",
                type="password",
                placeholder="sk-...",
            )
            google_key = gr.Textbox(
                label="GOOGLE_API_KEY (Gemini / Nano-Banana)",
                type="password",
                placeholder="AIza...",
            )
            # --- Task & provider selection ---
            gr.Markdown("### 🎯 Task & Provider")
            task_type = gr.Radio(
                ["Image", "Text", "Infographic Spec"],
                value="Image",
                label="Task Type",
            )
            provider = gr.Radio(
                ["Google (Nano-Banana / Gemini)", "OpenAI"],
                value="Google (Nano-Banana / Gemini)",
                label="Primary Provider",
            )
            # Prompt region
            base_prompt = gr.Textbox(
                label="Main Prompt",
                lines=5,
                placeholder="Describe what you want to create for ZEN (image, copy, infographic, etc.)",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt (optional)",
                lines=2,
                placeholder="Things to avoid: low-res, cluttered, distorted text, etc.",
            )
            # Preset/style choices feed apply_preset_to_prompt.
            with gr.Row():
                preset = gr.Dropdown(
                    [
                        "None",
                        "ZEN Glass Dashboard",
                        "Palantir / Anduril Infographic",
                        "Youth AI Literacy Poster",
                        "ZEN AI Arena Card",
                        "Blueprint / Systems Diagram",
                    ],
                    value="ZEN Glass Dashboard",
                    label="Visual Preset",
                )
                style = gr.Dropdown(
                    [
                        "Default",
                        "Photoreal",
                        "Illustration",
                        "Futuristic UI",
                        "Blueprint",
                        "Cinematic",
                    ],
                    value="Futuristic UI",
                    label="Style Accent",
                )
            # OpenAI image options
            gr.Markdown("### 🎛 OpenAI Image Controls (DALL·E / GPT-Image)")
            with gr.Row():
                size = gr.Dropdown(
                    [
                        "Square (1024x1024)",
                        "Portrait (1024x1792)",
                        "Landscape (1792x1024)",
                    ],
                    value="Square (1024x1024)",
                    label="Aspect Ratio / Size",
                )
                quality = gr.Dropdown(
                    ["standard", "hd"],
                    value="hd",
                    label="Quality",
                )
            n_images = gr.Slider(
                minimum=1,
                maximum=4,
                value=1,
                step=1,
                label="Number of Images",
            )
            # Seed slider is only honored when the checkbox is ticked
            # (see run_generation's seed_val logic).
            with gr.Row():
                use_seed = gr.Checkbox(
                    value=False,
                    label="Lock Seed (repeatable outputs)",
                )
                seed = gr.Slider(
                    minimum=1,
                    maximum=2**31 - 1,
                    value=12345,
                    step=1,
                    label="Seed",
                )
            # --- Google model hints: free-text model ids passed through ---
            gr.Markdown("### 🧪 Google Image / Text Model Hints")
            google_image_model = gr.Textbox(
                label="Google Image Model (default: Nano-Banana-Pro)",
                value="nano-banana-pro",
                placeholder="e.g. nano-banana-pro (adjust to your actual model id in Google AI Studio)",
            )
            google_text_model_hint = gr.Textbox(
                label="Google Text Model Hint (for future tweaking)",
                value="gemini-1.5-pro",
                placeholder="Used in code as default Gemini text model.",
            )
            generate_btn = gr.Button("🚀 Generate", variant="primary")
        with gr.Column():
            # --- Outputs: text panel, image gallery, debug log ---
            gr.Markdown("### 📜 Text / Spec Output")
            text_output = gr.Markdown()
            gr.Markdown("### 🖼 Image Output")
            image_gallery = gr.Gallery(
                label="Generated Images",
                show_label=False,
                columns=2,
                height=500,
            )
            gr.Markdown("### 🧾 Debug / Logs (for you, not end users)")
            debug_output = gr.Textbox(
                label="Debug Info",
                lines=10,
            )
    # Wire up callback
    # The inputs list order must match run_generation's positional parameters.
    generate_btn.click(
        fn=run_generation,
        inputs=[
            openai_key,
            google_key,
            task_type,
            provider,
            base_prompt,
            negative_prompt,
            preset,
            style,
            size,
            quality,
            n_images,
            seed,
            use_seed,
            google_image_model,
            google_text_model_hint,
        ],
        outputs=[text_output, image_gallery, debug_output],
    )

# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()