Spaces:
Runtime error
Runtime error
| # Necessary imports | |
| import sys | |
| import gradio as gr | |
| import spaces | |
| from decord import VideoReader, cpu | |
| from PIL import Image | |
| # Local imports | |
| from src.config import ( | |
| device, | |
| model_name, | |
| sampling, | |
| stream, | |
| repetition_penalty, | |
| ) | |
| from src.app.model import load_model_tokenizer_and_processor | |
| from src.logger import logging | |
| from src.exception import CustomExceptionHandling | |
# Load the model, tokenizer and processor once, at import time.
# Any exception raised by the loader propagates unchanged.
model, tokenizer, processor = load_model_tokenizer_and_processor(
    model_name,
    device,
)

# Upper bound on how many frames encode_video keeps from a single video.
MAX_NUM_FRAMES = 64
def encode_video(video_path):
    """
    Decode a video file into a list of PIL images.

    Frames are taken at a stride equal to the video's rounded average FPS
    (roughly one frame per second of footage). If that yields more than
    MAX_NUM_FRAMES indices, the indices are thinned to MAX_NUM_FRAMES evenly
    spaced ones.

    Args:
        - video_path: Location of the video, passed straight to decord's VideoReader.
    Returns:
        list: One PIL Image per sampled frame, in playback order.
    """

    def uniform_sample(items, n):
        # Split `items` into n equal-width bins and take the element at the
        # midpoint of each bin.
        gap = len(items) / n
        return [items[int(i * gap + gap / 2)] for i in range(n)]

    vr = VideoReader(video_path, ctx=cpu(0))

    # Stride between sampled frames. Clamp to at least 1: an average FPS below
    # 0.5 rounds to 0, and range() raises ValueError for a zero step.
    sample_fps = max(1, round(vr.get_avg_fps()))
    frame_idx = list(range(0, len(vr), sample_fps))

    if len(frame_idx) > MAX_NUM_FRAMES:
        frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES)

    frames = vr.get_batch(frame_idx).asnumpy()
    frames = [Image.fromarray(v.astype('uint8')) for v in frames]
    print('num frames:', len(frames))
    return frames
def describe_image(
    image: str,
    question: str,
    temperature: float,
    top_p: float,
    top_k: int,
    max_new_tokens: int,
) -> str:
    """
    Generates an answer to a question about the provided video.

    Despite its name, `image` is handed to `encode_video`, so it is treated
    as the path to a video file whose sampled frames are sent to the model
    together with the question.

    Args:
        - image (str): The path to the video file.
        - question (str): The question text.
        - temperature (float): The temperature parameter for the model.
        - top_p (float): The top_p parameter for the model.
        - top_k (int): The top_k parameter for the model.
        - max_new_tokens (int): The max tokens to be generated by the model.
    Returns:
        str: The generated answer to the question, or an empty string when
        the video path or the question is missing.
    Raises:
        CustomExceptionHandling: Wraps any exception raised while sampling
        frames or generating the answer.
    """
    try:
        # Stop early on missing input; continuing would pass a falsy path
        # to encode_video.
        if not image or not question:
            gr.Warning("Please provide an image and a question.")
            return ""

        # Message format for the model: all sampled frames followed by the question
        frames = encode_video(image)
        msgs = [
            {'role': 'user', 'content': frames + [question]},
        ]

        # Generate the answer
        answer = model.chat(
            msgs=msgs,
            tokenizer=tokenizer,
            processor=processor,
            sampling=sampling,
            stream=stream,
            top_p=top_p,
            top_k=top_k,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            max_new_tokens=max_new_tokens,
        )

        # Log the successful generation of the answer
        logging.info("Answer generated successfully.")

        # Join exactly once: if `answer` is a one-shot iterator (e.g. a
        # streamed response), a second join would see it already exhausted
        # and produce an empty string.
        answer_text = "".join(answer)
        logging.info(answer_text)

        # Return the answer
        return answer_text

    # Handle exceptions that may occur during answer generation
    except Exception as e:
        # Custom exception handling
        raise CustomExceptionHandling(e, sys) from e