Initial commit

2026-02-04 02:23:11 +00:00 · 2023-10-06 19:45:45 -03:00
commit 7210933a15
5 changed files with 83 additions and 0 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -0,0 +1,12 @@
+# Top-most EditorConfig file: editors stop looking in parent directories here.
+root = true
+
+# Defaults applied to every file in the repository.
+[*]
+end_of_line = lf
+indent_style = space
+indent_size = 4
+charset = utf-8
+trim_trailing_whitespace = true
+insert_final_newline = true
+
+# Markdown files keep their trailing whitespace (overrides the default above).
+[*.md]
+trim_trailing_whitespace = false
--- a/README.md
+++ b/README.md
@@ -0,0 +1 @@
+# ImageCaptionAPI
--- a/main.py
+++ b/main.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+
+from flask import Flask, request, jsonify
+import requests
+import torch
+from PIL import Image
+from transformers import *
+from tqdm import tqdm
+import urllib.parse as parse
+import os
+
+app = Flask(__name__)
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(device)
+
+# Carregar o modelo, tokenizer e processador de imagem
+finetuned_model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning").to(device)
+finetuned_tokenizer = GPT2TokenizerFast.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+finetuned_image_processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+
+# Função para carregar uma imagem
+def load_image(image_path):
+    return Image.open(requests.get(image_path, stream=True).raw)
+
+# Função para obter a legenda de uma imagem
+def get_caption(model, image_processor, tokenizer, image_path):
+    image = load_image(image_path)
+    img = image_processor(image, return_tensors="pt").to(device)
+    output = model.generate(**img)
+    caption = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
+    return caption
+
+# Rota da api para obter a caption da imagem
+@app.route('/caption', methods=['POST'])
+def caption_image():
+    data = request.get_json()
+    if 'image_url' in data:
+        image_url = data['image_url']
+        caption = get_caption(finetuned_model, finetuned_image_processor, finetuned_tokenizer, image_url)
+        response = {"caption": caption}
+        return jsonify(response)
+    else:
+        return jsonify({"error": "Missing 'image_url'"}), 400
+
+if __name__ == '__main__':
+    app.run(debug=True)
--- a/setup.sh
+++ b/setup.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env sh
+
+pip3 install transformers rouge_score evaluate datasets flask
+pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
--- a/test.rest
+++ b/test.rest
@@ -0,0 +1,19 @@
+###
+
+POST http://localhost:5000/caption
+Content-Type: application/json
+
+{
+    "image_url": "http://images.cocodataset.org/test-stuff2017/000000009384.jpg"
+}
+
+###
+
+POST http://localhost:5000/caption
+Content-Type: application/json
+
+{
+    "image_url": "https://static.todamateria.com.br/upload/ur/so/ursopolarreproducao-cke.jpg?auto_optimize=low"
+}
+
+###