Initial commit

2025-08-04 06:24:54 +00:00 · 2023-10-06 19:45:45 -03:00
commit 7210933a15
5 changed files with 83 additions and 0 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -0,0 +1,12 @@
 root = true
 [*]
 end_of_line = lf
 indent_style = space
 indent_size = 4
 charset = utf-8
 trim_trailing_whitespace = true
 insert_final_newline = true
 [*.md]
 trim_trailing_whitespace = false
--- a/README.md
+++ b/README.md
@@ -0,0 +1 @@
 # ImageCaptionAPI
--- a/main.py
+++ b/main.py
@@ -0,0 +1,47 @@
 #!/usr/bin/env python3
 from flask import Flask, request, jsonify
 import requests
 import torch
 from PIL import Image
 from transformers import *
 from tqdm import tqdm
 import urllib.parse as parse
 import os
 # Flask application object that the route handlers are registered on.
 app = Flask(__name__)
 # Run on the GPU when CUDA is available, otherwise fall back to the CPU.
 device = "cpu"
 if torch.cuda.is_available():
     device = "cuda"
 print(device)
 # Load the model, tokenizer and image processor from the same pretrained
 # checkpoint; the model is moved to the selected device right away.
 _CHECKPOINT = "nlpconnect/vit-gpt2-image-captioning"
 finetuned_model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
 finetuned_model = finetuned_model.to(device)
 finetuned_tokenizer = GPT2TokenizerFast.from_pretrained(_CHECKPOINT)
 finetuned_image_processor = ViTImageProcessor.from_pretrained(_CHECKPOINT)
 def load_image(image_path, timeout=30):
     """Download the image at ``image_path`` and return it as an RGB PIL image.

     Args:
         image_path: URL of the image to fetch (passed to ``requests.get``).
         timeout: Seconds to wait for the remote server before giving up.

     Returns:
         A ``PIL.Image.Image`` whose pixel data is already decoded, in RGB mode.

     Raises:
         requests.RequestException: If the request fails, times out, or the
             server answers with an HTTP error status.
         OSError: If PIL cannot read the downloaded payload as an image.
     """
     from io import BytesIO  # local import: only this function needs it
     # A timeout keeps a stalled remote host from blocking the request forever;
     # the ``with`` block releases the connection once the body has been read.
     with requests.get(image_path, timeout=timeout) as response:
         # Fail loudly on 4xx/5xx instead of handing an error page to PIL.
         response.raise_for_status()
         # ``content`` has content-encoding (gzip/deflate) already undone,
         # unlike ``raw`` which exposes the bytes as sent on the wire.
         payload = BytesIO(response.content)
     # convert() forces the pixels to be decoded now and normalises
     # grayscale/RGBA/palette images to three channels, mirroring the
     # checkpoint's published usage example.
     return Image.open(payload).convert("RGB")
 def get_caption(model, image_processor, tokenizer, image_path):
     """Generate a text caption for the image located at ``image_path``.

     The image is fetched with ``load_image``, converted to PyTorch tensors by
     ``image_processor``, moved to the module-level ``device``, passed to
     ``model.generate`` and finally decoded with ``tokenizer``.

     Returns:
         The first decoded sequence, with special tokens skipped.
     """
     inputs = image_processor(load_image(image_path), return_tensors="pt")
     inputs = inputs.to(device)
     generated_ids = model.generate(**inputs)
     captions = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
     return captions[0]
 @app.route('/caption', methods=['POST'])
 def caption_image():
     """Handle ``POST /caption``: caption the image referenced by the request.

     Expects a JSON object body containing an ``image_url`` string.

     Returns:
         ``{"caption": ...}`` on success, or ``{"error": ...}`` with status 400
         when ``image_url`` is missing/invalid or the image cannot be loaded.
     """
     data = request.get_json()
     # The parsed body may be None or a non-object JSON value (list, string,
     # number). A bare ``in`` test on those either raises TypeError or performs
     # a substring/element match, so require a real object first.
     if not isinstance(data, dict) or 'image_url' not in data:
         return jsonify({"error": "Missing 'image_url'"}), 400
     image_url = data['image_url']
     if not isinstance(image_url, str) or not image_url:
         return jsonify({"error": "'image_url' must be a non-empty string"}), 400
     # NOTE(review): image_url is client-controlled and fetched from this host;
     # consider restricting allowed hosts before exposing the service publicly.
     try:
         caption = get_caption(finetuned_model, finetuned_image_processor, finetuned_tokenizer, image_url)
     except (requests.RequestException, OSError):
         # Network failures and unreadable image payloads are problems with the
         # supplied URL, not server bugs, so report them as a client error.
         return jsonify({"error": "Could not load image from 'image_url'"}), 400
     return jsonify({"caption": caption})
 if __name__ == '__main__':
     # Flask's debug mode enables the Werkzeug interactive debugger, which lets
     # anyone who can reach the server execute code on it. Keep it off unless
     # explicitly requested with FLASK_DEBUG=1.
     app.run(debug=os.environ.get("FLASK_DEBUG") == "1")
--- a/setup.sh
+++ b/setup.sh
@@ -0,0 +1,4 @@
 #!/usr/bin/env sh
 # Install the Python dependencies used by main.py.
 # Stop at the first failing command so a broken install is not silently ignored.
 set -e
 pip3 install transformers rouge_score evaluate datasets flask
 # Packages main.py imports directly; listed explicitly instead of relying on
 # them arriving as transitive dependencies of the packages above.
 pip3 install requests pillow tqdm
 # PyTorch wheels from the CUDA 11.8 (cu118) package index.
 pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
--- a/test.rest
+++ b/test.rest
@@ -0,0 +1,19 @@
 ###
 POST http://localhost:5000/caption
 Content-Type: application/json
 {
    "image_url": "http://images.cocodataset.org/test-stuff2017/000000009384.jpg"
 }
 ###
 POST http://localhost:5000/caption
 Content-Type: application/json
 {
    "image_url": "https://static.todamateria.com.br/upload/ur/so/ursopolarreproducao-cke.jpg?auto_optimize=low"
 }
 ###