mirror of
https://github.com/guilhermewerner/image-caption-api
synced 2025-06-14 22:15:02 +00:00
Initial commit
This commit is contained in:
12
.editorconfig
Normal file
12
.editorconfig
Normal file
@ -0,0 +1,12 @@
|
||||
root = true
|
||||
|
||||
[*]
|
||||
end_of_line = lf
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
charset = utf-8
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = true
|
||||
|
||||
[*.md]
|
||||
trim_trailing_whitespace = false
|
47
main.py
Normal file
47
main.py
Normal file
@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from flask import Flask, request, jsonify
|
||||
import requests
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import *
|
||||
from tqdm import tqdm
|
||||
import urllib.parse as parse
|
||||
import os
|
||||
|
||||
app = Flask(__name__)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# Single source of truth for the checkpoint id, so the model, tokenizer and
# image processor are always loaded from the same place.
_CHECKPOINT = "nlpconnect/vit-gpt2-image-captioning"

# Load the model, tokenizer and image processor once at import time; the
# request handlers below reuse these module-level objects.
finetuned_model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT).to(device)
finetuned_tokenizer = GPT2TokenizerFast.from_pretrained(_CHECKPOINT)
finetuned_image_processor = ViTImageProcessor.from_pretrained(_CHECKPOINT)
|
||||
|
||||
def load_image(image_path, timeout=10):
    """Download the image at the URL *image_path* and open it with Pillow.

    Args:
        image_path: URL of the image to fetch.
        timeout: Seconds to wait for the remote server before giving up.

    Returns:
        The image opened by ``PIL.Image.open``.

    Raises:
        requests.RequestException: The URL is invalid, the download failed or
            timed out, or the server answered with an error status.
        PIL.UnidentifiedImageError: The response body is not an image that
            Pillow can read.
    """
    # Function-scope import: this helper is the only user of BytesIO.
    from io import BytesIO

    # Without a timeout a stalled remote host would block the caller forever.
    response = requests.get(image_path, timeout=timeout)
    # Fail on 4xx/5xx instead of handing an error page to Pillow.
    response.raise_for_status()
    # ``response.content`` is the fully downloaded, content-decoded body;
    # ``response.raw`` would skip gzip/deflate decoding and keep the
    # streamed connection open.
    return Image.open(BytesIO(response.content))
|
||||
|
||||
def get_caption(model, image_processor, tokenizer, image_path):
    """Generate a caption for the image located at *image_path*.

    Args:
        model: Captioning model; its ``generate`` method is called with the
            processed image tensors.
        image_processor: Callable that turns a PIL image into model inputs
            (called with ``return_tensors="pt"``).
        tokenizer: Tokenizer whose ``batch_decode`` converts the generated
            token ids back to text.
        image_path: URL passed to ``load_image``.

    Returns:
        The decoded caption of the image, with special tokens removed.
    """
    image = load_image(image_path)
    # Grayscale, palette and RGBA images are converted up front; the
    # reference usage of this checkpoint feeds RGB images to the processor.
    if image.mode != "RGB":
        image = image.convert("RGB")

    inputs = image_processor(image, return_tensors="pt").to(device)
    generated_ids = model.generate(**inputs)
    # A single image was submitted, so only the first decoded sequence matters.
    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
||||
|
||||
@app.route('/caption', methods=['POST'])
def caption_image():
    """API route that returns a caption for the image at a given URL.

    Expects a JSON object body containing an ``image_url`` key.

    Returns:
        ``{"caption": ...}`` on success, or ``{"error": ...}`` with HTTP 400
        when the body is not a JSON object, lacks ``image_url``, or the image
        cannot be downloaded or decoded.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so every bad payload takes the same JSON error path below.
    data = request.get_json(silent=True)
    # A valid JSON body may still be null, a list or a scalar.
    if not isinstance(data, dict) or 'image_url' not in data:
        return jsonify({"error": "Missing 'image_url'"}), 400

    # NOTE(review): the URL is client-controlled and fetched server-side;
    # restrict allowed hosts/schemes before exposing this service publicly.
    image_url = data['image_url']
    try:
        caption = get_caption(
            finetuned_model,
            finetuned_image_processor,
            finetuned_tokenizer,
            image_url,
        )
    except (requests.RequestException, OSError):
        # Download problems and undecodable image data are client-side input
        # errors; answer with JSON rather than an unhandled-exception page.
        return jsonify({"error": "Could not load image from 'image_url'"}), 400

    return jsonify({"caption": caption})
|
||||
|
||||
if __name__ == '__main__':
    # Debug mode enables the interactive debugger and the auto-reloader, so it
    # is opt-in (FLASK_DEBUG=1) instead of being always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled)
|
4
setup.sh
Normal file
4
setup.sh
Normal file
@ -0,0 +1,4 @@
|
||||
#!/usr/bin/env sh

# Stop at the first failing command so a broken install is not silently ignored.
set -e

# Python dependencies. requests, pillow and tqdm are imported directly by
# main.py, so they are listed explicitly instead of relying on other packages
# to pull them in.
pip3 install transformers rouge_score evaluate datasets flask requests pillow tqdm

# PyTorch packages from the cu118 wheel index.
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
|
19
test.rest
Normal file
19
test.rest
Normal file
@ -0,0 +1,19 @@
|
||||
###
|
||||
|
||||
POST http://localhost:5000/caption
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"image_url": "http://images.cocodataset.org/test-stuff2017/000000009384.jpg"
|
||||
}
|
||||
|
||||
###
|
||||
|
||||
POST http://localhost:5000/caption
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"image_url": "https://static.todamateria.com.br/upload/ur/so/ursopolarreproducao-cke.jpg?auto_optimize=low"
|
||||
}
|
||||
|
||||
###
|
Reference in New Issue
Block a user