|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# github.com/deadbits/vector-embedding-api |
| 3 | +import os |
| 4 | +import sys |
| 5 | +import logging |
| 6 | +import configparser |
| 7 | + |
| 8 | +import openai |
| 9 | + |
| 10 | +from flask import Flask, request, jsonify, abort |
| 11 | +from sentence_transformers import SentenceTransformer |
| 12 | + |
| 13 | + |
| 14 | +app = Flask(__name__) |
| 15 | + |
| 16 | + |
class Config:
    """Thin wrapper around ``configparser`` for the server's INI config file.

    Constructing an instance reads the file immediately; if the file does
    not exist an error is logged and the process exits with status 1.
    """

    def __init__(self, config_file):
        """Load ``config_file``, exiting the process if it is missing.

        Args:
            config_file: Path to the INI file to read.
        """
        self.config_file = config_file
        self.config = configparser.ConfigParser()
        if not os.path.exists(self.config_file):
            logging.error(f'Config file not found: {self.config_file}')
            sys.exit(1)

        logging.info(f'Loading config file: {self.config_file}')
        self.config.read(config_file)

    def get(self, section, key):
        """Return the value stored under ``key`` in ``section``.

        Lookup problems are logged rather than raised so callers can treat
        a missing setting as "not configured".

        Args:
            section: Name of the INI section.
            key: Option name inside that section.

        Returns:
            The option value as a string, or ``None`` when the section or
            key is missing or the value cannot be read.
        """
        # Catch only configparser errors: a bare ``except`` would also hide
        # KeyboardInterrupt/SystemExit, and the message should say what is
        # actually missing (section vs. key).
        try:
            return self.config.get(section, key)
        except configparser.NoSectionError:
            logging.error(f'Config file missing section: {section}')
        except configparser.NoOptionError:
            logging.error(f'Config file missing key "{key}" in section: {section}')
        except configparser.Error as err:
            logging.error(f'Failed to read "{key}" from section "{section}": {err}')

        return None
| 37 | + |
| 38 | + |
def get_openai_embeddings(text: str):
    """Return the OpenAI ``text-embedding-ada-002`` embedding for ``text``.

    Args:
        text: The input passed to ``openai.Embedding.create``.

    Returns:
        The ``embedding`` field of the first item in the response's
        ``data`` list.

    Raises:
        werkzeug HTTPException (500): via ``abort`` when the API call fails
        or the response does not have the expected structure.
    """
    try:
        response = openai.Embedding.create(input=text, model='text-embedding-ada-002')
        return response['data'][0]['embedding']
    except Exception as err:
        # Look the logger up here rather than using the ``logger`` global:
        # that name is only bound when this file is executed as a script,
        # so relying on it raises NameError when the module is imported.
        logging.getLogger(__name__).error(f'Failed to get OpenAI embeddings: {err}')
        abort(500, 'Failed to get OpenAI embeddings')
| 46 | + |
| 47 | + |
def get_transformers_embeddings(text: str):
    """Return the sentence-transformers embedding for ``text`` as a list.

    Uses the module-level ``model`` that the script entry point creates.

    Args:
        text: The input passed to ``model.encode``.

    Returns:
        The result of ``model.encode(text)`` converted with ``.tolist()``.

    Raises:
        werkzeug HTTPException (500): via ``abort`` when encoding fails.
    """
    try:
        return model.encode(text).tolist()
    except Exception as err:
        # Look the logger up here rather than using the ``logger`` global:
        # that name is only bound when this file is executed as a script,
        # so relying on it raises NameError when the module is imported.
        logging.getLogger(__name__).error(
            f'Failed to get sentence-transformers embeddings: {err}'
        )
        abort(500, 'Failed to get sentence-transformers embeddings')
| 54 | + |
| 55 | + |
@app.route('/submit', methods=['POST'])
def submit_text():
    """Handle ``POST /submit``: embed the ``text`` field of a JSON body.

    The body must be a JSON object containing ``text``. An optional truthy
    ``ada`` field selects the OpenAI backend; otherwise the
    sentence-transformers backend is used.

    Returns:
        A JSON response ``{'embedding': ..., 'status': 'success'}``.

    Raises:
        werkzeug HTTPException (400): when the body is missing, is not a
        JSON object, or has no ``text`` key.
    """
    # silent=True yields None for a missing or unparseable body instead of
    # raising, so every malformed request takes the same 400 path below
    # rather than failing with AttributeError on ``None.get``.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'text' not in data:
        abort(400, 'Text data is required')

    text = data['text']
    if data.get('ada', False):
        embedding_data = get_openai_embeddings(text)
    else:
        embedding_data = get_transformers_embeddings(text)

    return jsonify({'embedding': embedding_data, 'status': 'success'})
| 70 | + |
| 71 | + |
if __name__ == '__main__':
    # Configure logging before anything logs. Config() uses the module-level
    # logging.info()/logging.error() helpers, which implicitly call
    # basicConfig() with defaults when the root logger has no handlers; a
    # later basicConfig(level=INFO) would then be a no-op and INFO messages
    # would be dropped.
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    conf = Config('server.conf')
    openai.api_key = conf.get('main', 'openai_api_key')
    sent_model = conf.get('main', 'sent_transformers_model')

    # Config.get() returns None for a missing setting; stop here with a clear
    # message instead of handing None to SentenceTransformer.
    if not sent_model:
        logger.error('Config file missing "sent_transformers_model" in section: main')
        sys.exit(1)

    try:
        model = SentenceTransformer(sent_model)
    except Exception as err:
        logger.error(f'Failed to load SentenceTransformer model "{sent_model}": {err}')
        sys.exit(1)

    # NOTE(review): debug=True turns on Flask's debug mode (interactive
    # debugger and reloader). Kept to preserve current behavior, but it
    # should not be enabled on a publicly reachable deployment.
    app.run(debug=True)
| 87 | + |
0 commit comments