# Small English sample corpus. The active code visible in this script does not
# read it (the only references are in commented-out lines); it is handy for a
# quick smoke test of the embedding model.
documents = [
    "Machine learning is fascinating.",
    "Deep learning is a subset of machine learning.",
    "Artificial intelligence is a broader concept than machine learning.",
    "Natural language processing is a field of artificial intelligence.",
]


from sentence_transformers import SentenceTransformer

# Load a pre-trained sentence-embedding model (see https://www.sbert.net/).
# 'all-MiniLM-L6-v2' was the previously tried alternative; swap the name below
# to switch models. Any FAISS index cached on disk must be rebuilt after a
# model change, because vectors from different models are not comparable.
embedding_model_name = 'BAAI/bge-large-zh-v1.5'
model = SentenceTransformer(embedding_model_name)

# Smoke test (disabled): embed the sample corpus and inspect the vectors.
#   document_embeddings = model.encode(documents)
#   print(document_embeddings)

import mysql.connector # Or import pymysql
import os
import faiss
import numpy as np

# Location of the cached FAISS index on disk.
file_path = "my_faiss_index.faiss"

# Fetch the verses to be indexed as a list of (ID, Content) rows.
#
# NOTE(security): the connection settings used to be hard-coded only. They can
# now be overridden through environment variables; the literal fallbacks keep
# the script working as before but still expose a root password in source
# control -- rotate it and drop the fallbacks once the environment is set up.
mydb = mysql.connector.connect(
    host=os.environ.get("MYBIBLE_DB_HOST", "122.128.109.51"),
    user=os.environ.get("MYBIBLE_DB_USER", "root"),
    password=os.environ.get("MYBIBLE_DB_PASSWORD", "efoxpasswd"),
    database=os.environ.get("MYBIBLE_DB_NAME", "myBible"),
)
try:
    mycursor = mydb.cursor()
    try:
        # The ORDER BY makes row positions deterministic; the FAISS index
        # refers to rows by their position in this list.
        mycursor.execute("Select ID, Content FROM dunv WHERE book > 39 AND Caption IS NOT NULL ORDER BY book,chapter,verse")
        myresult = mycursor.fetchall()
    finally:
        mycursor.close()
finally:
    # All rows are already in memory, so release the connection right away
    # instead of holding it open for the rest of the run.
    mydb.close()

# Obtain the FAISS index over the verse texts: reuse the cached file when it
# matches the current result set, otherwise (re)build it and write it to disk.
# Vector i in the index corresponds to myresult[i].
index = None
if os.path.exists(file_path):
    print("Index file Found!")
    index = faiss.read_index(file_path)
    if index.ntotal != len(myresult):
        # Search results are positions into myresult, so an index built from
        # a different number of rows would point at the wrong verses.
        print("Index file is out of date; rebuilding.")
        index = None

if index is None:
    if not myresult:
        raise SystemExit("No rows returned from the database; nothing to index.")

    # Embed only the Content column. Passing the raw (ID, Content) tuples to
    # encode() makes sentence-transformers treat each row as a text pair, so
    # the numeric ID would become part of every embedding.
    verse_texts = ["" if content is None else str(content) for _, content in myresult]
    document_embeddings = model.encode(verse_texts)

    # FAISS expects a C-contiguous float32 matrix of shape (n_rows, dim).
    document_embeddings_np = np.ascontiguousarray(document_embeddings, dtype=np.float32)

    # Exact (brute-force) index using L2 distance; smaller distance = closer.
    index = faiss.IndexFlatL2(document_embeddings_np.shape[1])
    index.add(document_embeddings_np)

    faiss.write_index(index, file_path)


print("INDEX COMPLETED!")
# Define a search query
query = "為何不可醉酒"
#query = "耶穌與食物"
#query = "耶穌與教育"

# Create an embedding for the query (a batch containing one sentence)
query_embedding = model.encode([query])
# Perform the search, retrieving the k closest matches
k = 8
distances, indices = index.search(query_embedding, k)
# Display the results: the matching row first, then its rank and L2 distance
print("Query:", query)
print("\nTop", k, "most similar documents:")
for rank, (position, distance) in enumerate(zip(indices[0], distances[0]), start=1):
    # FAISS pads the result with -1 when the index holds fewer than k vectors;
    # myresult[-1] would silently print the last row as if it were a match.
    # Positions beyond the fetched rows are skipped as well rather than
    # crashing the listing.
    if position < 0 or position >= len(myresult):
        continue
    print(myresult[position])
    print(f"{rank}.  (distance: {distance:.4f})")