Spaces:

DanielIglesias97
/

TextEmbeddings

Sleeping

TextEmbeddings / utils_model.py

We have modified the implementation to return the dataframe with

9019d6b 5 months ago

1.68 kB

	import numpy as np
	import pandas as pd
	from sentence_transformers import SentenceTransformer

	class ModelFactory():
	    """Creates embedding-model wrappers from a model-type identifier."""

	    def __init__(self):
	        """The factory keeps no state of its own."""

	    def create_model(self, model_type):
	        """Return the wrapper instance that matches ``model_type``.

	        Recognised identifiers are ``'all-MiniLM-L6-v2'`` and
	        ``'sentence_similarity_spanish'``. Any other value yields ``None``.
	        """
	        if model_type == 'all-MiniLM-L6-v2':
	            return MiniLM_L6_v2_Model()
	        if model_type == 'sentence_similarity_spanish':
	            return SentenceSimilaritySpanishModel()
	        # Unknown identifiers are reported by returning None, not by raising.
	        return None

	class BaseModel():
	    """Shared embedding helpers for wrappers that expose ``self.model``.

	    This base class never assigns ``self.model``; its methods call
	    ``self.model.encode(texts, batch_size=...)``, so a subclass has to
	    provide an object with that method.
	    """

	    def __init__(self):
	        """Nothing is initialised here."""
	        pass

	    def retrieve_embeddings_from_single_input_text(self, input_text):
	        """Encode ``input_text`` and return the quantised values as strings.

	        The result of ``self.model.encode`` is multiplied by 255, cast to
	        ``uint8`` and then to ``str``. The nesting of the returned list
	        follows the shape of the array that ``encode`` produced.
	        """
	        # Multiply out of place so the array handed back by ``encode`` is
	        # never mutated.
	        scaled = self.model.encode(input_text, batch_size=32) * 255
	        # NOTE(review): scaled values below 0 or above 255 do not fit in
	        # uint8, and the outcome of casting such floats appears to be
	        # platform-dependent in NumPy -- confirm whether clipping or
	        # rescaling was intended.
	        return scaled.astype(np.uint8).astype(str).tolist()

	    def retrieve_embeddings_from_texts_list(self, input_texts_list):
	        """Build a DataFrame holding each text next to its embedding features.

	        Args:
	            input_texts_list: Iterable of texts. Each item may be a plain
	                ``str`` or an array-like wrapping one text (for example a
	                row taken from ``DataFrame.values``).

	        Returns:
	            A ``pandas.DataFrame`` with a ``text`` column followed by
	            ``feature_0`` ... ``feature_{n-1}`` columns. An empty input
	            yields an empty frame that only has the ``text`` column.
	        """
	        rows = []
	        nof_features = 0
	        for raw_item in input_texts_list:
	            # Normalise to a 1-D object array so plain strings behave like
	            # the single-element arrays this method originally required.
	            texts = np.atleast_1d(np.asarray(raw_item, dtype=object))
	            embeddings = self.retrieve_embeddings_from_single_input_text(texts)
	            # Only the first embedding of each item is kept.
	            nof_features = len(embeddings[0])
	            rows.append(texts.tolist() + embeddings[0])

	        if not rows:
	            # Nothing was encoded, so the number of features is unknown.
	            return pd.DataFrame(columns=['text'])

	        columns_list = ['text'] + [f'feature_{idx}' for idx in range(nof_features)]
	        queries_embeddings_df = pd.DataFrame(rows)
	        queries_embeddings_df.columns = columns_list

	        return queries_embeddings_df

	class MiniLM_L6_v2_Model(BaseModel):
	    """Wrapper whose ``model`` is a ``SentenceTransformer`` built from 'all-MiniLM-L6-v2'."""

	    def __init__(self):
	        """Instantiate the SentenceTransformer and keep it on ``self.model``."""
	        checkpoint_name = 'all-MiniLM-L6-v2'
	        self.model = SentenceTransformer(checkpoint_name)

	class SentenceSimilaritySpanishModel(BaseModel):
	    """Wrapper whose ``model`` is a ``SentenceTransformer`` built from
	    'hiiamsid/sentence_similarity_spanish_es'.
	    """

	    def __init__(self):
	        """Instantiate the SentenceTransformer and keep it on ``self.model``."""
	        checkpoint_name = 'hiiamsid/sentence_similarity_spanish_es'
	        self.model = SentenceTransformer(checkpoint_name)