Hello everyone,
I’m currently working on fine-tuning a machine-learning model to enhance search capabilities in OpenSearch, but I’ve hit a snag. After successfully registering the model, I ran into errors during deployment: each time I fixed one error, another appeared. I would greatly appreciate any guidance, or pointers to online resources where I can learn how to fine-tune a model and deploy it on OpenSearch.
Here is my example code:
from sentence_transformers import InputExample, SentenceTransformer

# Start from the pretrained MiniLM checkpoint that will be fine-tuned below.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Example dataset: each entry is (first text, second text, label).
train_examples = [
    ("Wooden dining table", "Oak coffee table", 0.5),
    ("Leather office chair", "Ergonomic mesh chair", 0.4),
    # Add more examples as needed
]

# Wrap every raw tuple in an InputExample: the two leading strings become the
# text pair and the trailing number becomes the label.
train_dataset = [
    InputExample(texts=pair, label=score)
    for *pair, score in train_examples
]
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, evaluation, losses, models

# Number of training examples handed to the model per batch.
BATCH_SIZE = 16

# Loss that trains `model` against the label attached to each text pair.
train_loss = losses.CosineSimilarityLoss(model=model)

# Serve the training examples in shuffled batches.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
# Development pairs as plain (first text, second text, label) tuples.
dev_pairs = (
    ('Wooden dining table', 'Oak coffee table', 0.5),
    ('Leather office chair', 'Ergonomic mesh chair', 0.4),
    ('Convertible sofa bed', 'Futon couch', 0.8),
)
dev_examples = [
    InputExample(texts=[first, second], label=score)
    for first, second, score in dev_pairs
]

# If available, a development set lets the model be evaluated during training.
dev_evaluator = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(
    dev_examples,
    name='dev',
)
# Define training parameters.
num_epochs = 1

# Derive the warm-up length from the real number of optimizer steps instead of
# hard-coding it. With a fixed warmup_steps=100 and a dataset that yields only
# a handful of batches, training ends while the learning rate is still at the
# very start of its linear ramp, so the weights barely change. Using ~10% of
# the total steps (rounded down, so tiny datasets get no warm-up at all) keeps
# the schedule proportional to the amount of data.
total_steps = len(train_dataloader) * num_epochs
warmup_steps = int(total_steps * 0.1)

# Fine-tune the model, evaluate it with the dev evaluator, and write the
# result to 'output/training_miniLM'.
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    evaluator=dev_evaluator,
    epochs=num_epochs,
    warmup_steps=warmup_steps,
    output_path='output/training_miniLM',
)
import torch

# Reload the fine-tuned model and set it to evaluation mode. It is loaded on
# the CPU explicitly: the dummy inputs below are CPU tensors, and a model that
# was placed on a GPU would make the export fail with a device mismatch.
model = SentenceTransformer('output/training_miniLM', device='cpu')
model.eval()

# Build the dummy inputs with the model's own tokenizer so that both the token
# ids and the attention mask are available as (1, sequence_length) tensors.
encoded = model.tokenizer("Sample text for ONNX export", return_tensors='pt')
input_ids = encoded['input_ids']
attention_mask = encoded['attention_mask']

# Export the underlying transformer to ONNX.
# - The attention mask is traced as a second graph input; without it the
#   exported graph only accepts `input_ids` and cannot ignore padding tokens.
# - Both the batch and the sequence axes are dynamic; otherwise the graph is
#   frozen to the token count of the dummy sentence.
# - `auto_model` returns token-level hidden states, not a pooled sentence
#   vector, so the output is named `last_hidden_state`. Pooling (and any
#   normalisation) has to be applied by whatever consumes this file.
# NOTE(review): confirm which inputs the serving runtime feeds (for example
# whether it also sends `token_type_ids`) and whether it applies pooling.
# NOTE(review): opset 12 is kept from the original; newer library versions may
# require a higher opset for the attention operator - verify.
with torch.no_grad():
    torch.onnx.export(
        model=model._first_module().auto_model,
        args=(input_ids, attention_mask),
        f='miniLM_finetuned.onnx',
        input_names=['input_ids', 'attention_mask'],
        output_names=['last_hidden_state'],
        dynamic_axes={
            'input_ids': {0: 'batch_size', 1: 'sequence_length'},
            'attention_mask': {0: 'batch_size', 1: 'sequence_length'},
            'last_hidden_state': {0: 'batch_size', 1: 'sequence_length'},
        },
        opset_version=12,
    )