Skip to content

Conversation

@fen-qin
Copy link
Contributor

@fen-qin fen-qin commented Oct 7, 2025

Description

Add two fields to InferenceRequest

public abstract class InferenceRequest {
    ...
   /**
     * ML algorithm parameters for models.
     * For asymmetric models, use AsymmetricTextEmbeddingParameters with embeddingContentType set.
     */
    private MLAlgoParams mlAlgoParams;
    /**
     * Content type for embedding (QUERY or PASSAGE).
     * Used as indicator for asymmetric models to determine which prefix to apply.
     */
    private EmbeddingContentType embeddingContentType;
}

LOCAL MODEL

  • Step 1: Change cluster settings
# Step 1: Relax cluster settings for a local test setup: allow URL-based model
# registration, run ML tasks on any node, and raise the memory threshold.
echo -e "\n[Step 1] Configuring cluster settings..."
SETTINGS_RESPONSE=$(curl -s -XPUT "${OPENSEARCH_URL}/_cluster/settings" -H 'Content-Type: application/json' -d'{
  "persistent": {
    "plugins.ml_commons.allow_registering_model_via_url": "true",
    "plugins.ml_commons.only_run_on_ml_node": "false",
    "plugins.ml_commons.model_access_control_enabled": "true",
    "plugins.ml_commons.native_memory_threshold": "99"
  }
}')
# Verify the update was acknowledged instead of assuming success — every later
# step depends on these settings being applied.
if echo "$SETTINGS_RESPONSE" | grep -q '"acknowledged":true'; then
    echo -e "\n✓ Cluster settings configured"
else
    echo "✗ Cluster settings update failed"
    echo "Response: $SETTINGS_RESPONSE"
    exit 1
fi
  • Step 2: Register model group
# Step 2: Register a model group; its id is required by the model registration
# call in the next step.
echo -e "\n[Step 2] Registering model group..."
MODEL_GROUP_RESPONSE=$(curl -s -XPOST "${OPENSEARCH_URL}/_plugins/_ml/model_groups/_register" -H 'Content-Type: application/json' -d'{
  "name": "local_model_group",
  "description": "A model group for local models"
}')
# Quote the response so the JSON reaches grep unsplit (SC2086).
MODEL_GROUP_ID=$(echo "$MODEL_GROUP_RESPONSE" | grep -o '"model_group_id":"[^"]*"' | cut -d'"' -f4)
# Fail fast if the id could not be extracted — registering a model with an
# empty model_group_id would produce a confusing downstream error.
if [ -z "$MODEL_GROUP_ID" ]; then
    echo "✗ Model group registration failed"
    echo "Response: $MODEL_GROUP_RESPONSE"
    exit 1
fi
echo "Model Group ID: $MODEL_GROUP_ID"
echo -e "✓ Model group registered"
  • Step 3: Register model
# Step 3: Register the traced_small_model test model. The query_prefix /
# passage_prefix entries in model_config mark it as an asymmetric model.
echo -e "\n[Step 3] Registering asymmetric model..."
REGISTER_RESPONSE=$(curl -s -XPOST "${OPENSEARCH_URL}/_plugins/_ml/models/_register" -H 'Content-Type: application/json' -d"{
  \"name\": \"traced_small_model\",
  \"version\": \"1.0.0\",
  \"model_format\": \"TORCH_SCRIPT\",
  \"model_task_type\": \"text_embedding\",
  \"model_content_hash_value\": \"e13b74006290a9d0f58c1376f9629d4ebc05a0f9385f40db837452b167ae9021\",
  \"model_group_id\": \"$MODEL_GROUP_ID\",
  \"model_config\": {
    \"model_type\": \"bert\",
    \"embedding_dimension\": 768,
    \"framework_type\": \"sentence_transformers\",
    \"passage_prefix\": \"passage: \",
    \"query_prefix\": \"query: \",
    \"all_config\": \"{\\\"architectures\\\":[\\\"BertModel\\\"],\\\"max_position_embeddings\\\":512,\\\"model_type\\\":\\\"bert\\\",\\\"num_attention_heads\\\":12,\\\"num_hidden_layers\\\":6}\"
  },
  \"url\": \"https://github.com/opensearch-project/ml-commons/blob/2.x/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/text_embedding/traced_small_model.zip?raw=true\"
}")
# Quote the response so the JSON is not word-split before grep (SC2086).
REGISTER_TASK_ID=$(echo "$REGISTER_RESPONSE" | grep -o '"task_id":"[^"]*"' | cut -d'"' -f4)
# Abort early if no task id came back — the wait loop in step 4 would
# otherwise poll a bogus URL until timeout.
if [ -z "$REGISTER_TASK_ID" ]; then
    echo "✗ Model registration request failed"
    echo "Response: $REGISTER_RESPONSE"
    exit 1
fi
echo "Register Task ID: $REGISTER_TASK_ID"
echo -e "✓ Model registration initiated"
  • Step 4: Wait for model registration to complete
# Step 4: Poll the task API until registration completes, fails, or the
# MAX_WAIT_TIME budget is exhausted.
echo -e "\n[Step 4] Waiting for model registration to complete..."
ELAPSED=0
MODEL_ID=""
while [ "$ELAPSED" -lt "$MAX_WAIT_TIME" ]; do
    TASK_RESPONSE=$(curl -s -XGET "${OPENSEARCH_URL}/_plugins/_ml/tasks/${REGISTER_TASK_ID}")
    # Quote the response so the JSON reaches grep as one argument (SC2086).
    STATE=$(echo "$TASK_RESPONSE" | grep -o '"state":"[^"]*"' | cut -d'"' -f4)

    if [ "$STATE" = "COMPLETED" ]; then
        MODEL_ID=$(echo "$TASK_RESPONSE" | grep -o '"model_id":"[^"]*"' | cut -d'"' -f4)
        echo "Model ID: $MODEL_ID"
        echo -e "✓ Model registered successfully"
        break
    fi

    # Stop polling as soon as the task reaches a terminal failure state
    # instead of waiting out the full timeout.
    if [ "$STATE" = "FAILED" ]; then
        echo "Registration task reported FAILED state"
        echo "Response: $TASK_RESPONSE"
        break
    fi

    echo "Waiting... (${ELAPSED}s/${MAX_WAIT_TIME}s) State: $STATE"
    sleep "$CHECK_INTERVAL"
    ELAPSED=$((ELAPSED + CHECK_INTERVAL))
done

if [ -z "$MODEL_ID" ]; then
    echo "✗ Model registration failed or timed out"
    exit 1
fi
  • Step 5: Deploy model
# Step 5: Kick off model deployment; step 6 waits on the returned task id.
echo -e "\n[Step 5] Deploying model..."
DEPLOY_RESPONSE=$(curl -s -XPOST "${OPENSEARCH_URL}/_plugins/_ml/models/${MODEL_ID}/_deploy")
# Quote the response so the JSON reaches grep unsplit (SC2086).
DEPLOY_TASK_ID=$(echo "$DEPLOY_RESPONSE" | grep -o '"task_id":"[^"]*"' | cut -d'"' -f4)
# Fail fast if no task id was returned — the wait loop in step 6 needs it.
if [ -z "$DEPLOY_TASK_ID" ]; then
    echo "✗ Model deploy request failed"
    echo "Response: $DEPLOY_RESPONSE"
    exit 1
fi
echo "Deploy Task ID: $DEPLOY_TASK_ID"
echo -e "✓ Model deployment initiated"
  • Step 6: Wait for model deployment to complete
# Step 6: Poll the deploy task until it completes, fails, or times out.
echo -e "\n[Step 6] Waiting for model deployment to complete..."
ELAPSED=0
while [ "$ELAPSED" -lt "$MAX_WAIT_TIME" ]; do
    TASK_RESPONSE=$(curl -s -XGET "${OPENSEARCH_URL}/_plugins/_ml/tasks/${DEPLOY_TASK_ID}")
    # Quote the response so the JSON reaches grep as one argument (SC2086).
    STATE=$(echo "$TASK_RESPONSE" | grep -o '"state":"[^"]*"' | cut -d'"' -f4)

    if [ "$STATE" = "COMPLETED" ]; then
        echo -e "✓ Model deployed successfully"
        break
    fi

    # Break out early on a terminal failure state; the check below still
    # reports the error and exits.
    if [ "$STATE" = "FAILED" ]; then
        echo "Deploy task reported FAILED state"
        echo "Response: $TASK_RESPONSE"
        break
    fi

    echo "Waiting... (${ELAPSED}s/${MAX_WAIT_TIME}s) State: $STATE"
    sleep "$CHECK_INTERVAL"
    ELAPSED=$((ELAPSED + CHECK_INTERVAL))
done

if [ "$STATE" != "COMPLETED" ]; then
    echo "✗ Model deployment failed or timed out"
    exit 1
fi
  • Step 7.1: Test generating query embeddings
# Step 7.1: Smoke-test the deployed model by generating a query-side embedding
# (content_type "query" selects the query prefix on asymmetric models).
echo -e "\n[Step 7.1] Generating query embeddings..."
QUERY_RESPONSE=$(curl -s -XPOST "${OPENSEARCH_URL}/_plugins/_ml/_predict/text_embedding/${MODEL_ID}" \
    -H 'Content-Type: application/json' \
    -d'{
  "parameters": {"content_type": "query"},
  "text_docs": ["What day is it today?"],
  "target_response": ["sentence_embedding"]
}')

# Report failure first; otherwise extract and print the embedding dimension.
if ! echo "$QUERY_RESPONSE" | grep -q "sentence_embedding"; then
    echo "✗ Query embedding generation failed"
    echo "Response: $QUERY_RESPONSE"
else
    QUERY_DIM=$(echo "$QUERY_RESPONSE" | grep -o '"shape":\[[0-9]*\]' | grep -o '[0-9]*')
    echo "✓ Query embedding generated (dimension: $QUERY_DIM)"
fi
  • Step 7.2: Test generating passage embeddings
# Step 7.2: Same smoke test for the passage side (content_type "passage"
# selects the passage prefix on asymmetric models).
echo -e "\n[Step 7.2] Generating passage embeddings..."
PASSAGE_RESPONSE=$(curl -s -XPOST "${OPENSEARCH_URL}/_plugins/_ml/_predict/text_embedding/${MODEL_ID}" \
    -H 'Content-Type: application/json' \
    -d'{
  "parameters": {"content_type": "passage"},
  "text_docs": ["Today is Friday, tomorrow will be my break day. After that, I will go to the library. When is lunch?"],
  "target_response": ["sentence_embedding"]
}')

# Report failure first; otherwise extract and print the embedding dimension.
if ! echo "$PASSAGE_RESPONSE" | grep -q "sentence_embedding"; then
    echo "✗ Passage embedding generation failed"
    echo "Response: $PASSAGE_RESPONSE"
else
    PASSAGE_DIM=$(echo "$PASSAGE_RESPONSE" | grep -o '"shape":\[[0-9]*\]' | grep -o '[0-9]*')
    echo "✓ Passage embedding generated (dimension: $PASSAGE_DIM)"
fi
  • Step 8: Create ingest pipeline
# Step 8: Create an ml_inference ingest pipeline. The single-quote splice
# "'${MODEL_ID}'" injects the shell variable into the otherwise-literal JSON
# body; content_type "passage" in model_input drives the asymmetric prefix.
echo -e "\n[Step 8] Creating ingest pipeline..."
INGEST_RESPONSE=$(curl -s -XPUT "${OPENSEARCH_URL}/_ingest/pipeline/asymmetric_embedding_ingest_pipeline" -H 'Content-Type: application/json' -d'{
  "description": "ingest passage text and generate a embedding using an asymmetric model",
  "processors": [
    {
      "ml_inference": {
        "model_input": "{\"text_docs\":[\"${input_map.text_docs}\"],\"target_response\":[\"sentence_embedding\"],\"parameters\":{\"content_type\":\"passage\"}}",
        "function_name": "text_embedding",
        "model_id": "'${MODEL_ID}'",
        "input_map": [
          {
            "text_docs": "description"
          }
        ],
        "output_map": [
          {
            "fact_embedding": "$.inference_results[0].output[0].data",
            "embedding_size": "$.inference_results.*.output.*.shape[0]"
          }
        ]
      }
    }
  ]
}')
# The ${input_map...} placeholders above are resolved by the pipeline at
# ingest time, not by the shell (they sit inside single quotes).
if echo "$INGEST_RESPONSE" | grep -q "acknowledged"; then
    echo "✓ Ingest pipeline created"
else
    echo "✗ Ingest pipeline creation failed"
    echo "Response: $INGEST_RESPONSE"
    exit 1
fi
  • Step 9: Create index with pipeline
# Step 9: Create the nyc_facts index with KNN enabled; default_pipeline wires
# the step-8 ingest pipeline so every indexed doc gets a fact_embedding.
# dimension 768 matches the model_config registered in step 3.
echo -e "\n[Step 9] Creating index with KNN configuration..."
INDEX_RESPONSE=$(curl -s -XPUT "${OPENSEARCH_URL}/nyc_facts" -H 'Content-Type: application/json' -d'{
  "settings": {
    "index.knn": true,
    "default_pipeline": "asymmetric_embedding_ingest_pipeline",
    "knn.algo_param.ef_search": 100
  },
  "mappings": {
    "properties": {
      "fact_embedding": {
        "type": "knn_vector",
        "dimension": 768,
        "method": {
          "name": "hnsw",
          "space_type": "l2",
          "engine": "lucene",
          "parameters": {
            "ef_construction": 128,
            "m": 24
          }
        }
      }
    }
  }
}')
if echo "$INDEX_RESPONSE" | grep -q "acknowledged"; then
    echo "✓ Index created"
else
    echo "✗ Index creation failed"
    echo "Response: $INDEX_RESPONSE"
    exit 1
fi
  • Step 10: Ingest data
# Step 10: Bulk-index the NYC facts fixture data. Each doc flows through the
# default ingest pipeline from step 9, generating its fact_embedding.
echo -e "\n[Step 10] Ingesting test data..."
BULK_RESPONSE=$(curl -s -XPOST "${OPENSEARCH_URL}/_bulk" -H 'Content-Type: application/json' -d'
{ "index": { "_index": "nyc_facts" } }
{ "title": "Central Park", "description": "A large public park in the heart of New York City, offering a wide range of recreational activities." }
{ "index": { "_index": "nyc_facts" } }
{ "title": "Empire State Building", "description": "An iconic skyscraper in New York City offering breathtaking views from its observation deck." }
{ "index": { "_index": "nyc_facts" } }
{ "title": "Statue of Liberty", "description": "A colossal neoclassical sculpture on Liberty Island, symbolizing freedom and democracy in the United States." }
{ "index": { "_index": "nyc_facts" } }
{ "title": "Brooklyn Bridge", "description": "A historic suspension bridge connecting Manhattan and Brooklyn, offering pedestrian walkways with great views." }
{ "index": { "_index": "nyc_facts" } }
{ "title": "Times Square", "description": "A bustling commercial and entertainment hub in Manhattan, known for its neon lights and Broadway theaters." }
{ "index": { "_index": "nyc_facts" } }
{ "title": "Yankee Stadium", "description": "Home to the New York Yankees, this baseball stadium is a historic landmark in the Bronx." }
{ "index": { "_index": "nyc_facts" } }
{ "title": "The Bronx Zoo", "description": "One of the largest zoos in the world, located in the Bronx, featuring diverse animal exhibits and conservation efforts." }
{ "index": { "_index": "nyc_facts" } }
{ "title": "New York Botanical Garden", "description": "A large botanical garden in the Bronx, known for its diverse plant collections and stunning landscapes." }
{ "index": { "_index": "nyc_facts" } }
{ "title": "Flushing Meadows-Corona Park", "description": "A major park in Queens, home to the USTA Billie Jean King National Tennis Center and the Unisphere." }
{ "index": { "_index": "nyc_facts" } }
{ "title": "Citi Field", "description": "The home stadium of the New York Mets, located in Queens, known for its modern design and fan-friendly atmosphere." }
{ "index": { "_index": "nyc_facts" } }
{ "title": "Rockefeller Center", "description": "A famous complex of commercial buildings in Manhattan, home to the NBC studios and the annual ice skating rink." }
{ "index": { "_index": "nyc_facts" } }
{ "title": "Queens Botanical Garden", "description": "A peaceful, beautiful botanical garden located in Flushing, Queens, featuring seasonal displays and plant collections." }
{ "index": { "_index": "nyc_facts" } }
{ "title": "Arthur Ashe Stadium", "description": "The largest tennis stadium in the world, located in Flushing Meadows-Corona Park, Queens, hosting the U.S. Open." }
{ "index": { "_index": "nyc_facts" } }
{ "title": "Wave Hill", "description": "A public garden and cultural center in the Bronx, offering stunning views of the Hudson River and a variety of nature programs." }
{ "index": { "_index": "nyc_facts" } }
{ "title": "Louis Armstrong House", "description": "The former home of jazz legend Louis Armstrong, located in Corona, Queens, now a museum celebrating his life and music." }
')
# _bulk returns "errors":false only when every item succeeded.
if echo "$BULK_RESPONSE" | grep -q "\"errors\":false"; then
    echo "✓ Data ingested successfully"
else
    echo "✗ Data ingestion failed"
    echo "Response: $BULK_RESPONSE"
    exit 1
fi

# Wait for indexing to complete
# NOTE(review): a fixed 5s sleep is a heuristic; a _refresh call would be
# deterministic — TODO confirm whether refresh is acceptable here.
echo "Waiting for indexing to complete..."
sleep 5
  • Step 11: Create search pipeline
# Step 11: Create a search pipeline that embeds the incoming query text
# (content_type "query") and rewrites the request into a knn query via
# query_template. "'${MODEL_ID}'" splices the shell variable into the JSON.
echo -e "\n[Step 11] Creating search pipeline..."
SEARCH_PIPELINE_RESPONSE=$(curl -s --max-time 30 -XPUT "${OPENSEARCH_URL}/_search/pipeline/asymmetric_embedding_search_pipeline" -H 'Content-Type: application/json' -d'
{
   "description": "search with query embedding using an asymmetric model",
   "request_processors": [
      {
        "ml_inference": {
            "query_template": "{\"size\": 3,\"query\": {\"knn\": {\"fact_embedding\": {\"vector\": ${query_embedding},\"k\": 4}}}}",
            "function_name": "text_embedding",
            "model_id": "'${MODEL_ID}'",
            "model_input": "{ \"text_docs\": [\"${input_map.query}\"], \"target_response\": [\"sentence_embedding\"], \"parameters\" : {\"content_type\" : \"query\" } }",
            "input_map": [
               {
                  "query": "query.term.fact_embedding.value"
               }
            ],
            "output_map": [
               {
                  "query_embedding": "$.inference_results[0].output[0].data",
                  "embedding_size": "$.inference_results.*.output.*.shape[0]"
               }
            ]
         }
      }
   ]
}
')
# ${query_embedding} / ${input_map.query} are pipeline-side placeholders,
# untouched by the shell because they sit inside single quotes.
if echo "$SEARCH_PIPELINE_RESPONSE" | grep -q "acknowledged"; then
    echo "✓ Search pipeline created"
else
    echo "✗ Search pipeline creation failed"
    echo "Response: $SEARCH_PIPELINE_RESPONSE"
    exit 1
fi
  • Step 11.5: Test search pipeline with term query
# Step 11.5: Exercise the search pipeline. The term query is a carrier: the
# pipeline extracts its value, embeds it, and rewrites the request into knn.
echo -e "\n[Step 11.5] Testing search pipeline with term query..."
PIPELINE_SEARCH_RESPONSE=$(curl -s -XGET "${OPENSEARCH_URL}/nyc_facts/_search?search_pipeline=asymmetric_embedding_search_pipeline" -H 'Content-Type: application/json' -d'{
  "query": {
    "term": {
      "fact_embedding": {
        "value": "What are some places for sports in NYC?",
        "boost": 1
      }
    }
  }
}')

PIPELINE_HIT_COUNT=$(echo "$PIPELINE_SEARCH_RESPONSE" | grep -o '"total":{"value":[0-9]*' | grep -o '[0-9]*$')
# Default to 0 so the numeric test cannot error out ("integer expression
# expected") when the grep extraction comes back empty on an error response.
if [ "${PIPELINE_HIT_COUNT:-0}" -gt 0 ]; then
    echo "✓ Search pipeline test passed (found $PIPELINE_HIT_COUNT results)"
    echo "Top results:"
    echo "$PIPELINE_SEARCH_RESPONSE" | grep -o '"title":"[^"]*"' | head -3 | sed 's/"title"://g'
else
    echo "✗ Search pipeline test failed (no results found)"
    echo "Response: $PIPELINE_SEARCH_RESPONSE"
fi
  • Step 12: Run neural search query
# Step 12: Run a neural query directly against the index; the neural clause
# embeds query_text with the given model at search time. The embedding field
# is excluded from _source to keep the response readable.
echo -e "\n[Step 12] Running neural search query..."
NEURAL_RESPONSE=$(curl -s -XGET "${OPENSEARCH_URL}/nyc_facts/_search" \
    -H 'Content-Type: application/json' \
    -d'{
  "_source": {
    "excludes": [
      "fact_embedding"
    ]
  },
  "query": {
    "neural": {
      "fact_embedding": {
        "query_text": "What are some places for sports in NYC?",
        "model_id": "'${MODEL_ID}'",
        "boost": 1
      }
    }
  }
}')

# A well-formed search response always contains a "hits" section.
if ! echo "$NEURAL_RESPONSE" | grep -q "\"hits\""; then
    echo "✗ Neural search query failed"
    echo "Response: $NEURAL_RESPONSE"
else
    echo "✓ Neural search query executed successfully"
fi

REMOTE MODEL

    # Remote-model setup: create a SageMaker connector, register a remote
    # asymmetric model against it, then deploy.
    # NOTE(review): this whole section is indented because it sits inside an
    # if-statement whose opening is outside this excerpt; the trailing 'fi'
    # below closes that conditional.
    echo "[Setup] Creating connector..."
    CONNECTOR_RESPONSE=$(curl -s -X POST "${OPENSEARCH_URL}/_plugins/_ml/connectors/_create" \
    -H "Content-Type: application/json" \
    -d "{
      \"name\": \"e2e-test-connector\",
      \"description\": \"E2E test connector for neural search\",
      \"version\": \"1\",
      \"protocol\": \"aws_sigv4\",
      \"parameters\": {
        \"region\": \"us-east-1\",
        \"service_name\": \"sagemaker\"
      },
      \"credential\": {
        \"access_key\": \"${AWS_ACCESS_KEY}\",
        \"secret_key\": \"${AWS_SECRET_KEY}\",
        \"session_token\": \"${AWS_SESSION_TOKEN}\"
      },
      \"actions\": [
        {
          \"action_type\": \"predict\",
          \"method\": \"POST\",
          \"url\": \"https://runtime.sagemaker.us-east-1.amazonaws.com/endpoints/multilingual-e5-endpoint/invocations\",
          \"headers\": {
            \"content-type\": \"application/json\"
          },
          \"request_body\": \"{ \\\"texts\\\": \${parameters.texts}, \\\"content_type\\\": \\\"\${parameters.content_type}\\\" }\"
        }
      ]
    }")

    # jq prints "null" (the string) when the key is absent, so check both.
    CONNECTOR_ID=$(echo $CONNECTOR_RESPONSE | jq -r '.connector_id')
    if [ "$CONNECTOR_ID" = "null" ] || [ -z "$CONNECTOR_ID" ]; then
        echo "✗ Connector creation failed"
        echo "$CONNECTOR_RESPONSE"
        exit 1
    fi
    echo "✓ Connector created: $CONNECTOR_ID"
    # Register the remote model; additional_config marks it asymmetric and
    # carries the e5 query/passage prefixes.
    echo "[Setup] Registering model..."
    MODEL_RESPONSE=$(curl -s -X POST "${OPENSEARCH_URL}/_plugins/_ml/models/_register" \
    -H "Content-Type: application/json" \
    -d '{
      "name": "e2e-test-model",
      "function_name": "remote",
      "connector_id": "'$CONNECTOR_ID'",
      "model_config": {
        "model_type": "text_embedding",
        "embedding_dimension": 384,
        "framework_type": "SENTENCE_TRANSFORMERS",
        "additional_config": {
          "space_type": "l2",
          "is_asymmetric": true,
          "model_family": "e5",
          "query_prefix": "query: ",
          "passage_prefix": "passage: "
        }
      }
    }')

    MODEL_ID=$(echo $MODEL_RESPONSE | jq -r '.model_id')
    if [ "$MODEL_ID" = "null" ] || [ -z "$MODEL_ID" ]; then
        echo "✗ Model registration failed"
        echo "$MODEL_RESPONSE"
        exit 1
    fi
    echo "✓ Model registered: $MODEL_ID"

    # Deploy model
    # NOTE(review): deployment success is assumed after a fixed 5s sleep;
    # remote-model deploys are usually fast, but polling the task (as the
    # local-model script does) would be more robust — TODO confirm.
    echo "[Setup] Deploying model..."
    curl -s -X POST "${OPENSEARCH_URL}/_plugins/_ml/models/$MODEL_ID/_deploy" > /dev/null
    sleep 5
    echo "✓ Model deployed"
fi
  • Step 1: Configure cluster settings
# Step 1: Apply the ML Commons cluster settings needed for the remote-model
# test run (no URL-registration flag is required for remote models).
echo "[Step 1] Configuring cluster settings..."
curl -s -XPUT "${OPENSEARCH_URL}/_cluster/settings" -H 'Content-Type: application/json' -d'{
        "persistent": {
            "plugins.ml_commons.only_run_on_ml_node": "false",
            "plugins.ml_commons.model_access_control_enabled": "true",
            "plugins.ml_commons.native_memory_threshold": "99"
        }
    }' > /dev/null
echo "✓ Cluster settings configured"
  • Step 2: Test embedding generation
# Step 2: Smoke-test the remote model by requesting one query embedding and
# verifying the response carries a vector at the expected jq path.
QUERY_RESPONSE=$(curl -s -XPOST "${OPENSEARCH_URL}/_plugins/_ml/models/$MODEL_ID/_predict" \
    -H 'Content-Type: application/json' \
    -d'{"parameters": {"texts": ["baseball stadium in New York"], "content_type": "query"}}')

# jq -e exits non-zero when the path is missing or null.
if ! echo "$QUERY_RESPONSE" | jq -e '.inference_results[0].output[0].dataAsMap.response[0]' > /dev/null 2>&1; then
    echo "✗ Query embedding failed"
    echo "$QUERY_RESPONSE"
    exit 1
else
    QUERY_EMBEDDING_LENGTH=$(echo "$QUERY_RESPONSE" | jq '.inference_results[0].output[0].dataAsMap.response[0] | length')
    echo "✓ Query embedding generated (dimension: $QUERY_EMBEDDING_LENGTH)"
fi
  • Step 3: Always recreate index
# Step 3: Drop any leftover index/pipeline from a previous run, then create a
# fresh KNN index. The '$EMBEDDING_DIMENSION' splice injects the shell
# variable into the otherwise-literal JSON body.
curl -s -X DELETE "${OPENSEARCH_URL}/nyc_facts" > /dev/null 2>&1
curl -s -X DELETE "${OPENSEARCH_URL}/_ingest/pipeline/nyc_facts_pipeline" > /dev/null 2>&1

INDEX_RESPONSE=$(curl -s -XPUT "${OPENSEARCH_URL}/nyc_facts" \
    -H 'Content-Type: application/json' \
    -d'{
        "settings": {
            "index": {
                "knn": true
            }
        },
        "mappings": {
            "properties": {
                "description": {
                    "type": "text"
                },
                "description_embedding": {
                    "type": "knn_vector",
                    "dimension": '$EMBEDDING_DIMENSION',
                    "method": {
                        "name": "hnsw",
                        "space_type": "l2",
                        "engine": "lucene"
                    }
                }
            }
        }
    }')

if echo "$INDEX_RESPONSE" | jq -e '.acknowledged' > /dev/null 2>&1; then
    echo "✓ Index recreated successfully"
else
    echo "✗ Index creation failed"
    echo "$INDEX_RESPONSE"
    exit 1
fi
  • Step 4: Create ingest pipeline
# Step 4: Create the remote-model ingest pipeline: ml_inference embeds the
# description (content_type "passage"), a script processor copies the vector
# to description_embedding, and the raw inference payload is removed.
PIPELINE_RESPONSE=$(curl -s -XPUT "${OPENSEARCH_URL}/_ingest/pipeline/nyc_facts_pipeline" \
    -H 'Content-Type: application/json' \
    -d'{
        "description": "Ingest pipeline for NYC facts with remote model",
        "processors": [
            {
                "ml_inference": {
                    "model_id": "'$MODEL_ID'",
                    "function_name": "remote",
                    "model_input": "{\"parameters\":{\"texts\":[\"{{description}}\"], \"content_type\":\"passage\"}}",
                    "input_map": [
                        {
                            "description": "description"
                        }
                    ]
                }
            },
            {
                "script": {
                    "source": "ctx.description_embedding = ctx.inference_results.response[0]"
                }
            },
            {
                "remove": {
                    "field": "inference_results"
                }
            }
        ]
    }')

if echo "$PIPELINE_RESPONSE" | jq -e '.acknowledged' > /dev/null 2>&1; then
    echo "✓ Ingest pipeline created"
else
    echo "✗ Ingest pipeline failed"
    echo "$PIPELINE_RESPONSE"
    # Abort on failure, consistent with the other setup steps — step 5 would
    # otherwise fail confusingly against a missing pipeline.
    exit 1
fi
  • Step 5: Test ingest pipeline
# Step 5: Ingest one document through the pipeline; on failure, fall back to
# computing embeddings directly and bulk-indexing pre-embedded docs.
INGEST_RESPONSE=$(curl -s -XPOST "${OPENSEARCH_URL}/nyc_facts/_doc?pipeline=nyc_facts_pipeline" \
    -H 'Content-Type: application/json' \
    -d'{"description": "Yankee Stadium is a baseball stadium located in the Bronx, New York City."}')

if echo "$INGEST_RESPONSE" | jq -e '._id' > /dev/null 2>&1; then
    echo "✓ Ingest pipeline test successful"
    sleep 2
else
    echo "⚠ Ingest pipeline test failed - using pre-computed embeddings"

    # Fallback: Generate embeddings manually (jq -c keeps the vector compact).
    YANKEE_EMBEDDING=$(curl -s -XPOST "${OPENSEARCH_URL}/_plugins/_ml/models/$MODEL_ID/_predict" \
        -H 'Content-Type: application/json' \
        -d'{"parameters": {"texts": ["Yankee Stadium is a baseball stadium located in the Bronx, New York City."], "content_type": "passage"}}' | \
        jq -c '.inference_results[0].output[0].dataAsMap.response[0]')

    CENTRAL_PARK_EMBEDDING=$(curl -s -XPOST "${OPENSEARCH_URL}/_plugins/_ml/models/$MODEL_ID/_predict" \
        -H 'Content-Type: application/json' \
        -d'{"parameters": {"texts": ["Central Park is a large public park in Manhattan, New York City."], "content_type": "passage"}}' | \
        jq -c '.inference_results[0].output[0].dataAsMap.response[0]')

    # Ingest with pre-computed embeddings. Double-quote the embedding splices
    # so the JSON arrays cannot be word-split or glob-expanded (SC2086).
    curl -s -XPOST "${OPENSEARCH_URL}/nyc_facts/_bulk" \
        -H 'Content-Type: application/json' \
        -d'{"index": {"_id": "1"}}
{"description": "Central Park is a large public park in Manhattan, New York City.", "description_embedding": '"$CENTRAL_PARK_EMBEDDING"'}
{"index": {"_id": "2"}}
{"description": "Yankee Stadium is a baseball stadium located in the Bronx, New York City.", "description_embedding": '"$YANKEE_EMBEDDING"'}
' > /dev/null
    sleep 2
fi
  • Step 6: Test neural search query
# Step 6: Run a neural query against the remote model and show the best hit.
NEURAL_SEARCH_RESPONSE=$(curl -s -XGET "${OPENSEARCH_URL}/nyc_facts/_search" \
    -H 'Content-Type: application/json' \
    -d'{
        "_source": {
            "excludes": ["description_embedding"]
        },
        "query": {
            "neural": {
                "description_embedding": {
                    "query_text": "What are some places for sports in NYC?",
                    "model_id": "'$MODEL_ID'",
                    "k": 3
                }
            }
        }
    }')

# Success means at least one hit with a description field in _source.
if ! echo "$NEURAL_SEARCH_RESPONSE" | jq -e '.hits.hits[0]._source.description' > /dev/null 2>&1; then
    echo "⚠ Neural search failed"
    echo "$NEURAL_SEARCH_RESPONSE" | jq '.error.reason // .error'
else
    echo "✓ Neural search successful"
    BEST_DESC=$(echo "$NEURAL_SEARCH_RESPONSE" | jq -r '.hits.hits[0]._source.description')
    BEST_SCORE=$(echo "$NEURAL_SEARCH_RESPONSE" | jq -r '.hits.hits[0]._score')
    echo "  Top result: $BEST_DESC"
    echo "  Score: $BEST_SCORE"
fi
  • Step 7: Test term query for comparison
# Step 7: Plain lexical match query on the description field, as a baseline
# to compare against the neural result from step 6.
TERM_SEARCH_RESPONSE=$(curl -s -XGET "${OPENSEARCH_URL}/nyc_facts/_search" \
    -H 'Content-Type: application/json' \
    -d'{
        "query": {
            "match": {
                "description": "stadium"
            }
        }
    }')

# Any hit at all counts as success for this comparison query.
if ! echo "$TERM_SEARCH_RESPONSE" | jq -e '.hits.hits[0]' > /dev/null 2>&1; then
    echo "⚠ Term search failed"
else
    echo "✓ Term search successful"
    MATCHED_DESC=$(echo "$TERM_SEARCH_RESPONSE" | jq -r '.hits.hits[0]._source.description')
    echo "  Result: $MATCHED_DESC"
fi

Related Issues

#620

Check List

  • New functionality includes testing.
  • New functionality has been documented.
  • API changes companion pull request created.
  • Commits are signed per the DCO using --signoff.
  • Public documentation issue/PR created.

By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
For more information on following Developer Certificate of Origin and signing off your commits, please check here.

Signed-off-by: Fen Q <[email protected]>
@codecov
Copy link

codecov bot commented Oct 21, 2025

Codecov Report

❌ Patch coverage is 85.56701% with 14 lines in your changes missing coverage. Please review.
✅ Project coverage is 79.75%. Comparing base (aaf801a) to head (338019e).
⚠️ Report is 1 commits behind head on main.

Files with missing lines Patch % Lines
...earch/neuralsearch/ml/MLCommonsClientAccessor.java 75.00% 8 Missing and 6 partials ⚠️
Additional details and impacted files
@@             Coverage Diff              @@
##               main    #1605      +/-   ##
============================================
- Coverage     79.79%   79.75%   -0.04%     
- Complexity     3461     3470       +9     
============================================
  Files           279      280       +1     
  Lines         12960    13024      +64     
  Branches       2076     2084       +8     
============================================
+ Hits          10341    10387      +46     
- Misses         1858     1866       +8     
- Partials        761      771      +10     

☔ View full report in Codecov by Sentry.
📢 Have feedback on the report? Share it here.

🚀 New features to boost your workflow:
  • ❄️ Test Analytics: Detect flaky tests, report on failures, and find test suite problems.

*/
private boolean isAsymmetricModel(MLModel model) {
MLModelConfig modelConfig = model.getModelConfig();
if (!(modelConfig instanceof TextEmbeddingModelConfig textEmbeddingModelConfig)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do the remote and local models have the same conditional check logic?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I reviewed the OpenSearch documentation about asymmetric semantic search here.

  • The asymmetric model is registered using a local zip file
  • The model_config for asymmetric models differs from symmetric models by requiring two additional parameters:
    • query_prefix
    • passage_prefix
  • I couldn't find any pre-built remote models specifically for asymmetric search. Please let me know if you happen to know any remote use cases and examples.
POST /_plugins/_ml/models/_register
{
    "name": "e5-small-onnx",
    "version": "1.0.0",
    "description": "Asymmetric multilingual-e5-small model",
    "model_format": "ONNX",
    "model_group_id": "your_group_id",
    "model_content_hash_value": "your_model_zip_content_hash_value",
    "model_config": {
        "model_type": "bert",
        "embedding_dimension": 384,
        "framework_type": "sentence_transformers",
        "query_prefix": "query: ",
        "passage_prefix": "passage: ",
        "all_config": "{ \"_name_or_path\": \"intfloat/multilingual-e5-small\", \"architectures\": [ \"BertModel\" ], \"attention_probs_dropout_prob\": 0.1, \"hidden_size\": 384, \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"tokenizer_class\": \"XLMRobertaTokenizer\" }"
    },
    "url": "http://localhost:8080/intfloat-multilingual-e5-small-onnx.zip"
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's make sure remote models are available to use with this feature. In some production cases, users are encouraged to use a remote model for better performance. So we need to consider handling both local and remote cases in the ML client accessor.

The function names could be different here.

The model meta data(modelAsymmetry) check also need to be considered for both remote and local model.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added customized remote sagemaker model for asymmetric use case

updated the PR to support both local and remote models

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants