How to use Scroll while passing raw json Query to ElasticSearch using NEST

1.2k views Asked by At
// Raw JSON request body: a match_all query, written as a C# verbatim string
// (the doubled quotes are escaped quote characters inside @"...").
var query = @"
        {
          ""query"": {
            ""match_all"": { }                    
          }
        }";

    // Request-parameter callback handed to the low-level search call below:
    // sets the search type to Scan and the scroll value to 60 seconds.
    Func<SearchRequestParameters, SearchRequestParameters> requestParameters = a => 
                                                a.SearchType(SearchType.Scan).Scroll(TimeSpan.FromSeconds(60));

        // Send the raw JSON through the low-level client, asking for the body as SearchResponse<T>.
        // NOTE(review): this statement has no terminating semicolon in the snippet as posted.
        var searchResult = await client.LowLevel.SearchAsync<SearchResponse<T>>(indexName, mappingName, query , requestParameters)

// Only scroll when the initial search response reports itself as valid.
if (searchResult.Body.IsValid)
            {
                // Incremented once per loop iteration below; never read within this snippet.
                var scrollNo = 0;                
                // First scroll call, using the scroll id returned by the initial search.
                // "10s" is presumably the scroll keep-alive - confirm against the NEST ScrollAsync signature.
                var results = await client.ScrollAsync<T>("10s", searchResult.Body.ScrollId);

                // Keep going while the latest scroll response still contains documents.
                // NOTE(review): the brace opened for this loop is never closed in the snippet,
                // so the return statement below sits inside the loop body.
                while (results.Documents.Any())
                {
                    // NOTE(review): `documents` is not declared anywhere in this snippet.
                    documents.AddRange(results.Documents);
                    scrollNo++;

                    // Request the next page with the scroll id from the previous scroll response.
                    results = await client.ScrollAsync<T>("10s", results.ScrollId);

                // NOTE(review): `result` is not declared in this snippet; the search response
                // variable above is named `searchResult`.
                return new Customresponse<T>
                {
                    Documents = documents,
                    total = result.Body.Total
                };
            }

I would like to pull all data using scroll while passing a raw JSON query, but scroll does not work properly when I pass the query as raw JSON. Can anyone help with this?

1

There is 1 answer

3
Russ Cam On BEST ANSWER

Your example is nearly there but not quite; you're missing a closing brace for the while loop to collect all documents before returning the custom response.

Here's an example I just ran on the Stackoverflow data set, to return all questions tagged with nest

// Elasticsearch client used by RunScrollAsync; assigned in Main before the scroll runs.
private IElasticClient _client;

/// <summary>
/// Script entry point: builds a client for a single local node, defines a raw JSON
/// term query for documents whose "tags" field has the value "nest", runs it through
/// <c>RunScrollAsync</c> and dumps the collected result.
/// </summary>
void Main()
{
    var pool = new SingleNodeConnectionPool(new Uri("http://localhost:9200"));
    var connectionSettings = new ConnectionSettings(pool);

    _client = new ElasticClient(connectionSettings);

    // Raw JSON request body as a verbatim string; doubled quotes are escaped quotes.
    var query = @"
    {
    ""query"": {
      ""term"": {
        ""tags"": {
          ""value"": ""nest""
          }
        }
      }
    }";

    // Main is synchronous, so block on the task here rather than awaiting it.
    // Dump() is presumably the LINQPad output extension - confirm for your host.
    RunScrollAsync(query).Result.Dump();
}

/// <summary>
/// Sends the raw JSON <paramref name="query"/> to the "posts" index / "question" type
/// with scrolling enabled, then keeps scrolling until a page comes back without
/// documents, collecting every page into a single response.
/// </summary>
/// <param name="query">The raw JSON search request body.</param>
/// <returns>
/// All collected documents together with the total reported by the initial search
/// response; or an empty response with a total of 0 when the initial response has no
/// body, is not valid, or contains no documents.
/// </returns>
private async Task<Customresponse<Question>> RunScrollAsync(string query)
{
    var scrollTime = "10s";

    // omit the .SearchType(Scan) which is deprecated. Not
    // specifying means the first response contains the first set
    // of documents
    var esResponse = await _client.LowLevel.SearchAsync<SearchResponse<Question>>(
        "posts",
        "question",
        query, r => r.Scroll(TimeSpan.FromSeconds(10))).ConfigureAwait(false);

    var firstPage = esResponse.Body;

    if (firstPage == null || !firstPage.IsValid || !firstPage.Documents.Any())
    {
        // return an empty result.
        // Or throw an exception, or log - whatever you need to do
        return new Customresponse<Question>
        {
            Documents = Enumerable.Empty<Question>(),
            total = 0
        };
    }

    // assume you have less than 2,147,483,647 documents to return?
    var documents = new List<Question>((int)firstPage.Total);
    documents.AddRange(firstPage.Documents);

    // Remember the most recent scroll id so the scroll can be cleared once we are done.
    var scrollId = firstPage.ScrollId;

    var response = await _client.ScrollAsync<Question>(scrollTime, scrollId).ConfigureAwait(false);

    // keep scrolling until no more documents are returned (or a scroll call fails)
    while (response.IsValid && response.Documents.Any())
    {
        documents.AddRange(response.Documents);
        scrollId = response.ScrollId;

        response = await _client.ScrollAsync<Question>(scrollTime, scrollId).ConfigureAwait(false);
    }

    // Release the scroll on the server now instead of leaving it to time out.
    if (!string.IsNullOrEmpty(scrollId))
    {
        await _client.ClearScrollAsync(c => c.ScrollId(scrollId)).ConfigureAwait(false);
    }

    return new Customresponse<Question>
    {
        Documents = documents,
        // Taken from the validated initial response rather than from the scroll
        // response that ended the loop, which may be an invalid response.
        total = firstPage.Total
    };
}

/// <summary>
/// Plain container pairing a sequence of documents with a count.
/// </summary>
/// <typeparam name="T">The type of the contained documents.</typeparam>
public class Customresponse<T>
{
    /// <summary>Gets or sets the documents carried by this response.</summary>
    public IEnumerable<T> Documents
    {
        get;
        set;
    }

    /// <summary>Gets or sets the count stored alongside the documents.</summary>
    public long total
    {
        get;
        set;
    }
}

This returns all 342 questions, with a total of 342 (Data set is from June 2016).