Python源码示例:elasticsearch.client.Elasticsearch()

示例1
def __init__(self, host=None, port=None, index=None, index_suffix=None):
        """Connect to Elasticsearch and ensure the target index exists.

        Every connection parameter falls back to the global ``conf``
        object and, failing that, to a hard-coded default.
        """
        # Resolution order: explicit argument > conf attribute > default.
        self.host = host or getattr(conf, 'elasticsearch_host', None) or 'localhost'
        self.port = port or getattr(conf, 'elasticsearch_port', None) or 9200
        self.index = index or getattr(conf, 'elasticsearch_index', None) or 'repoxplorer'
        if index_suffix:
            self.index = "%s-%s" % (self.index, index_suffix)
        self.es = client.Elasticsearch(
            [{"host": self.host, "port": self.port}],
            timeout=60)
        self.ic = client.IndicesClient(self.es)
        if self.ic.exists(index=self.index):
            return
        self.ic.create(index=self.index)
        # Index creation is asynchronous on the cluster side; wait a bit
        # so follow-up requests see a fully created index.
        time.sleep(1)
示例2
def _build_key(cls, urls, timeout, **settings):
        """Derive a hashable cache key from connection parameters.

        The keyword settings are sorted by name and rendered via repr()
        so equal settings always yield an equal key. repr() has edge
        cases, but the worst outcome of a mismatch is a fresh (rather
        than cached) Elasticsearch instance.
        """
        ordered = sorted(settings.items(), key=lambda item: item[0])
        settings_repr = repr(ordered)
        # elasticsearch accepts a bare string for the URLs, so normalize
        # either form into a tuple (tuples are hashable, lists are not).
        url_tuple = (urls,) if isinstance(urls, string_types) else tuple(urls)
        # Bundle every component into one hashable tuple.
        return (url_tuple, timeout, settings_repr)
示例3
def get_instance():
        """Return the lazily created singleton low-level client.

        Double-checked locking: the unlocked fast path avoids contention
        once the client exists, while the re-check inside the lock stops
        two threads from both constructing a client.
        """
        if ESLowLevelClient.__es is not None:
            return ESLowLevelClient.__es
        with ESLowLevelClient.__es_lock:
            if ESLowLevelClient.__es is None:
                ESLowLevelClient.__es = Elasticsearch(['localhost'], port=9200, maxsize=25)
        return ESLowLevelClient.__es
示例4
def __init__(self, es):
        """Wrap a validated Elasticsearch client.

        :param es: an instance of the Elasticsearch client class ``CES``.
        :raises TypeError: if *es* is not a ``CES`` instance.
        """

        # 10000 is the maximum result-window size Elasticsearch allows
        # for a single search.
        self.search_limit_size = 10000

        if not isinstance(es, CES):
            # TypeError is the precise exception for a wrong-type argument;
            # as a subclass of Exception it stays catchable by callers that
            # caught the generic Exception raised previously.
            raise TypeError("Elasticsearch client class NOT provided")
        self.es = es
示例5
def __init__(self, urls=None, timeout=None, force_new=False, raw_results=False, **kwargs):
        '''
        Creates a new ElasticSearch DSL object. Grabs the ElasticSearch connection from the pool
        if it has already been initialized. Otherwise, creates a new one.

        If no parameters are passed, everything is determined from the Django settings.

        :param urls: A list of URLs, or a single string of URL (without leading `http://`), or None to read from settings.
        :param timeout: Timeout used in the connection.
        :param force_new: Set to `True` to force a new elasticsearch connection. Otherwise will aggressively use any connection with the exact same settings.
        :param raw_results: Set to `True` to keep only raw (unmapped) results in `self.results`.
        :param **kwargs: Additional settings to pass to the low level elasticsearch client and to elasticsearch-dsl-py's Search.
        '''

        Bungiesearch.__load_settings__()

        # Fall back to the Django-derived settings for anything not given.
        urls = urls or Bungiesearch.BUNGIE['URLS']
        if not timeout:
            timeout = Bungiesearch.BUNGIE.get('TIMEOUT', Bungiesearch.DEFAULT_TIMEOUT)

        # Split kwargs: keys in `search_keys` go to the Search superclass,
        # everything else is forwarded to the low level client constructor.
        search_keys = ['using', 'index', 'doc_type', 'extra']
        search_settings, es_settings = {}, {}
        for k, v in iteritems(kwargs):
            if k in search_keys:
                search_settings[k] = v
            else:
                es_settings[k] = v

        if not es_settings:
            # If there aren't any provided elasticsearch settings, let's see if it's defined in the settings.
            es_settings = Bungiesearch.BUNGIE.get('ES_SETTINGS', {})

        # Building a caching key to cache the es_instance for later use (and retrieved a previously cached es_instance).
        cache_key = Bungiesearch._build_key(urls, timeout, **es_settings)
        es_instance = None
        if not force_new:
            if cache_key in Bungiesearch._cached_es_instances:
                es_instance = Bungiesearch._cached_es_instances[cache_key]

        if not es_instance:
            # Cache miss (or force_new): create and remember a fresh client.
            es_instance = Elasticsearch(urls, timeout=timeout, **es_settings)
            Bungiesearch._cached_es_instances[cache_key] = es_instance

        if 'using' not in search_settings:
            # The Search superclass uses 'using' as its connection handle.
            search_settings['using'] = es_instance

        super(Bungiesearch, self).__init__(**search_settings)

        # Creating instance attributes.
        self._only = [] # Stores the exact fields to fetch from the database when mapping.
        self.results = [] # Store the mapped and unmapped results.
        self._raw_results_only = raw_results
示例6
def create_es_publisher_sample_job(elasticsearch_index_alias='table_search_index',
                                   elasticsearch_doc_type_key='table',
                                   model_name='databuilder.models.table_elasticsearch_document.TableESDocument',
                                   cypher_query=None,
                                   elasticsearch_mapping=None):
    """Build a DefaultJob that extracts table metadata from Neo4j and
    publishes it to a freshly named Elasticsearch index.

    :param elasticsearch_index_alias:  alias for Elasticsearch used in
                                       amundsensearchlibrary/search_service/config.py as an index
    :param elasticsearch_doc_type_key: name the ElasticSearch index is prepended with. Defaults to `table` resulting in
                                       `table_search_index`
    :param model_name:                 the Databuilder model class used in transporting between Extractor and Loader
    :param cypher_query:               Query handed to the `Neo4jSearchDataExtractor` class, if None is given (default)
                                       it uses the `Table` query baked into the Extractor
    :param elasticsearch_mapping:      Elasticsearch field mapping "DDL" handed to the `ElasticsearchPublisher` class,
                                       if None is given (default) it uses the `Table` query baked into the Publisher
    """
    # The loader writes extracted records here and the publisher reads
    # them back from the same path.
    staging_path = '/var/tmp/amundsen/search_data.json'

    search_task = DefaultTask(loader=FSElasticsearchJSONLoader(),
                              extractor=Neo4jSearchDataExtractor(),
                              transformer=NoopTransformer())

    # Module-level elastic search client instance.
    es_client = es
    # Unique name for the new index so repeated runs never collide.
    new_index_name = 'tables' + str(uuid.uuid4())

    # Small helpers so each config namespace prefix is spelled out once.
    def _extractor_key(suffix):
        return 'extractor.search_data.extractor.neo4j.{}'.format(suffix)

    def _loader_key(suffix):
        return 'loader.filesystem.elasticsearch.{}'.format(suffix)

    def _publisher_key(suffix):
        return 'publisher.elasticsearch.{}'.format(suffix)

    job_config = ConfigFactory.from_dict({
        _extractor_key(Neo4jExtractor.GRAPH_URL_CONFIG_KEY): neo4j_endpoint,
        _extractor_key(Neo4jExtractor.MODEL_CLASS_CONFIG_KEY): model_name,
        _extractor_key(Neo4jExtractor.NEO4J_AUTH_USER): neo4j_user,
        _extractor_key(Neo4jExtractor.NEO4J_AUTH_PW): neo4j_password,
        _loader_key(FSElasticsearchJSONLoader.FILE_PATH_CONFIG_KEY): staging_path,
        _loader_key(FSElasticsearchJSONLoader.FILE_MODE_CONFIG_KEY): 'w',
        _publisher_key(ElasticsearchPublisher.FILE_PATH_CONFIG_KEY): staging_path,
        _publisher_key(ElasticsearchPublisher.FILE_MODE_CONFIG_KEY): 'r',
        _publisher_key(ElasticsearchPublisher.ELASTICSEARCH_CLIENT_CONFIG_KEY): es_client,
        _publisher_key(ElasticsearchPublisher.ELASTICSEARCH_NEW_INDEX_CONFIG_KEY): new_index_name,
        _publisher_key(ElasticsearchPublisher.ELASTICSEARCH_DOC_TYPE_CONFIG_KEY): elasticsearch_doc_type_key,
        _publisher_key(ElasticsearchPublisher.ELASTICSEARCH_ALIAS_CONFIG_KEY): elasticsearch_index_alias,
    })

    # These keys are only added when the caller supplied a value, so they
    # must be `put` dynamically rather than included in the dict above.
    if cypher_query:
        job_config.put('extractor.search_data.{}'.format(Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY),
                       cypher_query)
    if elasticsearch_mapping:
        job_config.put('publisher.elasticsearch.{}'.format(ElasticsearchPublisher.ELASTICSEARCH_MAPPING_CONFIG_KEY),
                       elasticsearch_mapping)

    return DefaultJob(conf=job_config,
                      task=search_task,
                      publisher=ElasticsearchPublisher())