Python source code examples: concurrent.futures.ProcessPoolExecutor()

Example 1
def main():

  t1 = timeit.default_timer()
  with ProcessPoolExecutor(max_workers=4) as executor:
    for number, prime in zip(PRIMES, executor.map(is_prime, PRIMES)):
      print('%d is prime: %s' % (number, prime))

  print("{} Seconds Needed for ProcessPoolExecutor".format(timeit.default_timer() - t1))
  
  t2 = timeit.default_timer()
  with ThreadPoolExecutor(max_workers=4) as executor:
    for number, prime in zip(PRIMES, executor.map(is_prime, PRIMES)):
      print('%d is prime: %s' % (number, prime))
  print("{} Seconds Needed for ThreadPoolExecutor".format(timeit.default_timer() - t2))

  t3 = timeit.default_timer()
  for number in PRIMES:
    isPrime = is_prime(number)
    print("{} is prime: {}".format(number, isPrime))
  print("{} Seconds needed for single threaded execution".format(timeit.default_timer()-t3)) 
Example 2
def test_smoke_one_file(self):
        db_path = "test.sqlite"
        runner = CliRunner()

        with runner.isolated_filesystem():
            fifo_name = "jsonl_fifo"

            os.mkfifo(fifo_name)

            with ProcessPoolExecutor() as executor:
                executor.submit(fifo_writer, fifo_name)
                result = runner.invoke(cmd, ["-o", db_path, "file", fifo_name, "--format", "jsonl"])

            print_traceback(result)

            assert result.exit_code == ExitCode.SUCCESS, fifo_name

            assert SimpleSQLite(db_path).fetch_num_records("jsonl_fifo") == 8 
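Note: the fifo_writer helper is not shown in Example 2. Opening a FIFO for writing blocks until a reader opens the other end, which is why it is submitted to a separate process while the CLI reads the pipe. A hypothetical sketch consistent with the final assertion of 8 records:

import json

def fifo_writer(fifo_name):
    # Write 8 JSON Lines records into the named pipe, then close it.
    with open(fifo_name, "w") as f:
        for i in range(8):
            f.write(json.dumps({"value": i}) + "\n")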
Example 3
def launch(experiment: str, num_workers: int = 1, seed: tp.Optional[int] = None,
           cap_index: tp.Optional[int] = None, output: tp.Optional[PathLike] = None) -> Path:
    """Launch experiment with given names and selection modulo
    max_index can be specified to provide a limited number of settings
    """
    # create the data
    csvpath = Path(experiment + ".csv") if output is None else Path(output)
    if num_workers == 1:
        df = core.compute(experiment, cap_index=cap_index, seed=seed)
    else:
        with futures.ProcessPoolExecutor(max_workers=num_workers) as executor:
            df = core.compute(experiment, seed=seed, cap_index=cap_index, executor=executor, num_workers=num_workers)
    # save data to csv
    try:
        core.save_or_append_to_csv(df, csvpath)
    except Exception:  # pylint: disable=broad-except
        csvpath = Path(experiment + ".csv")
        print(f"Failed to save to {output}, falling back to {csvpath}")
        core.save_or_append_to_csv(df, csvpath)
    else:
        print(f"Saved data to {csvpath}")
    return csvpath 
Example 4
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
  '''Preprocesses the LJ Speech dataset from a given input path into a given output directory.
    Args:
      in_dir: The directory where you have downloaded the LJ Speech dataset
      out_dir: The directory to write the output into
      num_workers: Optional number of worker processes to parallelize across
      tqdm: You can optionally pass tqdm to get a nice progress bar
    Returns:
      A list of tuples describing the training examples. This should be written to train.txt
  '''

  # We use ProcessPoolExecutor to parallelize across processes. This is just an optimization and you
  # can omit it and just call _process_utterance on each input if you want.
  executor = ProcessPoolExecutor(max_workers=num_workers)
  futures = []
  index = 1
  with open(os.path.join(in_dir, 'metadata.train'), encoding='utf-8') as f:
    for line in f:
      parts = line.strip().split('|')
      wav_path = os.path.join(in_dir, 'wavs', '%s.wav' % parts[0])
      text = parts[1]
      futures.append(executor.submit(partial(_process_utterance, out_dir, index, wav_path, text)))
      index += 1
  results = [future.result() for future in tqdm(futures)]
  return [r for r in results if r is not None] 
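Side note: executor.submit accepts positional arguments for the callable directly, so the functools.partial wrapper above is optional. A small self-contained demo of both forms (the label function is made up for illustration):

from concurrent.futures import ProcessPoolExecutor
from functools import partial

def label(prefix, value):
  return '{}{}'.format(prefix, value)

if __name__ == '__main__':
  with ProcessPoolExecutor(max_workers=2) as executor:
    a = executor.submit(label, 'id-', 1)           # arguments passed to submit
    b = executor.submit(partial(label, 'id-'), 2)  # same call, bound with partial
    print(a.result(), b.result())                  # id-1 id-2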
Example 5
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
  executor = ProcessPoolExecutor(max_workers=num_workers)
  futures = []
  index = 1
  for book in books:
    with open(os.path.join(in_dir, book, 'sentence_index.txt')) as f:
      for line in f:
        parts = line.strip().split('\t')
        if line[0] != '#' and len(parts) == 8 and float(parts[3]) > _min_confidence:
          wav_path = os.path.join(in_dir, book, 'wav', '%s.wav' % parts[0])
          labels_path = os.path.join(in_dir, book, 'lab', '%s.lab' % parts[0])
          text = parts[5]
          task = partial(_process_utterance, out_dir, index, wav_path, labels_path, text)
          futures.append(executor.submit(task))
          index += 1
  results = [future.result() for future in tqdm(futures)]
  return [r for r in results if r is not None] 
Example 6
def __init__(self, config: Config = None) -> None:
        if config is None:
            config = Config(**DEFAULT, TOKEN='asdf', CHANNELS={}, USERS={})

        Namespace._bot = self
        self.loop = asyncio.get_event_loop()
        self.call_queue: List[Call] = []
        self.api = SlackAPI(self)
        self.channels: List[PublicChannel] = []
        self.ims: List[DirectMessageChannel] = []
        self.groups: List[PrivateChannel] = []
        self.mc = aiomcache.Client(
            host=config.CACHE['HOST'], port=config.CACHE['PORT'],
        )
        self.cache: CacheMock = CacheMock(self.mc, 'YUI_TEST_')
        self.users: List[User] = [User(id='U0', team_id='T0', name='system')]
        self.responses: Dict[str, Callable] = {}
        self.config = config
        self.process_pool_executor = ProcessPoolExecutor()
        self.thread_pool_executor = ThreadPoolExecutor() 
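Note: the constructor above only creates the executors; they are presumably handed to the event loop elsewhere. A minimal sketch (an assumption, not code from this project) of how a ProcessPoolExecutor is typically driven from asyncio via run_in_executor:

import asyncio
from concurrent.futures import ProcessPoolExecutor

def cpu_bound(n):
    # CPU-heavy work that would block the event loop if run inline.
    return sum(i * i for i in range(n))

async def demo():
    loop = asyncio.get_running_loop()
    with ProcessPoolExecutor() as pool:
        result = await loop.run_in_executor(pool, cpu_bound, 100_000)
        print(result)

if __name__ == '__main__':
    asyncio.run(demo())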
Example 7
def par_crop(args):
    """
    Dataset curation, crop data and transform the format of a label
    """
    crop_path = os.path.join(args.download_dir, './crop{:d}'.format(args.instance_size))
    if not os.path.isdir(crop_path): makedirs(crop_path)
    VID_base_path = os.path.join(args.download_dir, './ILSVRC')
    ann_base_path = os.path.join(VID_base_path, 'Annotations/DET/train/')
    sub_sets = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i')
    for sub_set in sub_sets:
        sub_set_base_path = os.path.join(ann_base_path, sub_set)
        if 'a' == sub_set:
            xmls = sorted(glob.glob(os.path.join(sub_set_base_path, '*', '*.xml')))
        else:
            xmls = sorted(glob.glob(os.path.join(sub_set_base_path, '*.xml')))
        n_imgs = len(xmls)
        sub_set_crop_path = os.path.join(crop_path, sub_set)
        with futures.ProcessPoolExecutor(max_workers=args.num_threads) as executor:
            fs = [executor.submit(crop_xml, args, xml, sub_set_crop_path, args.instance_size) for xml in xmls]
            for i, f in enumerate(futures.as_completed(fs)):
                printProgress(i, n_imgs, prefix=sub_set, suffix='Done ', barLength=80) 
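Note: futures.as_completed yields futures in completion order, not submission order, which is why Example 7 only uses it to drive the progress bar. When results must be matched back to their inputs, a future-to-input mapping is the usual pattern; a generic, self-contained sketch (not from this project):

from concurrent import futures

def work(x):
    return x * x

if __name__ == '__main__':
    with futures.ProcessPoolExecutor(max_workers=4) as executor:
        fs = {executor.submit(work, x): x for x in range(10)}
        for f in futures.as_completed(fs):
            print(fs[f], '->', f.result())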
Example 8
def par_crop(args, ann_base_path):
    """
    Dataset curation, crop data and transform the format of label
    Parameters
    ----------
    ann_base_path: str, Annotations base path
    """
    crop_path = os.path.join(args.download_dir, './crop{:d}'.format(int(args.instance_size)))
    if not os.path.isdir(crop_path):
        makedirs(crop_path)
    sub_sets = sorted({'a', 'b', 'c', 'd', 'e'})
    for sub_set in sub_sets:
        sub_set_base_path = os.path.join(ann_base_path, sub_set)
        videos = sorted(os.listdir(sub_set_base_path))
        n_videos = len(videos)
        with futures.ProcessPoolExecutor(max_workers=args.num_threads) as executor:
            fs = [executor.submit(crop_video, args, sub_set, video, crop_path, ann_base_path) for video in videos]
            for i, f in enumerate(futures.as_completed(fs)):
                # Write progress to error so that it can be seen
                printProgress(i, n_videos, prefix=sub_set, suffix='Done ', barLength=40) 
Example 9
def __init__(self, threads: int = None) -> None:
        try:  # get or create loop (threads don't have one)
            #: our asyncio loop
            self.loop = asyncio.get_event_loop()
        except RuntimeError:
            self.loop = asyncio.new_event_loop()
            asyncio.set_event_loop(self.loop)
        #: number of threads to use
        self.threads = threads or threads_to_use()
        #: semaphore to limit io parallelism
        self.io_sem: asyncio.Semaphore = asyncio.Semaphore(1)
        #: must never run more than one conda at the same time
        #: (used by PyPI when running skeleton)
        self.conda_sem: asyncio.Semaphore = asyncio.Semaphore(1)
        #: the filters successively applied to each item
        self.filters: List[AsyncFilter] = []
        #: executor running things in separate python processes
        self.proc_pool_executor = ProcessPoolExecutor(self.threads)

        self._shutting_down = False 
Example 10
def run(self) -> bool:
        """Enters the asyncio loop and manages shutdown."""
        # We need to handle KeyboardInterrupt "manually" to get clean shutdown
        # for the ProcessPoolExecutor
        self.loop.add_signal_handler(signal.SIGINT,
                                     lambda: asyncio.ensure_future(self.shutdown(signal.SIGINT)))
        try:
            task = asyncio.ensure_future(self._async_run())
            self.loop.run_until_complete(task)
            logger.warning("Finished update")
        except asyncio.CancelledError:
            pass
        except EndProcessing:
            logger.error("Terminating...")
            self.loop.run_until_complete(self.shutdown())

        for filt in self.filters:
            filt.finalize() 
Example 11
def preprocess(wav_dirs, out_dir, num_workers, params):
    audio_out_dir = os.path.join(out_dir, "audio")
    mel_out_dir = os.path.join(out_dir, "mels")
    os.makedirs(out_dir, exist_ok=True)
    os.makedirs(audio_out_dir, exist_ok=True)
    os.makedirs(mel_out_dir, exist_ok=True)

    executor = ProcessPoolExecutor(max_workers=num_workers)
    futures = []
    wav_paths = chain.from_iterable(glob.iglob("{}/*.wav".format(dir), recursive=True) for dir in wav_dirs)
    for wav_path in wav_paths:
        fid = os.path.basename(wav_path).replace(".wav", ".npy")
        audio_path = os.path.join(audio_out_dir, fid)
        mel_path = os.path.join(mel_out_dir, fid)
        futures.append(executor.submit(partial(process_wav, wav_path, audio_path, mel_path, params)))

    metadata = [future.result() for future in tqdm(futures)]
    write_metadata(metadata, out_dir, params) 
Example 12
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
  executor = ProcessPoolExecutor(max_workers=num_workers)
  futures = []
  index = 1
  for book in books:
    with open(os.path.join(in_dir, book, 'sentence_index.txt')) as f:
      for line in f:
        parts = line.strip().split('\t')
        if line[0] != '#' and len(parts) == 8 and float(parts[3]) > _min_confidence:
          wav_path = os.path.join(in_dir, book, 'wav', '%s.wav' % parts[0])
          labels_path = os.path.join(in_dir, book, 'lab', '%s.lab' % parts[0])
          text = parts[5]
          task = partial(_process_utterance, out_dir, index, wav_path, labels_path, text)
          futures.append(executor.submit(task))
          index += 1
  results = [future.result() for future in tqdm(futures)]
  return [r for r in results if r is not None] 
Example 13
def __init__(self, eggroll_session):
        self.data_dir = os.path.join(file_utils.get_project_base_directory(), 'data')
        self.session_id = eggroll_session.get_session_id()
        self.meta_table = _DTable('__META__', '__META__', 'fragments', 10)
        self.pool = Executor()
        Standalone.__instance = self

        self.eggroll_session = eggroll_session

        self.unique_id_template = '_EggRoll_%s_%s_%s_%.20f_%d'

        eggroll_session.set_gc_table(self)
        eggroll_session.add_cleanup_task(eggroll_session.clean_duplicated_table)

        # todo: move to eggrollSession
        try:
            self.host_name = socket.gethostname()
            self.host_ip = socket.gethostbyname(self.host_name)
        except socket.gaierror as e:
            self.host_name = 'unknown'
            self.host_ip = 'unknown' 
Example 14
def convert_dataset(path, filemap, name, num_processes, max_num_support, max_tokens, is_web=True):
    with open(path, 'rb') as f:
        dataset = pickle.load(f)

    if num_processes == 1:
        instances = process((dataset, filemap, max_num_support, max_tokens, is_web), True)
    else:
        chunk_size = 1000
        executor = ProcessPoolExecutor(num_processes)
        instances = []
        i = 0
        for processed in executor.map(
                process, [(dataset[i * chunk_size:(i + 1) * chunk_size], filemap, max_num_support, max_tokens, is_web)
                          for i in range(len(dataset) // chunk_size + 1)]):
            instances.extend(processed)
            i += chunk_size
            print("%d/%d done" % (min(len(dataset), i), len(dataset)))

    return {"meta": {"source": name}, 'instances': instances} 
Example 15
def simulate_walks(self,num_walks,walk_length):

		# for large graphs, it is serially executed, because of memory use.
		if(len(self.G) > 500000):

			with ProcessPoolExecutor(max_workers=1) as executor:
				job = executor.submit(generate_random_walks_large_graphs,num_walks,walk_length,self.workers,self.G.keys())

				job.result()

		else:

			with ProcessPoolExecutor(max_workers=1) as executor:
				job = executor.submit(generate_random_walks,num_walks,walk_length,self.workers,self.G.keys())

				job.result()


		return 
Example 16
def main():
  print("Starting ThreadPoolExecutor")
  with ProcessPoolExecutor(max_workers=3) as executor:
    future = executor.submit(task, (2))
    future = executor.submit(task, (3))
    future = executor.submit(task, (4))
    
  print("All tasks complete") 
Example 17
def main():
  executor = ProcessPoolExecutor(max_workers=3)
  task1 = executor.submit(task)
  task2 = executor.submit(task) 
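Note: Examples 16 and 17 rely on a task callable that is not shown. Any picklable, module-level function works; a hypothetical sketch:

import time

def task(n=1):
  # Simulate some work and return a result.
  time.sleep(n)
  return n * n

Exiting the with block in Example 16 implicitly calls shutdown(wait=True), so "All tasks complete" is only printed after all three submitted futures have finished.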
Example 18
def init_pool(self, worker_count):
        return ProcessPoolExecutor(worker_count) 
Example 19
def parallelize(func, arg_lst, show_progress=False, max_workers=None):
    """Parallel execution of function func across a list of arguments.

    The function func and all of the arguments must be picklable. func is
    executed on each element of arg_lst as func(*args).

    Parameters
    ----------
        func:
            Function to repeatedly execute, must be picklable.
        arg_lst: iterable
            Iterator of unnamed arguments. Each element arg is passed as func(*arg).
        show_progress: bool
            Whether or not to display a progress bar.
        max_workers: int
            Maximum number of parallel processes to execute simultaneously.

    Returns
    -------
        results: list
            List of outcomes of running func(*arg) on each arg in arg_lst.
            Results are in the same order as the input arg_lst.

    """

    def display(range_obj):
        if show_progress:
            range_obj = tqdm(range_obj)
        return range_obj

    results = []
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = []
        for args in arg_lst:
            futures.append(executor.submit(func, *args))
        for future in display(futures):
            data = future.result()
            results.append(data)
    return results 
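A usage sketch for parallelize with a hypothetical picklable function (show_progress is left at its default, so tqdm is not needed):

def add(a, b):
    return a + b

if __name__ == '__main__':
    print(parallelize(add, [(1, 2), (3, 4), (5, 6)]))  # -> [3, 7, 11]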
Example 20
def load_adjacencylist(file_, undirected=False, chunksize=10000, unchecked=True):

  if unchecked:
    parse_func = parse_adjacencylist_unchecked
    convert_func = from_adjlist_unchecked
  else:
    parse_func = parse_adjacencylist
    convert_func = from_adjlist

  adjlist = []

  t0 = time()

  with open(file_) as f:
    with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
      total = 0 
      for idx, adj_chunk in enumerate(executor.map(parse_func, grouper(int(chunksize), f))):
          adjlist.extend(adj_chunk)
          total += len(adj_chunk)
  
  t1 = time()

  logger.info('Parsed {} edges with {} chunks in {}s'.format(total, idx, t1-t0))

  t0 = time()
  G = convert_func(adjlist)
  t1 = time()

  logger.info('Converted edges to graph in {}s'.format(t1-t0))

  if undirected:
    t0 = time()
    G = G.make_undirected()
    t1 = time()
    logger.info('Made graph undirected in {}s'.format(t1-t0))

  return G 
Example 21
def count_textfiles(files, workers=1):
  c = Counter()
  with ProcessPoolExecutor(max_workers=workers) as executor:
    for c_ in executor.map(count_words, files):
      c.update(c_)
  return c 
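Note: count_words is not shown in Example 21; it only needs to return something Counter.update accepts. A hypothetical sketch:

from collections import Counter

def count_words(path):
  # Whitespace tokenization is an assumption; the real helper may differ.
  with open(path, encoding='utf-8') as f:
    return Counter(f.read().split())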
Example 22
def write_walks_to_disk(G, filebase, num_paths, path_length, alpha=0, rand=random.Random(0), num_workers=cpu_count(),
                        always_rebuild=True):
  global __current_graph
  global __vertex2str
  __current_graph = G
  __vertex2str = {v:str(v) for v in G.nodes()}
  files_list = ["{}.{}".format(filebase, str(x)) for x in xrange(num_paths)]
  expected_size = len(G)
  args_list = []
  files = []

  if num_paths <= num_workers:
    paths_per_worker = [1 for x in range(num_paths)]
  else:
    paths_per_worker = [len([y for y in x if y is not None])
                        for x in graph.grouper(int(num_paths / num_workers)+1, range(1, num_paths+1))]

  with ProcessPoolExecutor(max_workers=num_workers) as executor:
    for size, file_, ppw in zip(executor.map(count_lines, files_list), files_list, paths_per_worker):
      if always_rebuild or size != (ppw*expected_size):
        args_list.append((ppw, path_length, alpha, random.Random(rand.randint(0, 2**31)), file_))
      else:
        files.append(file_)

  with ProcessPoolExecutor(max_workers=num_workers) as executor:
    for file_ in executor.map(_write_walks_to_disk, args_list):
      files.append(file_)

  return files 
Example 23
def load_adjacencylist(file_, undirected=False, chunksize=10000, unchecked=True):

  if unchecked:
    parse_func = parse_adjacencylist_unchecked
    convert_func = from_adjlist_unchecked
  else:
    parse_func = parse_adjacencylist
    convert_func = from_adjlist

  adjlist = []

  t0 = time()

  with open(file_) as f:
    with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
      total = 0 
      for idx, adj_chunk in enumerate(executor.map(parse_func, grouper(int(chunksize), f))):
          adjlist.extend(adj_chunk)
          total += len(adj_chunk)
  
  t1 = time()

  logger.info('Parsed {} edges with {} chunks in {}s'.format(total, idx, t1-t0))

  t0 = time()
  G = convert_func(adjlist)
  t1 = time()

  logger.info('Converted edges to graph in {}s'.format(t1-t0))

  if undirected:
    t0 = time()
    G = G.make_undirected()
    t1 = time()
    logger.info('Made graph undirected in {}s'.format(t1-t0))

  return G 
Example 24
def count_textfiles(files, workers=1):
  c = Counter()
  with ProcessPoolExecutor(max_workers=workers) as executor:
    for c_ in executor.map(count_words, files):
      c.update(c_)
  return c 
Example 25
def preprocess_midi_files_under(midi_root, save_dir, num_workers):
    midi_paths = list(utils.find_files_by_extensions(midi_root, ['.mid', '.midi']))
    os.makedirs(save_dir, exist_ok=True)
    out_fmt = '{}-{}.data'
    
    results = []
    executor = ProcessPoolExecutor(num_workers)

    for path in midi_paths:
        try:
            results.append((path, executor.submit(preprocess_midi, path)))
        except KeyboardInterrupt:
            print(' Abort')
            return
        except:
            print(' Error')
            continue
    
    for path, future in Bar('Processing').iter(results):
        print(' ', end='[{}]'.format(path), flush=True)
        name = os.path.basename(path)
        code = hashlib.md5(path.encode()).hexdigest()
        save_path = os.path.join(save_dir, out_fmt.format(name, code))
        torch.save(future.result(), save_path)

    print('Done') 
Example 26
def executor():
    return ProcessPoolExecutor() 
Example 27
def _assert_urls_exists(urls):

    with ProcessPoolExecutor(20) as ex:
        it = ex.map(_url_status, urls, chunksize=10)
        it = zip(
            urls,
            tqdm(it, total=len(urls), desc='checking urls'),
        )
        for i, (url, code) in enumerate(it):
            assert code == 200, (url, code)
        assert i == (len(urls) - 1) 
Example 28
def scan_more(urls, data=None, headers=None, encoding="UTF-8"):
    """批量扫描URL"""
    scan = partial(scan_one, data=data, headers=headers, encoding=encoding)
    with futures.ProcessPoolExecutor(max_workers=process) as executor:
        results = list(executor.map(scan, urls)) 
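Note: executor.map passes only one positional item (the URL) per call, which is why Example 28 binds the shared keyword arguments with functools.partial first. A hypothetical sketch of the scan_one helper, using only the standard library:

from urllib.request import Request, urlopen

def scan_one(url, data=None, headers=None, encoding="UTF-8"):
    # data, headers, and encoding arrive pre-bound via partial.
    req = Request(url, data=data, headers=headers or {})
    with urlopen(req, timeout=10) as resp:
        return url, resp.status, resp.read().decode(encoding, errors="replace")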
Example 29
def preprocess(args):
    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    # Make directory if not exist
    os.makedirs(args.output_dir, exist_ok=True)
    print('')
    print('[INFO] Root directory:', args.data_dir)

    AP = AudioProcessor(**config['audio'])
    executor = ProcessPoolExecutor(max_workers=args.n_jobs)
    fid = []
    text = []
    wav = []
    futures = []
    with open(args.old_meta, encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split('|')
            fpath = os.path.join(args.data_dir, '%s.wav' % parts[0])
            text = parts[2]
            job = executor.submit(partial(process_utterance, fpath, text, args.output_dir, AP))
            futures += [job]

    print('[INFO] Preprocessing', end=' => ')
    print(len(futures), 'audio files found')
    results = [future.result() for future in tqdm(futures)]
    fpath_meta = os.path.join(args.output_dir, 'ljspeech_meta.txt')
    with open(fpath_meta, 'w') as f:
        for x in results:
            s = map(lambda x: str(x), x)
            f.write('|'.join(s) + '\n') 
Example 30
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
  '''Preprocesses the LJ Speech dataset from a given input path into a given output directory.

    Args:
      in_dir: The directory where you have downloaded the LJ Speech dataset
      out_dir: The directory to write the output into
      num_workers: Optional number of worker processes to parallelize across
      tqdm: You can optionally pass tqdm to get a nice progress bar

    Returns:
      A list of tuples describing the training examples. This should be written to train.txt
  '''

  # We use ProcessPoolExecutor to parallelize across processes. This is just an optimization and you
  # can omit it and just call _process_utterance on each input if you want.
  executor = ProcessPoolExecutor(max_workers=num_workers)
  futures = []
  index = 1
  with open(os.path.join(in_dir, 'metadata.csv'), encoding='utf-8') as f:
    for line in f:
      parts = line.strip().split('|')
      wav_path = os.path.join(in_dir, 'wavs', '%s.wav' % parts[0])
      text = parts[2]
      if not os.path.exists(wav_path):
         continue
      futures.append(executor.submit(partial(_process_utterance, out_dir, index, wav_path, text)))
      index += 1
  return [future.result() for future in tqdm(futures)]