Java源码示例:it.unimi.dsi.fastutil.io.FastBufferedInputStream

示例1
/** Restores the state of {@code byteArrayDiskQueues} from the {@code metadata} file in {@code directory}.
 *
 * <p>The file is consumed in this order: four longs (size, append pointer, used, allocated),
 * an int used to resize both the buffer list and the file list, an int counting the entries and
 * then, for each entry, a key length (read with {@code Util.readVByte}), the key bytes and a
 * serialized {@link QueueData}.
 *
 * <p>The {@link QueueData} of every entry is always deserialized, even when no visit state
 * matches its key: leaving it unread would misalign the stream for all the following entries.
 *
 * @throws IOException if the metadata file cannot be opened or read.
 * @throws ClassNotFoundException if the class of a serialized object cannot be found.
 */
public void readMetadata() throws IOException, ClassNotFoundException {
	// try-with-resources guarantees that the file is closed even if reading fails halfway.
	try (FileInputStream fis = new FileInputStream(new File(directory, "metadata"));
			ObjectInputStream ois = new ObjectInputStream(new FastBufferedInputStream(fis))) {
		byteArrayDiskQueues.size = ois.readLong();
		byteArrayDiskQueues.appendPointer = ois.readLong();
		byteArrayDiskQueues.used = ois.readLong();
		byteArrayDiskQueues.allocated = ois.readLong();
		final int n = ois.readInt();
		byteArrayDiskQueues.buffers.size(n);
		byteArrayDiskQueues.files.size(n);
		final VisitStateSet schemeAuthority2VisitState = frontier.distributor.schemeAuthority2VisitState;
		// Reusable key buffer, enlarged on demand; only the first length bytes are meaningful.
		byte[] schemeAuthority = new byte[1024];
		for(int i = ois.readInt(); i-- != 0;) {
			final int length = Util.readVByte(ois);
			if (schemeAuthority.length < length) schemeAuthority = new byte[length];
			ois.readFully(schemeAuthority, 0, length);
			// Consume the object unconditionally so that the next entry starts at the right offset.
			final QueueData queueData = (QueueData)ois.readObject();
			final VisitState visitState = schemeAuthority2VisitState.get(schemeAuthority, 0, length);
			// This can happen if the serialization of the visit states has not been completed.
			if (visitState != null) byteArrayDiskQueues.key2QueueData.put(visitState, queueData);
			// Log only the bytes of this key: the rest of the buffer holds leftovers of previous keys.
			else LOGGER.error("No visit state found for " + Util.toString(java.util.Arrays.copyOf(schemeAuthority, length)));
		}
	}
}
 
示例2
/** Command-line entry point: scans the archive named by the first argument and stores the
 * resulting list of pointers, as an {@link EliasFanoMonotoneLongBigList}, in the file named by the second argument.
 *
 * @param arg the command-line arguments (archive filename, index filename).
 * @throws IOException if the archive cannot be read or the index cannot be written.
 * @throws JSAPException if the command-line parser cannot be set up.
 */
public static void main(String[] arg) throws IOException, JSAPException {
	final SimpleJSAP jsap = new SimpleJSAP(GZIPIndexer.class.getName(), "Computes and stores a quasi-succinct index for a compressed archive.",
			new Parameter[] {
				new UnflaggedOption("archive", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name a GZIP's archive."),
				new UnflaggedOption("index", JSAP.STRING_PARSER, JSAP.REQUIRED, "The output (a serialized LongBigList of pointers to the records in the archive) filename."),
			}
	);

	final JSAPResult jsapResult = jsap.parse(arg);
	if (jsap.messagePrinted()) return;

	final EliasFanoMonotoneLongBigList list;
	// The archive is closed as soon as the scan ends, whether it succeeds or fails.
	try (FastBufferedInputStream input = new FastBufferedInputStream(new FileInputStream(jsapResult.getString("archive")))) {
		final ProgressLogger pl = new ProgressLogger(LOGGER, 1, TimeUnit.MINUTES, "records");
		pl.start("Scanning...");
		list = new EliasFanoMonotoneLongBigList(index(input, pl));
		pl.done();
	}
	BinIO.storeObject(list, jsapResult.getString("index"));
}
 
示例3
/** Indexes the test archive, then visits its entries from the last pointer to the first,
 * checking that each entry starts with {@code EXPECTED_MAGIC}.
 *
 * @throws IOException if the archive cannot be read, or an entry ends before its magic bytes.
 */
@Test
public void reverseGetEntry() throws IOException {
	final LongBigArrayBigList pos;
	// The stream used for indexing is closed as soon as the index has been built.
	try (FileInputStream indexInput = new FileInputStream(ARCHIVE_PATH)) {
		pos = GZIPIndexer.index(indexInput);
	}

	final byte[] actualMagic = new byte[EXPECTED_MAGIC.length];
	try (FastBufferedInputStream fis = new FastBufferedInputStream(new FileInputStream(ARCHIVE_PATH))) {
		final GZIPArchiveReader gzar = new GZIPArchiveReader(fis);
		for (int i = (int) pos.size64() - 1; i >= 0; i--) {
			gzar.position(pos.getLong(i));
			final GZIPArchive.ReadEntry re = gzar.getEntry();
			if (re == null) break;
			final LazyInflater lin = re.lazyInflater;
			final InputStream in = lin.get();

			// A single read() may legitimately return fewer bytes than requested, so keep
			// reading until the buffer is full; otherwise stale bytes would be compared.
			int filled = 0;
			while (filled < actualMagic.length) {
				final int count = in.read(actualMagic, filled, actualMagic.length - filled);
				if (count < 0) throw new IOException("Entry " + i + " ended before its magic bytes could be read");
				filled += count;
			}
			assertArrayEquals(EXPECTED_MAGIC, actualMagic);

			// Read some more bytes (an amount that depends on the entry index) before consuming the entry.
			for (int j = 0; j < (i + 1) * 512; j++) in.read();
			lin.consume();
		}
	}
}
 
示例4
/** Skips a length-prefixed item: decodes a length stored seven bits per byte (every byte
 * but the last one has its high bit set) and then skips that many bytes.
 *
 * @param is the stream to advance.
 * @throws EOFException if the stream ends while the length is being decoded.
 * @throws IOException if fewer bytes than the decoded length could be skipped.
 */
@Override
public void skip(final FastBufferedInputStream is) throws IOException {
	int size = 0;
	int current = is.read();

	// Bytes with the high bit set carry seven more bits and announce a further byte.
	while (current >= 0x80) {
		size = (size | (current & 0x7F)) << 7;
		current = is.read();
	}
	if (current == -1) throw new EOFException();
	size |= current;

	final long skipped = is.skip(size);
	if (skipped != size) throw new IOException("Asked for " + size + " but got " + skipped);
}
 
示例5
/** Skips a string stored as a length (seven bits per byte, high bit set on every byte but
 * the last one) followed by that many encoded characters of one, two or three bytes each.
 *
 * <p>The number of bytes of each character is derived from the high nibble of its first byte.
 * A truncated stream is always reported with an {@link EOFException}, both while decoding
 * the length and while skipping the characters.
 *
 * @param is the stream to advance.
 * @throws EOFException if the stream ends before the whole string has been skipped.
 * @throws UTFDataFormatException if the first byte of a character is not a valid lead byte.
 * @throws IOException if the underlying stream fails.
 */
@Override
public void skip(final FastBufferedInputStream is) throws IOException {
	// Borrowed from it.unimi.dsi.lang.MutableString.
	int length = 0, b;

	for(;;) {
		if ((b = is.read()) < 0) throw new EOFException();
		if ((b & 0x80) == 0) break;
		length |= b & 0x7F;
		length <<= 7;
	}
	length |= b;

	for(int i = 0; i < length; i++) {
		// Detect end of file explicitly instead of letting -1 be mistaken for a malformed lead byte.
		if ((b = is.read()) < 0) throw new EOFException();

		final int trailingBytes;
		switch (b >> 4) {
		case 0:
		case 1:
		case 2:
		case 3:
		case 4:
		case 5:
		case 6:
		case 7:
			trailingBytes = 0;
			break;
		case 12:
		case 13:
			trailingBytes = 1;
			break;
		case 14:
			trailingBytes = 2;
			break;
		default:
			throw new UTFDataFormatException();
		}

		// A short skip means that the character was cut off by the end of the stream.
		if (trailingBytes != 0 && is.skip(trailingBytes) != trailingBytes) throw new EOFException();
	}
}
 
示例6
/** Flushes the pending output and returns an iterator reading the backing file from the start.
 *
 * @return a new {@link OfflineIterator} built on a freshly opened stream over {@code file}.
 * @throws RuntimeException wrapping any {@link IOException} raised while flushing or opening the file.
 */
public OfflineIterator<T, U> iterator() {
	try {
		// Make sure that everything written so far is visible to the reader opened below.
		dos.flush();
		final FileInputStream rawInput = new FileInputStream( file );
		final FastBufferedInputStream bufferedInput = new FastBufferedInputStream( rawInput );
		final DataInputStream dataInput = new DataInputStream( bufferedInput );
		return new OfflineIterator<T, U>( dataInput, serializer, store, size );
	}
	catch ( IOException cause ) {
		throw new RuntimeException( cause );
	}
}
 
示例7
/**
 * Creates a scanner reading from a file of a Hadoop file system, given the file name.
 *
 * <p>The stream is registered with this scanner's {@code closer}.
 *
 * @param filename File name, turned into a {@link Path} before opening.
 * @param fileSystem File system used to open the file.
 * @throws IOError wrapping any {@link IOException} raised while opening the file.
 */
public IndexOffsetScanner(final String filename, final FileSystem fileSystem) {
    closer = Closer.create();
    try {
        final Path location = new Path(filename);
        final FastBufferedInputStream buffered = new FastBufferedInputStream(fileSystem.open(location));
        this.input = closer.register(new DataInputStream(buffered));
    } catch (IOException cause) {
        throw new IOError(cause);
    }
}
 
示例8
/**
 * Creates a scanner reading from a file of a Hadoop file system, given the file path.
 *
 * <p>The stream is registered with this scanner's {@code closer}.
 *
 * @param path File path.
 * @param fileSystem File system used to open the file.
 * @throws IOError wrapping any {@link IOException} raised while opening the file.
 */
public IndexOffsetScanner(final Path path, final FileSystem fileSystem) {
    closer = Closer.create();
    try {
        final FastBufferedInputStream buffered = new FastBufferedInputStream(fileSystem.open(path));
        final DataInputStream dataInput = new DataInputStream(buffered);
        this.input = closer.register(dataInput);
    } catch (IOException cause) {
        throw new IOError(cause);
    }
}
 
示例9
/** Does nothing: this implementation consumes no bytes from the stream.
 *
 * @param is the stream, which is left untouched.
 */
@Override
public void skip(final FastBufferedInputStream is) throws IOException {
	// Intentionally empty.
}
 
示例10
/** Skips a fixed-size item of four bytes.
 *
 * @param is the stream to advance.
 * @throws IOException if fewer than four bytes could be skipped (for example, because the
 * stream is truncated) or the underlying stream fails.
 */
@Override
public void skip(final FastBufferedInputStream is) throws IOException {
	// Check the outcome, as the variable-length skippers do, so that truncated input is not silently accepted.
	final long actual = is.skip(4);
	if (actual != 4) throw new IOException("Asked for 4 but got " + actual);
}
 
示例11
/** Skips a length-prefixed item: obtains the length from {@code Util.readVByte} and then
 * skips that many bytes.
 *
 * @param is the stream to advance.
 * @throws IOException if fewer bytes than the announced length could be skipped.
 */
@Override
public void skip(final FastBufferedInputStream is) throws IOException {
	final int expected = Util.readVByte(is);
	final long skipped = is.skip(expected);
	if (skipped == expected) return;
	throw new IOException("Asked for " + expected + " but got " + skipped);
}
 
示例12
/** Prepares the bucket to be consumed: flushes {@code aux} and opens a new
 * {@link FastBufferedInputStream} over {@code auxFile}, built with {@code ioBuffer}.
 *
 * @throws IOException if flushing fails or the auxiliary file cannot be opened.
 */
public void prepare() throws IOException {
	// Flush first, so that the reader opened below sees everything written so far.
	aux.flush();
	final FileInputStream auxInput = new FileInputStream(auxFile);
	auxFbis = new FastBufferedInputStream(auxInput, ioBuffer);
}
 
示例13
public static void main(String arg[]) throws IOException, InterruptedException, JSAPException {

	SimpleJSAP jsap = new SimpleJSAP(WarcCompressor.class.getName(),
		"Given a store uncompressed, write a compressed store.",
		new Parameter[] { new FlaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', "output", "The output filename  (- for stdout)."),
		new UnflaggedOption("store", JSAP.STRING_PARSER, JSAP.NOT_REQUIRED, "The name of the store (if omitted, stdin)."),
	});

	JSAPResult jsapResult = jsap.parse(arg);
	if (jsap.messagePrinted()) return;

	final InputStream in = jsapResult.userSpecified("store") ? new FastBufferedInputStream(new FileInputStream(jsapResult.getString("store"))) : System.in;

	final WarcReader reader = new UncompressedWarcReader(in);
	final ProgressLogger pl = new ProgressLogger(LOGGER, 1, TimeUnit.MINUTES, "records");
	final String output = jsapResult.getString("output");

	PrintStream out = "-".equals(output) ? System.out : new PrintStream(new FastBufferedOutputStream(new FileOutputStream(output)), false, "UTF-8");
	final WarcWriter writer = new CompressedWarcWriter(out);

	pl.itemsName = "records";
	pl.displayFreeMemory = true;
	pl.displayLocalSpeed = true;
	pl.start("Scanning...");

	for (long storePosition = 0;; storePosition++) {
	    LOGGER.trace("STOREPOSITION " + storePosition);
	    WarcRecord record = null;
	    try {
		record = reader.read();
	    } catch (Exception e) {
		LOGGER.error("Exception while reading record " + storePosition + " ");
		LOGGER.error(e.getMessage());
		e.printStackTrace();
		continue;
	    }
	    if (record == null)
		break;
	    writer.write(record);
	    pl.lightUpdate();
	}
	pl.done();
	writer.close();
    }
 
示例14
/** Command-line entry point: builds a {@link ParallelFilteredProcessorRunner} over a store
 * (or standard input), attaches one processor/writer/output triple per declaration and runs
 * it, either sequentially or with the requested number of threads.
 *
 * <p>All the output streams created here are closed, and the store opened here is released,
 * even if the setup or the run fails.
 *
 * @param arg the command-line arguments.
 * @throws IllegalArgumentException if processors, writers and outputs are not specified the same number of times.
 * @throws Exception if parsing, setup or processing fails.
 */
public static void main(final String[] arg) throws Exception {
	final SimpleJSAP jsap = new SimpleJSAP(ParallelFilteredProcessorRunner.class.getName(), "Processes a store.",
			new Parameter[] {
			new FlaggedOption("filter", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'f', "filter", "A WarcRecord filter that recods must pass in order to be processed."),
			new FlaggedOption("processor", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'p', "processor", "A processor to be applied to data.").setAllowMultipleDeclarations(true),
			new FlaggedOption("writer", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'w', "writer", "A writer to be applied to the results.").setAllowMultipleDeclarations(true),
			new FlaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'o', "output", "The output filename  (- for stdout).").setAllowMultipleDeclarations(true),
			new FlaggedOption("threads", JSAP.INTSIZE_PARSER, Integer.toString(Runtime.getRuntime().availableProcessors()), JSAP.NOT_REQUIRED, 'T', "threads", "The number of threads to be used."),
			new Switch("sequential", 'S', "sequential"),
			new UnflaggedOption("store", JSAP.STRING_PARSER, JSAP.NOT_REQUIRED, "The name of the store (if omitted, stdin)."),
	});

	final JSAPResult jsapResult = jsap.parse(arg);
	if (jsap.messagePrinted()) return;

	final String filterSpec = jsapResult.getString("filter");
	final Filter<WarcRecord> filter;
	if (filterSpec != null) {
		final FilterParser<WarcRecord> parser = new FilterParser<>(WarcRecord.class);
		filter = parser.parse(filterSpec);
	} else
		filter = null;
	final InputStream in = jsapResult.userSpecified("store") ? new FastBufferedInputStream(new FileInputStream(jsapResult.getString("store"))) : System.in;
	try {
		final ParallelFilteredProcessorRunner parallelFilteredProcessorRunner = new ParallelFilteredProcessorRunner(in, filter);

		final String[] processor =  jsapResult.getStringArray("processor");
		final String[] writer =  jsapResult.getStringArray("writer");
		final String[] output =  jsapResult.getStringArray("output");
		if (processor.length != writer.length) throw new IllegalArgumentException("You must specify the same number or processors and writers");
		if (output.length != writer.length) throw new IllegalArgumentException("You must specify the same number or output specifications and writers");

		final String[] packages = new String[] { ParallelFilteredProcessorRunner.class.getPackage().getName() };
		final PrintStream[] ops = new PrintStream[processor.length];
		try {
			for (int i = 0; i < processor.length; i++) {
				ops[i] = "-".equals(output[i]) ? System.out : new PrintStream(new FastBufferedOutputStream(new FileOutputStream(output[i])), false, "UTF-8");
				// TODO: these casts to SOMETHING<Object> are necessary for compilation under Eclipse. Check in the future.
				parallelFilteredProcessorRunner.add((Processor<Object>)ObjectParser.fromSpec(processor[i], Processor.class, packages, new String[] { "getInstance" }),
						(Writer<Object>)ObjectParser.fromSpec(writer[i], Writer.class,  packages, new String[] { "getInstance" }),
						ops[i]);
			}

			if (jsapResult.userSpecified("sequential")) parallelFilteredProcessorRunner.runSequentially();
			else parallelFilteredProcessorRunner.run(jsapResult.getInt("threads"));
		} finally {
			// Entries are null if the setup loop failed before reaching them.
			for (final PrintStream op : ops) if (op != null) op.close();
		}
	} finally {
		// Release the store file; standard input is not ours to close.
		if (in != System.in) in.close();
	}

}
 
示例15
/** Writes {@code NUM_RECORDS} random responses through a {@link ParallelBufferedWarcWriter}
 * from {@code NUM_THREADS} threads, once with and once without compression, then reads the
 * records back and checks that each content matches and that every position was seen.
 */
@Test
public void testRecord() throws IOException, InterruptedException, URISyntaxException {

	final boolean[] compressionModes = { true, false };
	for (final boolean gzip : compressionModes) {
		final FastByteArrayOutputStream out = new FastByteArrayOutputStream();
		final ParallelBufferedWarcWriter warcParallelOutputStream = new ParallelBufferedWarcWriter(out, gzip);
		final Thread[] workers = new Thread[NUM_THREADS];

		final URI fakeUri = new URI("http://this.is/a/fake");
		final RandomTestMocks.HttpResponse[] response = new RandomTestMocks.HttpResponse[NUM_RECORDS];
		for (int t = 0; t < NUM_THREADS; t++) {
			// The thread name carries the worker index, which run() parses back.
			final Thread worker = new Thread(Integer.toString(t)) {
				@Override
				public void run() {
					final int index = Integer.parseInt(getName());
					final int share = NUM_RECORDS / NUM_THREADS;
					final int end = (index + 1) * share;
					for (int r = index * share; r < end; r++) {
						try {
							response[r] = new RandomTestMocks.HttpResponse(MAX_NUMBER_OF_HEADERS, MAX_LENGTH_OF_HEADER, MAX_LENGTH_OF_BODY, r);
							final HttpResponseWarcRecord record = new HttpResponseWarcRecord(fakeUri, response[r]);
							warcParallelOutputStream.write(record);
							LOGGER.info("Thread " + index + " wrote record " + r);
						} catch (Exception e) {
							throw new RuntimeException(e);
						}
					}
				}
			};
			workers[t] = worker;
			worker.start();
		}

		for (final Thread worker : workers) worker.join();
		warcParallelOutputStream.close();
		out.close();

		// Read back what was written, from the bytes accumulated in memory.
		final FastBufferedInputStream in = new FastBufferedInputStream(new FastByteArrayInputStream(out.array, 0, out.length));
		final WarcReader reader;
		if (gzip) reader = new CompressedWarcReader(in);
		else reader = new UncompressedWarcReader(in);

		final boolean[] found = new boolean[NUM_RECORDS];
		for (int k = 0; k < NUM_RECORDS; k++) {
			final HttpResponseWarcRecord readBack = (HttpResponseWarcRecord) reader.read();
			final int pos = Integer.parseInt(readBack.getFirstHeader("Position").getValue());
			found[pos] = true;
			final byte[] expectedContent = ByteStreams.toByteArray(response[pos].getEntity().getContent());
			final byte[] actualContent = ByteStreams.toByteArray(readBack.getEntity().getContent());
			assertArrayEquals(expectedContent, actualContent);
		}
		in.close();

		for (int k = NUM_RECORDS - 1; k >= 0; k--) assertTrue(Integer.toString(k), found[k]);
	}
}
 
示例16
/** Skips an object, usually without deserializing it.
 *
 * <p>Note that this method
 * <em>explicitly requires a {@link FastBufferedInputStream}</em>. As
 * a result, implementations can safely use {@link FastBufferedInputStream#skip(long) skip()} to
 * skip the number of bytes required (see the documentation of {@link FastBufferedInputStream#skip(long)}
 * for some elaboration).
 *
 * <p>Calling this method must be equivalent to calling {@link #fromStream(InputStream)}
 * and discarding the result.
 *
 * @param is the fast buffered input stream from which the next object will be skipped.
 * @throws IOException if an I/O error occurs while skipping the object.
 */
public void skip(FastBufferedInputStream is) throws IOException;