Java源码示例:it.unimi.dsi.io.FastBufferedReader

示例1
public AnchorIterator(File inputFile) throws IOException
{
	anchor = null;
	links = new Int2IntOpenHashMap(1024);
	links.defaultReturnValue(0);
	originals = new HashSet<String>(32);
	in = new FastBufferedReader(new InputStreamReader(new FileInputStream(inputFile), Charset.forName("UTF-8")));
	line = new MutableString(1024);
	in.readLine(line);
	lastAnchor = Chars.split(line, TextDataset.SEP_CHAR)[0].toString();
	scroll = 1;
	end = false;
}
 
示例2
/**
 * Collects, for every bulleted line of the given text, the first link found on that line.
 *
 * <p>A bulleted line is a line whose first non-blank character is <code>*</code>. The leading
 * run of asterisks is removed, the remainder is trimmed and handed to
 * <code>extractLinkFromCleanedLine</code>; only the first link it returns is kept.
 *
 * @param cleanText the text to scan, one entry per line.
 * @return the links found, in line order; empty if there are none.
 */
public List<WikiLink> extractDisambiguationLinks(MutableString cleanText)
	{
		FastBufferedReader tokenizer = new FastBufferedReader(cleanText);
		MutableString buffer = new MutableString(1024);
		List<WikiLink> links = new ArrayList<WikiLink>();

		try {
			while (tokenizer.readLine(buffer) != null)
			{
				buffer.trim();
				if (buffer.length() == 0 || buffer.charAt(0) != '*') continue;

				// Find the end of the leading run of asterisks.
				int start = 1;
				while (start < buffer.length() && buffer.charAt(start) == '*') start++;

				// Drop the bullet markers and keep the content that follows them
				// (deleting [start, length) would keep only the asterisks).
				buffer.delete(0, start).trim();
				if (buffer.length() == 0) continue;

				List<WikiLink> lineLinks = extractLinkFromCleanedLine(buffer);
				if (!lineLinks.isEmpty()) links.add(lineLinks.get(0));
			}
		} catch (IOException ignored) {
			// The reader wraps an in-memory string, so no real I/O happens here;
			// should a read fail anyway, the links gathered so far are returned.
		}

		return links;
	}
 
示例3
/**
 * Reads tab-separated lines (store index, store position, URL) from standard input and,
 * whenever a line carries the same URL as the line before it, echoes that line to standard
 * output and records its packed (index, position) pair. The resulting set is serialised
 * to the file named by the first argument.
 *
 * @param arg the command-line arguments; <code>arg[0]</code> is the output filename.
 * @throws IOException if reading standard input or writing the set fails.
 */
public static void main(String[] arg) throws IOException {
	if (arg.length == 0) {
		System.err.println("Usage: " + BuildRepetitionSet.class.getSimpleName() + " REPETITIONSET");
		System.exit(1);
	}

	final String outputFilename = arg[0];
	final LongOpenHashSet repeatedSet = new LongOpenHashSet();
	final FastBufferedReader input = new FastBufferedReader(new InputStreamReader(System.in, Charsets.US_ASCII));
	final MutableString record = new MutableString();
	final ProgressLogger pl = new ProgressLogger();
	pl.itemsName = "lines";

	MutableString previousUrl = new MutableString();
	pl.start("Reading... ");
	for (;;) {
		if (input.readLine(record) == null) break;

		// Field boundaries: index <TAB> position <TAB> url
		final int indexEnd = record.indexOf('\t');
		final int positionEnd = record.indexOf('\t', indexEnd + 1);
		final MutableString currentUrl = record.substring(positionEnd + 1);

		if (currentUrl.equals(previousUrl)) {
			final String indexField = new String(record.array(), 0, indexEnd);
			final int storeIndex = Integer.parseInt(indexField);
			final String positionField = new String(record.array(), indexEnd + 1, positionEnd - indexEnd - 1);
			final long storePosition = Long.parseLong(positionField);

			// The store index goes in the bits above bit 47, the position below.
			repeatedSet.add((long)storeIndex << 48 | storePosition);
			System.out.println(storeIndex + "\t" + storePosition + "\t" + currentUrl);
		}

		previousUrl = currentUrl;
		pl.lightUpdate();
	}
	pl.done();

	input.close();
	BinIO.storeObject(repeatedSet, outputFilename);
}
 
示例4
/** Adds a (or a set of) new IPv4 to the black list; the IPv4 can be specified directly or it can be a file (prefixed by
 *  <code>file:</code>).
 *
 * <p>When a file is given, it is read as ISO-8859-1 text, one address per line; empty lines are skipped.
 * The file is closed before this method returns, even if an address is rejected.
 *
 * @param spec the specification (an IP address, or a file prefixed by <code>file:</code>).
 * @throws ConfigurationException
 * @throws FileNotFoundException if the file named by the specification cannot be opened.
 */
public void addBlackListedIPv4(final String spec) throws ConfigurationException, FileNotFoundException {
	if (spec.length() == 0) return; // Skip empty specs
	if (! spec.startsWith("file:")) {
		blackListedIPv4Addresses.add(handleIPv4(spec));
		return;
	}

	final FastBufferedReader reader = new FastBufferedReader(new InputStreamReader(new FileInputStream(spec.substring(5)), Charsets.ISO_8859_1));
	try {
		final LineIterator lineIterator = new LineIterator(reader);
		while (lineIterator.hasNext()) {
			final MutableString line = lineIterator.next();
			if (line.length() > 0) blackListedIPv4Addresses.add(handleIPv4(line.toString()));
		}
	}
	finally {
		// Release the file descriptor. The signature cannot propagate a checked IOException,
		// and a failure while closing a file we only read from loses no data.
		try {
			reader.close();
		}
		catch (java.io.IOException ignored) {
			// Nothing useful can be done at this point.
		}
	}
}
 
示例5
/** Adds a (or a set of) new host to the black list; the host can be specified directly or it can be a file (prefixed by
 *  <code>file:</code>).
 *
 * <p>Hosts are stored as the hash code of their trimmed name. When a file is given, it is read as
 * ISO-8859-1 text, one host per line, and is closed before this method returns.
 *
 * @param spec the specification (a host, or a file prefixed by <code>file:</code>).
 * @throws ConfigurationException
 * @throws FileNotFoundException if the file named by the specification cannot be opened.
 */
public void addBlackListedHost(final String spec) throws ConfigurationException, FileNotFoundException 	{
	if (spec.length() == 0) return; // Skip empty specs
	if (! spec.startsWith("file:")) {
		blackListedHostHashes.add(spec.trim().hashCode());
		return;
	}

	final FastBufferedReader reader = new FastBufferedReader(new InputStreamReader(new FileInputStream(spec.substring(5)), Charsets.ISO_8859_1));
	try {
		final LineIterator lineIterator = new LineIterator(reader);
		while (lineIterator.hasNext()) {
			final MutableString line = lineIterator.next();
			blackListedHostHashes.add(line.toString().trim().hashCode());
		}
	}
	finally {
		// Release the file descriptor. The signature cannot propagate a checked IOException,
		// and a failure while closing a file we only read from loses no data.
		try {
			reader.close();
		}
		catch (java.io.IOException ignored) {
			// Nothing useful can be done at this point.
		}
	}
}
 
示例6
/**
 * Parses a <code>robots.txt</code> file for a given user agent, prints the resulting prefixes
 * to standard error, and then, for every URL read from standard input, prints the outcome
 * of <code>apply</code> followed by a tab and the URL.
 *
 * @param arg <code>arg[0]</code> is the <code>robots.txt</code> file, <code>arg[1]</code> the user agent.
 * @throws IOException if the file or standard input cannot be read.
 */
public static void main(String arg[]) throws IOException {
	final char[][] robotsResult;
	// NOTE(review): FileReader decodes with the platform default charset; kept as in the original.
	try (FileReader robotsFile = new FileReader(arg[0])) {
		// Close the file as soon as it has been parsed, also when parsing fails.
		robotsResult = URLRespectsRobots.parseRobotsReader(robotsFile, arg[1]);
	}
	for(char[] a: robotsResult) System.err.println(new String(a));

	try (FastBufferedReader in = new FastBufferedReader(new InputStreamReader(System.in, Charsets.US_ASCII))) {
		final MutableString s = new MutableString();
		while(in.readLine(s) != null) {
			final URI uri = BURL.parse(s);
			System.out.println(apply(robotsResult, uri) + "\t" + uri);
		}
	}
}
 
示例7
/**
 * Command-line entry point: loads a serialised function, reads a newline-separated list of
 * strings (from a file or from standard input, optionally gzip-compressed) and stores the
 * resulting signed string map.
 *
 * @param arg the command-line arguments, parsed with JSAP.
 */
@SuppressWarnings("unchecked")
public static void main( final String[] arg ) throws NoSuchMethodException, IOException, JSAPException, ClassNotFoundException {

	final Parameter[] options = {
		new FlaggedOption( "bufferSize", JSAP.INTSIZE_PARSER, "64Ki", JSAP.NOT_REQUIRED, 'b',  "buffer-size", "The size of the I/O buffer used to read strings." ),
		new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The string file encoding." ),
		new Switch( "zipped", 'z', "zipped", "The string list is compressed in gzip format." ),
		new FlaggedOption( "width", JSAP.INTEGER_PARSER, Integer.toString( Integer.SIZE ), JSAP.NOT_REQUIRED, 'w', "width", "The signature width in bits." ),
		new UnflaggedOption( "function", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the function to be signed." ),
		new UnflaggedOption( "map", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the resulting serialised signed string map." ),
		new UnflaggedOption( "stringFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "Read strings from this file instead of standard input." ),
	};
	final SimpleJSAP jsap = new SimpleJSAP( ShiftAddXorSignedStringMap.class.getName(), "Builds a shift-add-xor signed string map by reading a newline-separated list of strings and a function built on the same list of strings.", options );

	final JSAPResult parsed = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;

	final String functionName = parsed.getString( "function" );
	final String mapName = parsed.getString( "map" );
	final String stringFile = parsed.getString( "stringFile" );
	final boolean zipped = parsed.getBoolean( "zipped" );
	final Charset encoding = (Charset)parsed.getObject( "encoding" );
	final int bufferSize = parsed.getInt( "bufferSize" );
	final int width = parsed.getInt( "width" );

	// Pick the source of the strings, then unwrap the optional gzip layer.
	final InputStream source;
	if ( stringFile == null ) source = System.in;
	else source = new FileInputStream( stringFile );
	final InputStream plain = zipped ? new GZIPInputStream( source ) : source;

	final FastBufferedReader lineReader = new FastBufferedReader( new InputStreamReader( plain, encoding ), bufferSize );
	final Iterator<MutableString> iterator = new LineIterator( lineReader );
	final Object2LongFunction<CharSequence> function = (Object2LongFunction<CharSequence>)BinIO.loadObject( functionName );

	LOGGER.info( "Signing..." );
	final ShiftAddXorSignedStringMap signedMap = new ShiftAddXorSignedStringMap( iterator, function, width );
	BinIO.storeObject( signedMap, mapName );
	LOGGER.info( "Completed." );
}
 
示例8
/**
 * Command-line entry point: reads a newline-separated list of terms from standard input
 * (optionally gzip-compressed), builds a front-coded string list from it and serialises
 * the list to the given file.
 *
 * @param arg the command-line arguments, parsed with JSAP.
 */
public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException {

	final Parameter[] options = {
		new FlaggedOption( "bufferSize", IntSizeStringParser.getParser(), "64Ki", JSAP.NOT_REQUIRED, 'b',  "buffer-size", "The size of the I/O buffer used to read terms." ),
		new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ),
		new FlaggedOption( "ratio", IntSizeStringParser.getParser(), "4", JSAP.NOT_REQUIRED, 'r',  "ratio", "The compression ratio." ),
		new Switch( "utf8", 'u', "utf8", "Store the strings as UTF-8 byte arrays." ),
		new Switch( "zipped", 'z', "zipped", "The term list is compressed in gzip format." ),
		new UnflaggedOption( "frontCodedList", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised front-coded list." )
	};
	final SimpleJSAP jsap = new SimpleJSAP( FrontCodedStringList.class.getName(), "Builds a front-coded string list reading from standard input a newline-separated ordered list of terms.", options );

	final JSAPResult parsed = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;

	final String listName = parsed.getString( "frontCodedList" );
	final Charset encoding = (Charset)parsed.getObject( "encoding" );
	final int bufferSize = parsed.getInt( "bufferSize" );
	final int ratio = parsed.getInt( "ratio" );
	final boolean zipped = parsed.getBoolean( "zipped" );
	final boolean utf8 = parsed.getBoolean( "utf8" );

	final ProgressLogger pl = new ProgressLogger();
	pl.itemsName = "words";
	pl.start( "Reading words..." );

	// The input is opened only after logging has started, as the gzip header is read on construction.
	final InputStreamReader decoder = new InputStreamReader( zipped ? new GZIPInputStream( System.in ) : System.in, encoding );
	final LineIterator words = new LineIterator( new FastBufferedReader( decoder, bufferSize ), pl );
	final FrontCodedStringList frontCodedStringList = new FrontCodedStringList( words, ratio, utf8 );
	pl.done();

	System.err.print( "Writing to file..." );
	BinIO.storeObject( frontCodedStringList, listName );
	System.err.println( " done." );
}
 
示例9
/**
 * Command-line entry point: reads a newline-separated list of terms from standard input,
 * adds each of them to a ternary interval search tree and serialises the tree to the given file.
 *
 * @param arg the command-line arguments, parsed with JSAP.
 */
public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException {

		final Parameter[] options = {
			new FlaggedOption( "bufferSize", JSAP.INTSIZE_PARSER, "64Ki", JSAP.NOT_REQUIRED, 'b',  "buffer-size", "The size of the I/O buffer used to read terms." ),
			new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ),
			new UnflaggedOption( "tree", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised tree." )
		};
		final SimpleJSAP jsap = new SimpleJSAP( TernaryIntervalSearchTree.class.getName(), "Builds a ternary interval search tree reading from standard input a newline-separated list of terms.", options );

		final JSAPResult parsed = jsap.parse( arg );
		if ( jsap.messagePrinted() ) return;

		final TernaryIntervalSearchTree tree = new TernaryIntervalSearchTree();
		final MutableString currentTerm = new MutableString();

		final ProgressLogger pl = new ProgressLogger();
		pl.itemsName = "terms";

		final Charset encoding = (Charset)parsed.getObject( "encoding" );
		final int bufferSize = parsed.getInt( "bufferSize" );
		final FastBufferedReader terms = new FastBufferedReader( new InputStreamReader( System.in, encoding ), bufferSize );

		pl.start( "Reading terms..." );
		for ( ;; ) {
			if ( terms.readLine( currentTerm ) == null ) break;
			pl.update();
			tree.add( currentTerm );
		}
		pl.done();

		final String treeName = parsed.getString( "tree" );
		BinIO.storeObject( tree, treeName );
	}
 
示例10
/**
 * Command-line entry point: reads a newline-separated list of terms from standard input,
 * adds each of them to a Bloom filter of the requested size and precision, and serialises
 * the filter to the given file.
 *
 * @param arg the command-line arguments, parsed with JSAP.
 */
public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException {
	
	final SimpleJSAP jsap = new SimpleJSAP( BloomFilter.class.getName(), "Creates a Bloom filter reading from standard input a newline-separated list of terms.",
			new Parameter[] {
				new FlaggedOption( "bufferSize", IntSizeStringParser.getParser(), "64Ki", JSAP.NOT_REQUIRED, 'b',  "buffer-size", "The size of the I/O buffer used to read terms." ),
				new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ),
				// The help text used to describe a front-coded list (copy-and-paste slip); this tool writes a Bloom filter.
				new UnflaggedOption( "bloomFilter", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised Bloom filter." ),
				new UnflaggedOption( "size", JSAP.INTSIZE_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The size of the filter (i.e., the expected number of elements in the filter; usually, the number of terms)." ),
				new UnflaggedOption( "precision", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The precision of the filter." )
	});
	
	JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;
	
	final int bufferSize = jsapResult.getInt( "bufferSize" );
	final String filterName = jsapResult.getString( "bloomFilter" );
	final Charset encoding = (Charset)jsapResult.getObject( "encoding" );

	BloomFilter filter = new BloomFilter( jsapResult.getInt( "size" ), jsapResult.getInt( "precision" ) );
	final ProgressLogger pl = new ProgressLogger();
	pl.itemsName = "terms";
	pl.start( "Reading terms..." );
	// A single buffer is reused for every line read.
	MutableString s = new MutableString();
	FastBufferedReader reader = new FastBufferedReader( new InputStreamReader( System.in, encoding ), bufferSize );
	while( reader.readLine( s ) != null ) { 
		filter.add( s );
		pl.lightUpdate();
	}
	pl.done();

	BinIO.storeObject( filter, filterName );
}
 
示例11
/**
 * Checks that a line iterator over <code>TEXT</code> returns the lines in <code>LINES</code>,
 * in order, and that it returns all of them.
 *
 * @param pl the progress logger handed to the line iterator.
 */
public void testLineIterator( ProgressLogger pl ) {
	final LineIterator lineIterator = new LineIterator( new FastBufferedReader( new StringReader( TEXT ) ), pl );
	int i = 0;
	while( lineIterator.hasNext() )
		assertEquals( LINES[ i++ ].toString(), lineIterator.next().toString() );

	// Expected value first, as in the assertion above, so a failure reports the two counts the right way round.
	assertEquals( LINES.length, i );
}
 
示例12
/**
 * Checks the specification string returned by <code>toSpec()</code> for readers built with
 * no arguments, a buffer size, a word-constituent string, and both.
 */
public void testToSpec() {
	final String readerClass = FastBufferedReader.class.getName();

	final FastBufferedReader plain = new FastBufferedReader();
	assertEquals( readerClass, plain.toSpec() );

	final FastBufferedReader sized = new FastBufferedReader( 100 );
	assertEquals( readerClass + "(100)", sized.toSpec() );

	final FastBufferedReader withConstituents = new FastBufferedReader( "_" );
	assertEquals( readerClass + "(\"_\")", withConstituents.toSpec() );

	final FastBufferedReader sizedWithConstituents = new FastBufferedReader( "100", "_" );
	assertEquals( readerClass + "(100,\"_\")", sizedWithConstituents.toSpec() );
}
 
示例13
/**
 * Opens <code>input</code> as UTF-8 text and puts this object in the idle state.
 *
 * @throws IOException if the input cannot be opened.
 */
protected void start() throws IOException
{
	final FileInputStream rawInput = new FileInputStream(input);
	final InputStreamReader decoder = new InputStreamReader(rawInput, Charset.forName("UTF-8"));
	reader = new FastBufferedReader(decoder);
	state = State.IDLE;
}
 
示例14
/**
 * Parses the DBpedia category file of the given language into a map from article title
 * to the categories listed for it.
 *
 * <p>Lines starting with <code>#</code> are skipped. On every other line the title is the
 * first <code>/resource/...&gt;</code> match and the category is the first
 * <code>/resource/prefix:...&gt;</code> match after it; underscores become spaces in both.
 * Lines on which either match fails are counted as errors and skipped.
 *
 * @param lang the language whose category file is read.
 * @return the categories of each title, in file order.
 * @throws IOException if the file cannot be opened or read.
 */
public static HashMap<String, List<String>> parseDBPediaCategories(String lang) throws IOException
{
	PLogger plog = new PLogger(log, Step.TEN_MINUTES, "Lines", "Articles", "Errors");
	plog.start("Parsing DBPEDIA categories");
	
	HashMap<String, List<String>> cats = new HashMap<String, List<String>>(1600000);

	Pattern patTitle = Pattern.compile("/resource/([^>]*)>");
	Pattern patCat = Pattern.compile("/resource/[^:</]*:([^>]*)>");

	File dbpedia_cat = WikipediaFiles.DBPEDIA_CAT.getSourceFile(lang);
	FastBufferedReader fbr = new FastBufferedReader(new InputStreamReader(new FileInputStream(dbpedia_cat), Charset.forName("UTF-8")));
	
	// The reader is closed in the finally clause so that a failure while parsing does not leak the file.
	try
	{
		MutableString line = new MutableString(1024);
		while(fbr.readLine(line) != null)
		{
			plog.update(0);
			line.trim();
			if (line.startsWith("#")) continue;
			
			Matcher m = patTitle.matcher(line);
			if (!m.find())
			{
				plog.update(2);
				continue;
			}
			String title = m.group(1).replace('_', ' ');
			
			// The category is searched for only after the end of the title match.
			int lastCharTitle = m.end();
			m = patCat.matcher(line);
			if (!m.find(lastCharTitle))
			{
				plog.update(2);
				continue;
			}
			String cat = m.group(1).replace('_', ' ');
			
			// Single lookup: the map never holds null values, so null means "title not seen yet".
			List<String> titleCats = cats.get(title);
			if (titleCats == null)
			{
				plog.update(1);
				titleCats = new ArrayList<String>();
				cats.put(title, titleCats);
			}
			titleCats.add(cat);
		}
		plog.stop();
	}
	finally
	{
		fbr.close();
	}
	
	return cats;
}
 
示例15
/**
 * Flattens line-structured markup into running text.
 *
 * <p>The input is read line by line; every line is trimmed and empty lines are skipped.
 * What is kept depends on the first character of the line:
 * <ul>
 * <li><code>=</code>: leading and trailing spaces and equal signs are stripped, and the rest
 * is appended followed by <code>". "</code>;
 * <li><code>*</code>, <code>#</code>, <code>:</code>, <code>;</code>: the leading run of those
 * characters and spaces is skipped and the rest is appended;
 * <li><code>{</code>, <code>|</code>: the line is dropped;
 * <li><code>.</code>, <code>-</code>: the line is appended after the <code>delete</code> call
 * on those two characters;
 * <li>anything else: the line is appended unless it ends with <code>}</code>.
 * </ul>
 * Except for the <code>=</code> case, a <code>'.'</code> is added when the text does not already
 * end with one of <code>:.;,-</code>, and every appended fragment ends with a space.
 *
 * @param input the text to process.
 * @param onlyAbstract if true, processing stops as soon as more than
 * <code>MIN_ABSTRACT_CHARS</code> characters have been accumulated.
 * @return the accumulated text, without its last character (the trailing space).
 */
public MutableString removeStructure(MutableString input, boolean onlyAbstract)
	{
		
		MutableString buffer = new MutableString(1024);
		FastBufferedReader tokenizer = new FastBufferedReader(input);
		
		MutableString text = new MutableString(2048);
		// Characters after which no extra full stop is appended.
		String punts = ":.;,-";
		
		try {
			while(tokenizer.readLine(buffer) != null)
			{
				// The length check happens before the next line is processed, so the
				// abstract always ends on a whole line.
				if (text.length() > MIN_ABSTRACT_CHARS && onlyAbstract){
					text.deleteCharAt(text.length()-1);
					return text;					
				}
				
//				MutableString linestr = new MutableString(buffer.trim());
				// NOTE(review): presumably trim() returns the buffer itself, so linestr
				// aliases buffer rather than copying it -- confirm against MutableString.
				MutableString linestr = buffer.trim();
				if (linestr.length() == 0) continue;
				
				int start;
				int end;
				String chars;
				// Only the first line_len entries of the array are scanned below.
				char[] line = linestr.array();
				int line_len = linestr.length();
				
				char first = linestr.charAt(0);
				switch (first)
				{
				case '=':{
					// Strip spaces and equal signs from both ends.
					chars = " =";
					for(start=0; start <line_len && chars.indexOf(line[start])>=0; start++);
					for(end=line_len-1; end >= 0  && chars.indexOf(line[end])>=0; end--);
					
					// Kept only when at least two characters remain (start < end).
					if (start < end){
						text.append(linestr.subSequence(start, end+1));
						text.append(". ");
					}
					break;
				}
					
				case '*':
				case '#':
				case ':':
				case ';':{
					
					// Skip the leading run of marker characters and spaces.
					chars = "*#:; ";
					for(start=0; start<line_len && chars.indexOf(line[start])>=0 ; start++);
					
					// Kept only when at least two characters remain after the markers.
					if (start < line_len-1){
						text.append(linestr.subSequence(start, linestr.length()));
						if (punts.indexOf(text.lastChar())<0)
							text.append('.');
						text.append(' ');
					}
					
					break;
				}
				case '{':
				case '|':
					// Lines starting with these characters contribute nothing.
					break;
				case '.':
				case '-':{
					// NOTE(review): presumably this removes every '.' and '-' in the line,
					// not just the leading ones -- confirm against MutableString.delete(char[]).
					linestr.delete(new char[]{'.','-'});
					if (linestr.length() > 0){
						text.append(linestr);
						if (punts.indexOf(text.lastChar())<0)
							text.append('.');
						text.append(' ');
					}
					break;
				}
				default:{
					// Lines ending with a closing brace are dropped.
					if (linestr.lastChar() == '}')
						break;
					text.append(linestr);
					if (punts.indexOf(text.lastChar())<0)
						text.append('.');
					text.append(' ');
				}
				}
			}
		// A read failure is ignored: whatever text was accumulated so far is returned.
		} catch (IOException e) {}
		// Drop the space that follows the last appended fragment.
		if (text.length()>0) text.deleteCharAt(text.length()-1);
		return text;
	}
 
示例16
/** Parses the argument as if it were the content of a <code>robots.txt</code> file,
 * and returns a sorted array of prefixes of URLs that the agent should not follow.
 *
 * <p>Only lines whose first token is <code>user-agent:</code> or <code>disallow:</code>
 * (ignoring case) are interpreted. If at least one section targets the given agent, only the
 * paths disallowed for it are returned; otherwise the paths disallowed for <code>*</code> are used.
 *
 * @param content the content of the  <code>robots.txt</code> file.
 * @param userAgent the string representing the user agent of interest.
 * @return an array of character arrays, which are prefixes of the URLs not to follow, in sorted order.
 * @throws IOException if reading from <code>content</code> fails.
 */
public static char[][] parseRobotsReader(final Reader content, final String userAgent) throws IOException {
	/* The set of disallowed paths specifically aimed at userAgent. */
	Set<String> set = new ObjectOpenHashSet<>();
	/* The set of disallowed paths specifically aimed at *. */
	Set<String> setStar = new ObjectOpenHashSet<>();
	/* True if the currently examined record is targetted to us. */
	boolean doesMatter = false;
	/* True if we have seen a section targetted to our agent. */
	boolean specific = false;
	/* True if we have seen a section targetted to *. */
	boolean generic = false;
	/* True if we are in a star section. */
	boolean starSection = false;

	StreamTokenizer st = new StreamTokenizer(new FastBufferedReader(content));
	int token;

	// Start from an empty syntax table: every character above the space is part of a word,
	// everything up to the space is whitespace, and '#' is returned as a token of its own.
	st.resetSyntax();
	st.eolIsSignificant(true); // We need EOLs to separate records
	st.wordChars(33, 255); // All characters may appear
	st.whitespaceChars(0, 32);
	st.ordinaryChar('#'); // We must manually simulate comments 8^(
	st.lowerCaseMode(false);

	// Each iteration consumes one line, dispatching on its first token.
	while (true) {
		int lineFirstToken = st.nextToken();
		if (lineFirstToken == StreamTokenizer.TT_EOF) break;

			switch (lineFirstToken) {
				// Blank line: a new block is starting
			case StreamTokenizer.TT_EOL:
				doesMatter = false;
				break;

			// Comment or number: ignore until the end of line
			case StreamTokenizer.TT_NUMBER:
			case '#':
				do {
					token = st.nextToken();
				} while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF);
				break;

			// A string
			case StreamTokenizer.TT_WORD:
				if (st.sval.equalsIgnoreCase("user-agent:")) {
					token = st.nextToken();
					// The else branches below belong to the inner if, i.e., they are
					// evaluated only when the agent name is a word.
					if (token == StreamTokenizer.TT_WORD)
						// The section applies to us when the name in the file is a
						// case-insensitive prefix of our user agent.
						if (StringUtils.startsWithIgnoreCase(userAgent, st.sval)) {
							doesMatter = true;
							specific = true;
							starSection = false;
						}
						else if (st.sval.equals("*")) {
							starSection = true;
							generic = true;
						} else starSection = false;
					// Ignore the rest of the line
					while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF)
						token = st.nextToken();
				} else if (st.sval.equalsIgnoreCase("disallow:")) {
					token = st.nextToken();
					//System.out.println(st.sval + " " + starSection + " " + set + " " + setStar);
					// A disallow line without a path discards the paths gathered so far
					// for the section it appears in.
					if (token == StreamTokenizer.TT_EOL) {
						if (doesMatter) set.clear();
						else if (starSection) setStar.clear();
					} else if (token == StreamTokenizer.TT_WORD) {
						String disallowed = st.sval;
						if (disallowed.endsWith("*")) disallowed = disallowed.substring(0, disallowed.length()-1); // Someone (erroneously) uses * to denote any suffix
						// A section aimed at us takes precedence over a star section.
						if (doesMatter) set.add(disallowed);
						else if (starSection) setStar.add(disallowed);
					}
					// Ignore the rest of the line
					while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF)
						token = st.nextToken();
				} else if (LOGGER.isTraceEnabled()) LOGGER.trace("Line first token {} ununderstandable in robots.txt", st.sval);
				break;

			// Something else: a syntax error
			default:
				if (LOGGER.isTraceEnabled()) LOGGER.trace("Found unknown token type {} in robots.txt", Integer.valueOf(lineFirstToken));
		}
	}

	if (specific) return toSortedPrefixFreeCharArrays(set); // Some instructions specific to us
	if (! specific && generic) return toSortedPrefixFreeCharArrays(setStar); // No specific instruction, but some generic ones
	// Neither kind of section was seen: set was never filled, as paths are added to it
	// only inside a section targeted to us.
	return toSortedPrefixFreeCharArrays(set);
}
 
示例17
/**
 * Command-line entry point: obtains a list of terms (a serialised list, a file of lines, or
 * the lines of standard input, optionally gzip-compressed), builds an external prefix map
 * from it and serialises the map to the given file.
 *
 * @param arg the command-line arguments, parsed with JSAP.
 */
@SuppressWarnings("unchecked")
public static void main( final String[] arg ) throws ClassNotFoundException, IOException, JSAPException, SecurityException, NoSuchMethodException {

	final Parameter[] options = {
		new FlaggedOption( "blockSize", JSAP.INTSIZE_PARSER, ( STD_BLOCK_SIZE / 1024 ) + "Ki", JSAP.NOT_REQUIRED, 'b', "block-size", "The size of a block in the dump stream." ),
		new Switch( "serialised", 's', "serialised", "The data source (file or standard input) provides a serialised java.util.List of terms." ),
		new Switch( "zipped", 'z', "zipped", "Standard input is compressed in gzip format." ),
		new FlaggedOption( "termFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'o', "offline", "Read terms from this file instead of standard input." ),
		new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term list encoding." ),
		new UnflaggedOption( "map", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised map." ),
		new UnflaggedOption( "dump", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "An optional dump stream (the resulting map will not be self-contained)." )
	};
	final SimpleJSAP jsap = new SimpleJSAP( ImmutableExternalPrefixMap.class.getName(), "Builds an external map reading from standard input a newline-separated list of terms or a serialised term list. If the dump stream name is not specified, the map will be self-contained.", options );

	final JSAPResult parsed = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;

	final boolean serialised = parsed.getBoolean( "serialised" );
	final boolean zipped = parsed.getBoolean( "zipped" );
	final Charset encoding = (Charset)parsed.getObject( "encoding" );
	final String termFile = parsed.getString( "termFile" );

	if ( zipped && serialised ) throw new IllegalArgumentException( "The zipped and serialised options are incompatible" );

	final Collection<? extends CharSequence> termList;
	if ( serialised ) {
		// A serialised list comes either from the term file or from standard input.
		final Object loaded = termFile != null ? BinIO.loadObject( termFile ) : BinIO.loadObject( System.in );
		termList = (List<? extends CharSequence>)loaded;
	}
	else if ( termFile != null ) termList = new FileLinesCollection( termFile, encoding.name(), zipped );
	else {
		// Lines of standard input: each one is copied, as the read buffer is reused.
		final ObjectArrayList<MutableString> collected = new ObjectArrayList<MutableString>();
		final FastBufferedReader terms = new FastBufferedReader( new InputStreamReader(
				zipped ? new GZIPInputStream( System.in ) : System.in, encoding.name() ) );
		final MutableString term = new MutableString();
		for ( ;; ) {
			if ( terms.readLine( term ) == null ) break;
			collected.add( term.copy() );
		}
		terms.close();
		termList = collected;
	}

	final ImmutableExternalPrefixMap prefixMap = new ImmutableExternalPrefixMap( termList, parsed.getInt( "blockSize" ), parsed.getString( "dump" ) );
	BinIO.storeObject( prefixMap, parsed.getString( "map" ) );
}
 
示例18
/**
 * Reads rows from the reader registered for the given run and stores them in the map under
 * their key, stopping at the end of the run or once the characters read reach the page size.
 *
 * @param runNumber the run to read from.
 * @throws IOException if reading from the run fails.
 */
protected void loadNextPage(long runNumber) throws IOException {

	final FastBufferedReader runReader = (FastBufferedReader)runsMap.get(runNumber);
	int charsInPage = 0;

	while (runReader.readLine(buff) != null) {

		final String line = buff.toString();
		numberOfInputRows++;
		charsInPage += buff.length();

		final String key = ExternalSortUtils.getKey(line, columns, sep, numeric);

		// A fresh tuple replaces the one stored under the key, inheriting its content if there was one.
		final Tuple current = new Tuple();
		final Tuple previous = (Tuple)map.put(key, current);
		if (previous != null) {
			current.run = previous.run;
			current.lines = previous.lines;
		}

		// In uniq mode a row is dropped when its key already has lines or equals the current key.
		final boolean dropped = uniq && (current.lines.size() != 0 || key.equals(currKey));
		if (!dropped)
			current.append(line);

		// Page full: the run is recorded on the last tuple touched before stopping.
		if (charsInPage >= pageSize) {
			current.appendRun(runNumber);
			break;
		}
	}
}