Java源码示例:it.unimi.dsi.io.FastBufferedReader
示例1
public AnchorIterator(File inputFile) throws IOException
{
anchor = null;
links = new Int2IntOpenHashMap(1024);
links.defaultReturnValue(0);
originals = new HashSet<String>(32);
in = new FastBufferedReader(new InputStreamReader(new FileInputStream(inputFile), Charset.forName("UTF-8")));
line = new MutableString(1024);
in.readLine(line);
lastAnchor = Chars.split(line, TextDataset.SEP_CHAR)[0].toString();
scroll = 1;
end = false;
}
示例2
/**
 * Collects, for every bulleted line of the given text, the first link found on that line.
 *
 * <p>A bulleted line is one whose first non-blank character is {@code '*'}. The run of
 * leading asterisks is stripped before the remainder of the line is handed to
 * {@code extractLinkFromCleanedLine}; lines with nothing after the bullets are skipped.
 *
 * @param cleanText the text to scan, one entry per line.
 * @return the first link of each bulleted line, in order of appearance; never {@code null}.
 */
public List<WikiLink> extractDisambiguationLinks(MutableString cleanText)
{
    FastBufferedReader tokenizer = new FastBufferedReader(cleanText);
    MutableString buffer = new MutableString(1024);
    List<WikiLink> links = new ArrayList<WikiLink>();
    try {
        while (tokenizer.readLine(buffer) != null)
        {
            buffer.trim();
            if (buffer.length() == 0 || buffer.charAt(0) != '*') continue;

            // Find the end of the run of bullet markers ("*", "**", ...).
            int start = 1;
            while (start < buffer.length() && buffer.charAt(start) == '*') start++;

            // Remove the markers and keep the content. The previous code deleted
            // [start, length), which threw the content away and kept only the asterisks,
            // so no link could ever be extracted.
            buffer.delete(0, start).trim();
            if (buffer.length() == 0) continue;

            List<WikiLink> lineLinks = extractLinkFromCleanedLine(buffer);
            if (!lineLinks.isEmpty()) links.add(lineLinks.get(0));
        }
    } catch (IOException ignored) {
        // Best effort: the reader works on an in-memory string, so a failure is not
        // expected; whatever was collected so far is returned.
    }
    return links;
}
示例3
/**
 * Reads tab-separated records (store index, store position, URL) from standard input and
 * records every line whose URL equals the URL of the line immediately before it.
 *
 * <p>Each such repetition is echoed to standard output and packed into a long
 * (store index in the bits above 48, OR-ed with the store position); the resulting set is
 * serialised to the file named by the first argument.
 *
 * @param arg the command-line arguments; {@code arg[0]} is the output filename.
 * @throws IOException if reading standard input or storing the set fails.
 */
public static void main(String[] arg) throws IOException {
    if (arg.length == 0) {
        System.err.println("Usage: " + BuildRepetitionSet.class.getSimpleName() + " REPETITIONSET");
        System.exit(1);
    }

    final String outputFilename = arg[0];
    final LongOpenHashSet repeatedSet = new LongOpenHashSet();
    final MutableString record = new MutableString();
    final FastBufferedReader stdin = new FastBufferedReader(new InputStreamReader(System.in, Charsets.US_ASCII));

    final ProgressLogger pl = new ProgressLogger();
    pl.itemsName = "lines";
    pl.start("Reading... ");

    MutableString previousUrl = new MutableString();
    for (;;) {
        if (stdin.readLine(record) == null) break;

        final int firstTab = record.indexOf('\t');
        final int secondTab = record.indexOf('\t', firstTab + 1);
        final MutableString currentUrl = record.substring(secondTab + 1);

        if (currentUrl.equals(previousUrl)) {
            // Only repeated URLs pay the cost of parsing the two numeric fields.
            final int storeIndex = Integer.parseInt(new String(record.array(), 0, firstTab));
            final long storePosition = Long.parseLong(new String(record.array(), firstTab + 1, secondTab - firstTab - 1));
            repeatedSet.add((long)storeIndex << 48 | storePosition);
            System.out.println(storeIndex + "\t" + storePosition + "\t" + currentUrl);
        }

        previousUrl = currentUrl;
        pl.lightUpdate();
    }
    pl.done();

    stdin.close();
    BinIO.storeObject(repeatedSet, outputFilename);
}
示例4
/** Adds a (or a set of) new IPv4 to the black list; the IPv4 can be specified directly or it can be a file (prefixed by
 * <code>file:</code>) containing one address per line.
 *
 * <p>The file is decoded as ISO-8859-1, empty lines are skipped, and the file is closed
 * once it has been read (or as soon as reading fails).
 *
 * @param spec the specification (an IP address, or a file prefixed by <code>file:</code>).
 * @throws ConfigurationException presumably when an address cannot be handled by <code>handleIPv4</code>.
 * @throws FileNotFoundException if the file named by <code>spec</code> cannot be opened.
 */
public void addBlackListedIPv4(final String spec) throws ConfigurationException, FileNotFoundException {
    if (spec.length() == 0) return; // Skip empty specs
    if (!spec.startsWith("file:")) {
        blackListedIPv4Addresses.add(handleIPv4(spec));
        return;
    }

    final FastBufferedReader reader = new FastBufferedReader(new InputStreamReader(new FileInputStream(spec.substring(5)), Charsets.ISO_8859_1));
    try {
        final LineIterator lineIterator = new LineIterator(reader);
        while (lineIterator.hasNext()) {
            final MutableString line = lineIterator.next();
            if (line.length() > 0) blackListedIPv4Addresses.add(handleIPv4(line.toString()));
        }
    }
    finally {
        // The stream used to be left open. It was only read from, so a failure
        // while closing cannot affect the addresses already added.
        try {
            reader.close();
        }
        catch (java.io.IOException ignored) {
            // Nothing useful can be done here.
        }
    }
}
示例5
/** Adds a (or a set of) new host to the black list; the host can be specified directly or it can be a file (prefixed by
 * <code>file:</code>) containing one host per line.
 *
 * <p>Hosts are stored as the hash code of their trimmed name. The file is decoded as
 * ISO-8859-1, blank lines are skipped, and the file is closed once it has been read
 * (or as soon as reading fails).
 *
 * @param spec the specification (a host, or a file prefixed by <code>file:</code>).
 * @throws ConfigurationException declared for callers; not raised directly by this method.
 * @throws FileNotFoundException if the file named by <code>spec</code> cannot be opened.
 */
public void addBlackListedHost(final String spec) throws ConfigurationException, FileNotFoundException {
    if (spec.length() == 0) return; // Skip empty specs
    if (!spec.startsWith("file:")) {
        blackListedHostHashes.add(spec.trim().hashCode());
        return;
    }

    final FastBufferedReader reader = new FastBufferedReader(new InputStreamReader(new FileInputStream(spec.substring(5)), Charsets.ISO_8859_1));
    try {
        final LineIterator lineIterator = new LineIterator(reader);
        while (lineIterator.hasNext()) {
            final String host = lineIterator.next().toString().trim();
            // A blank line is not a host: without this check the hash of "" (0) would be
            // black-listed, which the IPv4 variant of this method already avoids.
            if (host.length() > 0) blackListedHostHashes.add(host.hashCode());
        }
    }
    finally {
        // The stream used to be left open. It was only read from, so a failure
        // while closing cannot affect the hosts already added.
        try {
            reader.close();
        }
        catch (java.io.IOException ignored) {
            // Nothing useful can be done here.
        }
    }
}
示例6
/**
 * Parses the robots file named by {@code arg[0]} for the user agent {@code arg[1]}, prints the
 * resulting prefixes to standard error, then reads one URL per line from standard input and
 * prints, for each, the result of {@code apply} followed by a tab and the URL.
 *
 * @param arg the robots filename and the user agent.
 * @throws IOException if the robots file or standard input cannot be read.
 */
public static void main(String arg[]) throws IOException {
    final char[][] robotsResult = URLRespectsRobots.parseRobotsReader(new FileReader(arg[0]), arg[1]);
    for (int i = 0; i < robotsResult.length; i++) System.err.println(new String(robotsResult[i]));

    final MutableString urlLine = new MutableString();
    final FastBufferedReader stdin = new FastBufferedReader(new InputStreamReader(System.in, Charsets.US_ASCII));
    for (MutableString read = stdin.readLine(urlLine); read != null; read = stdin.readLine(urlLine)) {
        final URI uri = BURL.parse(urlLine);
        System.out.println(apply(robotsResult, uri) + "\t" + uri);
    }
    stdin.close();
}
示例7
/**
 * Command-line entry point: loads a serialised function, signs the strings read from a file
 * (or from standard input), and serialises the resulting signed string map.
 *
 * @param arg the command-line arguments, parsed with JSAP.
 */
@SuppressWarnings("unchecked")
public static void main( final String[] arg ) throws NoSuchMethodException, IOException, JSAPException, ClassNotFoundException {
    final SimpleJSAP jsap = new SimpleJSAP( ShiftAddXorSignedStringMap.class.getName(), "Builds a shift-add-xor signed string map by reading a newline-separated list of strings and a function built on the same list of strings.",
        new Parameter[] {
            new FlaggedOption( "bufferSize", JSAP.INTSIZE_PARSER, "64Ki", JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of the I/O buffer used to read strings." ),
            new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The string file encoding." ),
            new Switch( "zipped", 'z', "zipped", "The string list is compressed in gzip format." ),
            new FlaggedOption( "width", JSAP.INTEGER_PARSER, Integer.toString( Integer.SIZE ), JSAP.NOT_REQUIRED, 'w', "width", "The signature width in bits." ),
            new UnflaggedOption( "function", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the function to be signed." ),
            new UnflaggedOption( "map", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the resulting serialised signed string map." ),
            new UnflaggedOption( "stringFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "Read strings from this file instead of standard input." ),
        });

    final JSAPResult options = jsap.parse( arg );
    if ( jsap.messagePrinted() ) return;

    final int bufferSize = options.getInt( "bufferSize" );
    final String functionName = options.getString( "function" );
    final String mapName = options.getString( "map" );
    final String stringFile = options.getString( "stringFile" );
    final Charset encoding = (Charset)options.getObject( "encoding" );
    final int width = options.getInt( "width" );
    final boolean zipped = options.getBoolean( "zipped" );

    // Pick the source of the strings, optionally unwrapping gzip compression.
    final InputStream rawInput;
    if ( stringFile == null ) rawInput = System.in;
    else rawInput = new FileInputStream( stringFile );
    final InputStream textInput = zipped ? new GZIPInputStream( rawInput ) : rawInput;

    final FastBufferedReader lineReader = new FastBufferedReader( new InputStreamReader( textInput, encoding ), bufferSize );
    final Iterator<MutableString> iterator = new LineIterator( lineReader );
    final Object2LongFunction<CharSequence> function = (Object2LongFunction<CharSequence>)BinIO.loadObject( functionName );

    LOGGER.info( "Signing..." );
    final ShiftAddXorSignedStringMap signedMap = new ShiftAddXorSignedStringMap( iterator, function, width );
    BinIO.storeObject( signedMap, mapName );
    LOGGER.info( "Completed." );
}
示例8
/**
 * Command-line entry point: builds a front-coded string list from the newline-separated
 * terms on standard input (optionally gzip-compressed) and serialises it to a file.
 *
 * @param arg the command-line arguments, parsed with JSAP.
 */
public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException {
    final SimpleJSAP jsap = new SimpleJSAP( FrontCodedStringList.class.getName(), "Builds a front-coded string list reading from standard input a newline-separated ordered list of terms.",
        new Parameter[] {
            new FlaggedOption( "bufferSize", IntSizeStringParser.getParser(), "64Ki", JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of the I/O buffer used to read terms." ),
            new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ),
            new FlaggedOption( "ratio", IntSizeStringParser.getParser(), "4", JSAP.NOT_REQUIRED, 'r', "ratio", "The compression ratio." ),
            new Switch( "utf8", 'u', "utf8", "Store the strings as UTF-8 byte arrays." ),
            new Switch( "zipped", 'z', "zipped", "The term list is compressed in gzip format." ),
            new UnflaggedOption( "frontCodedList", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised front-coded list." )
        });

    final JSAPResult options = jsap.parse( arg );
    if ( jsap.messagePrinted() ) return;

    final int bufferSize = options.getInt( "bufferSize" );
    final int ratio = options.getInt( "ratio" );
    final boolean utf8 = options.getBoolean( "utf8" );
    final boolean zipped = options.getBoolean( "zipped" );
    final String listName = options.getString( "frontCodedList" );
    final Charset encoding = (Charset)options.getObject( "encoding" );

    final ProgressLogger pl = new ProgressLogger();
    pl.itemsName = "words";
    pl.start( "Reading words..." );

    // Standard input, optionally gunzipped, decoded with the requested charset.
    final InputStream source = zipped ? new GZIPInputStream( System.in ) : System.in;
    final FastBufferedReader wordReader = new FastBufferedReader( new InputStreamReader( source, encoding ), bufferSize );
    final LineIterator words = new LineIterator( wordReader, pl );
    final FrontCodedStringList frontCodedStringList = new FrontCodedStringList( words, ratio, utf8 );
    pl.done();

    System.err.print( "Writing to file..." );
    BinIO.storeObject( frontCodedStringList, listName );
    System.err.println( " done." );
}
示例9
/**
 * Command-line entry point: adds every newline-separated term read from standard input to a
 * ternary interval search tree and serialises the tree to a file.
 *
 * @param arg the command-line arguments, parsed with JSAP.
 */
public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException {
    final SimpleJSAP jsap = new SimpleJSAP( TernaryIntervalSearchTree.class.getName(), "Builds a ternary interval search tree reading from standard input a newline-separated list of terms.",
        new Parameter[] {
            new FlaggedOption( "bufferSize", JSAP.INTSIZE_PARSER, "64Ki", JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of the I/O buffer used to read terms." ),
            new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ),
            new UnflaggedOption( "tree", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised tree." )
        });

    final JSAPResult options = jsap.parse( arg );
    if ( jsap.messagePrinted() ) return;

    final TernaryIntervalSearchTree tree = new TernaryIntervalSearchTree();
    final MutableString currentTerm = new MutableString();
    final ProgressLogger pl = new ProgressLogger();
    pl.itemsName = "terms";

    final InputStreamReader decoder = new InputStreamReader( System.in, (Charset)options.getObject( "encoding" ) );
    final FastBufferedReader terms = new FastBufferedReader( decoder, options.getInt( "bufferSize" ) );

    pl.start( "Reading terms..." );
    for ( ;; ) {
        if ( terms.readLine( currentTerm ) == null ) break;
        pl.update();
        tree.add( currentTerm );
    }
    pl.done();

    BinIO.storeObject( tree, options.getString( "tree" ) );
}
示例10
/**
 * Command-line entry point: adds every newline-separated term read from standard input to a
 * Bloom filter of the requested size and precision, then serialises the filter to a file.
 *
 * @param arg the command-line arguments, parsed with JSAP.
 * @throws IOException if standard input cannot be read or the filter cannot be stored.
 */
public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException {
    final SimpleJSAP jsap = new SimpleJSAP( BloomFilter.class.getName(), "Creates a Bloom filter reading from standard input a newline-separated list of terms.",
        new Parameter[] {
            new FlaggedOption( "bufferSize", IntSizeStringParser.getParser(), "64Ki", JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of the I/O buffer used to read terms." ),
            new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ),
            // The help text used to say "front-coded list", a leftover from another tool.
            new UnflaggedOption( "bloomFilter", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised Bloom filter." ),
            new UnflaggedOption( "size", JSAP.INTSIZE_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The size of the filter (i.e., the expected number of elements in the filter; usually, the number of terms)." ),
            new UnflaggedOption( "precision", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The precision of the filter." )
        });
    JSAPResult jsapResult = jsap.parse( arg );
    if ( jsap.messagePrinted() ) return;

    final int bufferSize = jsapResult.getInt( "bufferSize" );
    final String filterName = jsapResult.getString( "bloomFilter" );
    final Charset encoding = (Charset)jsapResult.getObject( "encoding" );
    BloomFilter filter = new BloomFilter( jsapResult.getInt( "size" ), jsapResult.getInt( "precision" ) );

    final ProgressLogger pl = new ProgressLogger();
    pl.itemsName = "terms";
    pl.start( "Reading terms..." );

    MutableString s = new MutableString();
    FastBufferedReader reader = new FastBufferedReader( new InputStreamReader( System.in, encoding ), bufferSize );
    try {
        while( reader.readLine( s ) != null ) {
            filter.add( s );
            pl.lightUpdate();
        }
    }
    finally {
        // Release the reader even when reading fails midway.
        reader.close();
    }
    pl.done();

    BinIO.storeObject( filter, filterName );
}
示例11
/**
 * Checks that a {@code LineIterator} over {@code TEXT} yields exactly the strings in
 * {@code LINES}, in order.
 *
 * @param pl the progress logger handed to the iterator.
 */
public void testLineIterator( ProgressLogger pl ) {
    final LineIterator lineIterator = new LineIterator( new FastBufferedReader( new StringReader( TEXT ) ), pl );
    int i = 0;
    while( lineIterator.hasNext() )
        assertEquals( LINES[ i++ ].toString(), lineIterator.next().toString() );
    // Expected value first, actual second: the arguments were swapped, which would make
    // a failure message report the two numbers the wrong way round.
    assertEquals( LINES.length, i );
}
示例12
/**
 * Checks the textual specification returned by {@code toSpec()} for each constructor variant:
 * no arguments, buffer size only, word constituents only, and both given as strings.
 */
public void testToSpec() {
    final String name = FastBufferedReader.class.getName();

    final FastBufferedReader plain = new FastBufferedReader();
    assertEquals( name, plain.toSpec() );

    final FastBufferedReader sized = new FastBufferedReader( 100 );
    assertEquals( name + "(100)", sized.toSpec() );

    final FastBufferedReader withConstituents = new FastBufferedReader( "_" );
    assertEquals( name + "(\"_\")", withConstituents.toSpec() );

    final FastBufferedReader sizedWithConstituents = new FastBufferedReader( "100", "_" );
    assertEquals( name + "(100,\"_\")", sizedWithConstituents.toSpec() );
}
示例13
/**
 * Opens {@code input} as UTF-8 text behind a {@code FastBufferedReader} and moves to the
 * {@code IDLE} state.
 *
 * @throws IOException if the input cannot be opened.
 */
protected void start() throws IOException
{
    final FileInputStream rawBytes = new FileInputStream(input);
    final InputStreamReader decoder = new InputStreamReader(rawBytes, Charset.forName("UTF-8"));
    reader = new FastBufferedReader(decoder);
    state = State.IDLE;
}
示例14
/**
 * Parses the DBpedia category file of the given language into a map from article title to
 * the list of its categories.
 *
 * <p>Lines starting with {@code #} are skipped. On every other line the title is the text
 * between the first {@code /resource/} and the following {@code >}; the category is taken
 * from a later {@code /resource/<prefix>:<category>>} occurrence. Underscores in both are
 * replaced by spaces. Lines missing either part are counted as errors and skipped.
 *
 * @param lang the language whose category file is parsed.
 * @return the categories of each title, in file order.
 * @throws IOException if the category file cannot be opened or read.
 */
public static HashMap<String, List<String>> parseDBPediaCategories(String lang) throws IOException
{
    // Progress counters, in the order given here: 0 = lines, 1 = articles, 2 = errors.
    PLogger plog = new PLogger(log, Step.TEN_MINUTES, "Lines", "Articles", "Errors");
    plog.start("Parsing DBPEDIA categories");
    HashMap<String, List<String>> cats = new HashMap<String, List<String>>(1600000);
    Pattern patTitle = Pattern.compile("/resource/([^>]*)>");
    Pattern patCat = Pattern.compile("/resource/[^:</]*:([^>]*)>");
    File dbpedia_cat = WikipediaFiles.DBPEDIA_CAT.getSourceFile(lang);
    FastBufferedReader fbr = new FastBufferedReader(new InputStreamReader(new FileInputStream(dbpedia_cat), Charset.forName("UTF-8")));
    try {
        MutableString line = new MutableString(1024);
        while (fbr.readLine(line) != null)
        {
            plog.update(0);
            line.trim();
            if (line.startsWith("#")) continue;

            Matcher m = patTitle.matcher(line);
            if (!m.find())
            {
                plog.update(2);
                continue;
            }
            String title = m.group(1).replace('_', ' ');

            // The category is searched only after the end of the title match.
            int lastCharTitle = m.end();
            m = patCat.matcher(line);
            if (!m.find(lastCharTitle))
            {
                plog.update(2);
                continue;
            }
            String cat = m.group(1).replace('_', ' ');

            // Single lookup instead of containsKey() followed by get().
            List<String> titleCats = cats.get(title);
            if (titleCats == null)
            {
                plog.update(1);
                titleCats = new ArrayList<String>();
                cats.put(title, titleCats);
            }
            titleCats.add(cat);
        }
        plog.stop();
    } finally {
        // Close the file even when reading fails midway; it used to leak in that case.
        fbr.close();
    }
    return cats;
}
示例15
/**
 * Flattens structured text into a single run of sentence-like fragments, processing the
 * input line by line and dispatching on the first character of each trimmed, non-empty line.
 *
 * <ul>
 * <li>{@code '='}: leading/trailing spaces and {@code '='} are stripped; what remains is
 * appended followed by {@code ". "} (only when more than one character remains).
 * <li>{@code '*'}, {@code '#'}, {@code ':'}, {@code ';'}: leading markers and spaces are
 * stripped; the rest is appended (only when more than one character remains).
 * <li>{@code '{'}, {@code '|'}: the line is dropped.
 * <li>{@code '.'}, {@code '-'}: the line is passed through {@code delete(char[])} with
 * those two characters and appended if anything is left.
 * <li>anything else: the line is appended unless its last character is {@code '}'}.
 * </ul>
 *
 * <p>Except for the {@code '='} case, an appended fragment gets a {@code '.'} unless the text
 * already ends with one of {@code ":.;,-"}, and is always followed by a space.
 *
 * @param input the text to flatten.
 * @param onlyAbstract if true, stop once more than {@code MIN_ABSTRACT_CHARS} characters
 * have been collected and a further line is read.
 * @return the collected text without its last character (the trailing space).
 */
public MutableString removeStructure(MutableString input, boolean onlyAbstract)
{
MutableString buffer = new MutableString(1024);
FastBufferedReader tokenizer = new FastBufferedReader(input);
MutableString text = new MutableString(2048);
// Characters after which no extra '.' is appended.
String punts = ":.;,-";
try {
while(tokenizer.readLine(buffer) != null)
{
// Abstract mode: enough text collected. Every append sequence below ends with a space,
// which is dropped before returning.
if (text.length() > MIN_ABSTRACT_CHARS && onlyAbstract){
text.deleteCharAt(text.length()-1);
return text;
}
// MutableString linestr = new MutableString(buffer.trim());
// NOTE(review): trim() presumably returns the same instance, so linestr aliases buffer — confirm.
MutableString linestr = buffer.trim();
if (linestr.length() == 0) continue;
int start;
int end;
String chars;
// Backing array and length of the current line, scanned directly in the loops below.
char[] line = linestr.array();
int line_len = linestr.length();
char first = linestr.charAt(0);
switch (first)
{
case '=':{
// Heading-like line: skip spaces and '=' on both sides.
chars = " =";
for(start=0; start <line_len && chars.indexOf(line[start])>=0; start++);
for(end=line_len-1; end >= 0 && chars.indexOf(line[end])>=0; end--);
// Strict comparison: a single remaining character is not emitted.
if (start < end){
text.append(linestr.subSequence(start, end+1));
text.append(". ");
}
break;
}
case '*':
case '#':
case ':':
case ';':{
// List/indent-like line: skip the leading markers and spaces.
chars = "*#:; ";
for(start=0; start<line_len && chars.indexOf(line[start])>=0 ; start++);
// At least two characters must remain after the markers.
if (start < line_len-1){
text.append(linestr.subSequence(start, linestr.length()));
if (punts.indexOf(text.lastChar())<0)
text.append('.');
text.append(' ');
}
break;
}
case '{':
case '|':
// Lines starting with '{' or '|' contribute nothing.
break;
case '.':
case '-':{
// NOTE(review): delete(char[]) presumably removes every '.' and '-' in the line, not just leading ones — confirm.
linestr.delete(new char[]{'.','-'});
if (linestr.length() > 0){
text.append(linestr);
if (punts.indexOf(text.lastChar())<0)
text.append('.');
text.append(' ');
}
break;
}
default:{
// Lines ending with '}' are dropped.
if (linestr.lastChar() == '}')
break;
text.append(linestr);
if (punts.indexOf(text.lastChar())<0)
text.append('.');
text.append(' ');
}
}
}
// An I/O failure is silently ignored: whatever was collected so far is returned.
} catch (IOException e) {}
// Drop the trailing space left by the last append.
if (text.length()>0) text.deleteCharAt(text.length()-1);
return text;
}
示例16
/** Parses the argument as if it were the content of a <code>robots.txt</code> file,
* and returns a sorted array of prefixes of URLs that the agent should not follow.
*
* <p>Only <code>user-agent:</code> and <code>disallow:</code> lines are interpreted. Disallowed paths
* are collected separately for sections matching <code>userAgent</code> and for <code>*</code> sections;
* the former are returned if any matching section was seen, otherwise the latter.
*
* @param content the content of the <code>robots.txt</code> file.
* @param userAgent the string representing the user agent of interest.
* @return an array of character arrays, which are prefixes of the URLs not to follow, in sorted order.
* @throws IOException if reading from <code>content</code> fails.
*/
public static char[][] parseRobotsReader(final Reader content, final String userAgent) throws IOException {
/* The set of disallowed paths specifically aimed at userAgent. */
Set<String> set = new ObjectOpenHashSet<>();
/* The set of disallowed paths specifically aimed at *. */
Set<String> setStar = new ObjectOpenHashSet<>();
/* True if the currently examined record is targeted to us. */
boolean doesMatter = false;
/* True if we have seen a section targeted to our agent. */
boolean specific = false;
/* True if we have seen a section targeted to *. */
boolean generic = false;
/* True if we are in a star section. */
boolean starSection = false;
StreamTokenizer st = new StreamTokenizer(new FastBufferedReader(content));
int token;
// Tokenizer setup: everything above space is a word character, everything up to space is whitespace.
st.resetSyntax();
st.eolIsSignificant(true); // We need EOLs to separate records
st.wordChars(33, 255); // All characters may appear
st.whitespaceChars(0, 32);
st.ordinaryChar('#'); // We must manually simulate comments 8^(
st.lowerCaseMode(false);
// Each iteration consumes one line, dispatching on the type of its first token.
while (true) {
int lineFirstToken = st.nextToken();
if (lineFirstToken == StreamTokenizer.TT_EOF) break;
switch (lineFirstToken) {
// Blank line: a new block is starting
case StreamTokenizer.TT_EOL:
doesMatter = false;
break;
// Comment or number: ignore until the end of line
case StreamTokenizer.TT_NUMBER:
case '#':
do {
token = st.nextToken();
} while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF);
break;
// A string
case StreamTokenizer.TT_WORD:
if (st.sval.equalsIgnoreCase("user-agent:")) {
token = st.nextToken();
// Note: the else-if/else below bind to the inner startsWithIgnoreCase test, so they are
// evaluated only when the token is a word.
if (token == StreamTokenizer.TT_WORD)
// The record applies to us when userAgent starts (ignoring case) with the listed name.
if (StringUtils.startsWithIgnoreCase(userAgent, st.sval)) {
doesMatter = true;
specific = true;
starSection = false;
}
else if (st.sval.equals("*")) {
starSection = true;
generic = true;
} else starSection = false;
// Ignore the rest of the line
while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF)
token = st.nextToken();
} else if (st.sval.equalsIgnoreCase("disallow:")) {
token = st.nextToken();
//System.out.println(st.sval + " " + starSection + " " + set + " " + setStar);
// A disallow line with no path clears what was collected for the current section.
if (token == StreamTokenizer.TT_EOL) {
if (doesMatter) set.clear();
else if (starSection) setStar.clear();
} else if (token == StreamTokenizer.TT_WORD) {
String disallowed = st.sval;
if (disallowed.endsWith("*")) disallowed = disallowed.substring(0, disallowed.length()-1); // Someone (erroneously) uses * to denote any suffix
// A record that matters to us takes precedence over a star section.
if (doesMatter) set.add(disallowed);
else if (starSection) setStar.add(disallowed);
}
// Ignore the rest of the line
while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF)
token = st.nextToken();
} else if (LOGGER.isTraceEnabled()) LOGGER.trace("Line first token {} ununderstandable in robots.txt", st.sval);
break;
// Something else: a syntax error
default:
if (LOGGER.isTraceEnabled()) LOGGER.trace("Found unknown token type {} in robots.txt", Integer.valueOf(lineFirstToken));
}
}
if (specific) return toSortedPrefixFreeCharArrays(set); // Some instructions specific to us
if (! specific && generic) return toSortedPrefixFreeCharArrays(setStar); // No specific instruction, but some generic ones
// Neither kind of section was seen: set was never modified (doesMatter is only set together with specific).
return toSortedPrefixFreeCharArrays(set);
}
示例17
/**
 * Command-line entry point: builds an immutable external prefix map from a list of terms
 * and serialises it to a file.
 *
 * <p>The terms come from a serialised list (file or standard input), from a text file, or
 * from newline-separated text on standard input (optionally gzip-compressed).
 *
 * @param arg the command-line arguments, parsed with JSAP.
 */
@SuppressWarnings("unchecked")
public static void main( final String[] arg ) throws ClassNotFoundException, IOException, JSAPException, SecurityException, NoSuchMethodException {
    final SimpleJSAP jsap = new SimpleJSAP( ImmutableExternalPrefixMap.class.getName(), "Builds an external map reading from standard input a newline-separated list of terms or a serialised term list. If the dump stream name is not specified, the map will be self-contained.",
        new Parameter[] {
            new FlaggedOption( "blockSize", JSAP.INTSIZE_PARSER, ( STD_BLOCK_SIZE / 1024 ) + "Ki", JSAP.NOT_REQUIRED, 'b', "block-size", "The size of a block in the dump stream." ),
            new Switch( "serialised", 's', "serialised", "The data source (file or standard input) provides a serialised java.util.List of terms." ),
            new Switch( "zipped", 'z', "zipped", "Standard input is compressed in gzip format." ),
            new FlaggedOption( "termFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'o', "offline", "Read terms from this file instead of standard input." ),
            new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term list encoding." ),
            new UnflaggedOption( "map", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised map." ),
            new UnflaggedOption( "dump", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "An optional dump stream (the resulting map will not be self-contained)." )
        }
    );

    final JSAPResult options = jsap.parse( arg );
    if ( jsap.messagePrinted() ) return;

    final String termFile = options.getString( "termFile" );
    final Charset encoding = (Charset)options.getObject( "encoding" );
    final boolean zipped = options.getBoolean( "zipped" );
    final boolean serialised = options.getBoolean( "serialised" );
    if ( zipped && serialised ) throw new IllegalArgumentException( "The zipped and serialised options are incompatible" );

    final Collection<? extends CharSequence> termList;
    if ( serialised ) {
        // A serialised java.util.List, either from the named file or from standard input.
        final Object loaded = termFile == null ? BinIO.loadObject( System.in ) : BinIO.loadObject( termFile );
        termList = (List<? extends CharSequence>)loaded;
    }
    else if ( termFile != null ) {
        termList = new FileLinesCollection( termFile, encoding.name(), zipped );
    }
    else {
        // Plain text on standard input: every line is copied, because the reader
        // fills the same mutable string on each call.
        final ObjectArrayList<MutableString> collected = new ObjectArrayList<MutableString>();
        termList = collected;
        final InputStream source = zipped ? new GZIPInputStream( System.in ) : System.in;
        final FastBufferedReader terms = new FastBufferedReader( new InputStreamReader( source, encoding.name() ) );
        final MutableString term = new MutableString();
        for ( ;; ) {
            if ( terms.readLine( term ) == null ) break;
            collected.add( term.copy() );
        }
        terms.close();
    }

    final ImmutableExternalPrefixMap prefixMap = new ImmutableExternalPrefixMap( termList, options.getInt( "blockSize" ), options.getString( "dump" ) );
    BinIO.storeObject( prefixMap, options.getString( "map" ) );
}
示例18
/**
 * Reads rows from the reader registered for {@code runNumber} into {@code map} until the
 * reader is exhausted or the accumulated row length reaches {@code pageSize}.
 *
 * <p>Each row is stored under its key in a fresh tuple that inherits the run and lines of
 * the tuple previously stored under that key, if any. The tuple of the row that fills the
 * page is tagged with the run number.
 *
 * @param runNumber the run whose next page is loaded.
 * @throws IOException if reading from the run fails.
 */
protected void loadNextPage(long runNumber) throws IOException {
    final FastBufferedReader reader = (FastBufferedReader)runsMap.get(runNumber);
    int currPageSize = 0;

    while (reader.readLine(buff) != null) {
        final String line = buff.toString();
        numberOfInputRows++;
        currPageSize += buff.length();

        final String key = ExternalSortUtils.getKey(line, columns, sep, numeric);

        // The new tuple replaces the old one in the map but carries over its content.
        final Tuple tuple = new Tuple();
        final Tuple oldTuple = (Tuple)map.put(key, tuple);
        if (oldTuple != null) {
            tuple.run = oldTuple.run;
            tuple.lines = oldTuple.lines;
        }

        // In uniq mode a row is dropped when its key already has lines or equals the current key.
        final boolean duplicate = uniq && (tuple.lines.size() != 0 || key.equals(currKey));
        if (!duplicate) tuple.append(line);

        if (currPageSize >= pageSize) {
            tuple.appendRun(runNumber);
            break;
        }
    }
}