Java Code Examples: parquet.schema.MessageType

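The examples below are collected from open-source projects and show common uses of parquet.schema.MessageType: building a schema, reading one from a Parquet file footer, projecting it onto a subset of columns, and printing its structure. As a warm-up, here is a minimal self-contained sketch; the schema text and class name are illustrative only, not taken from any example:

import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public class MessageTypeDemo {
    public static void main(String[] args) {
        // Parse a message type from its textual form; Examples 25 and 26
        // use this same parser to round-trip a schema through a Configuration.
        MessageType schema = MessageTypeParser.parseMessageType(
                "message example { required int32 id; optional binary name (UTF8); }");
        System.out.println(schema);           // pretty-prints the schema
        System.out.println(schema.getName()); // prints "example"
    }
}
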
Example 1
private ExaParquetWriterImpl(final MessageType schema,
                             final int numColumns,
                             final Configuration conf,
                             final Path path,
                             final String compressionType,
                             final ExaIterator exa,
                             final int firstColumnIndex,
                             final List<Integer> dynamicPartitionExaColNums) throws Exception {
    System.out.println("Path: " + path.toString());
    System.out.println("Parquet schema:\n" + schema);

    TupleWriteSupport.setSchema(schema, conf);
    this.writer = new ParquetWriter<>(path,
            new TupleWriteSupport(),
            CompressionCodecName.fromConf(compressionType),
            ParquetWriter.DEFAULT_BLOCK_SIZE,
            ParquetWriter.DEFAULT_PAGE_SIZE,
            ParquetWriter.DEFAULT_PAGE_SIZE,
            ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED,
            ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED,
            conf);

    // Create Tuple object with ExaIterator reference.
    this.row = new Tuple(exa, numColumns, firstColumnIndex, dynamicPartitionExaColNums);
}
 
Example 2
public ExaParquetWriterImpl(final List<ExaParquetTypeInfo> schemaTypes,
                            final Configuration conf,
                            final Path path,
                            final String compressionType,
                            final ExaIterator exa,
                            final int firstColumnIndex,
                            final List<Integer> dynamicPartitionExaColNums) throws Exception {
    // Use the schemaTypes provided since HCat table metadata isn't available.
    // This should normally only be used for testing.
    this(new MessageType("hive_schema", ExaParquetWriterImpl.typeInfoToParquetTypes(schemaTypes)),
            schemaTypes.size(),
            conf,
            path,
            compressionType,
            exa,
            firstColumnIndex,
            dynamicPartitionExaColNums);
}
 
Example 3
private Type buildSchema() {
  JsonArray inputSchema = this.jsonSchema.getDataTypeValues();
  List<Type> parquetTypes = new ArrayList<>();
  for (JsonElement element : inputSchema) {
    JsonObject map = (JsonObject) element;
    JsonSchema elementSchema = new JsonSchema(map);
    String columnName = elementSchema.getColumnName();
    JsonElementConverter converter = JsonElementConversionFactory.getConverter(elementSchema, false);
    Type schemaType = converter.schema();
    this.converters.put(columnName, converter);
    parquetTypes.add(schemaType);
  }
  String docName = this.jsonSchema.getColumnName();
  switch (recordType) {
    case ROOT:
      return new MessageType(docName, parquetTypes);
    case CHILD:
      return new GroupType(optionalOrRequired(this.jsonSchema), docName, parquetTypes);
    default:
      throw new RuntimeException("Unsupported Record type");
  }
}
 
Example 4
@Override
public List<IParquetInputField> readSchema( String file ) throws Exception {
  return inClassloader( () -> {
    Configuration conf = job.getConfiguration();
    S3NCredentialUtils.applyS3CredentialsToHadoopConfigurationIfNecessary( file, conf );
    Path filePath = new Path( S3NCredentialUtils.scrubFilePathIfNecessary( file ) );
    FileSystem fs = FileSystem.get( filePath.toUri(), conf );
    FileStatus fileStatus = fs.getFileStatus( filePath );
    List<Footer> footers = ParquetFileReader.readFooters( conf, fileStatus, true );
    if ( footers.isEmpty() ) {
      return new ArrayList<>();
    } else {
      ParquetMetadata meta = footers.get( 0 ).getParquetMetadata();
      MessageType schema = meta.getFileMetaData().getSchema();
      return ParquetConverter.buildInputFields( schema );
    }
  } );
}
 
Example 5
@Override
public ReadContext init( InitContext context ) {
  String schemaStr = context.getConfiguration().get( ParquetConverter.PARQUET_SCHEMA_CONF_KEY );
  if ( schemaStr == null ) {
    throw new RuntimeException( "Schema not defined in the PentahoParquetSchema key" );
  }

  ParquetInputFieldList schema = ParquetInputFieldList.unmarshall( schemaStr );
  converter = new ParquetConverter( schema.getFields() );

  // get all fields from file's schema
  MessageType fileSchema = context.getFileSchema();
  List<Type> newFields = new ArrayList<>();
  // keep only the fields requested in the input schema list
  for ( IParquetInputField f : schema ) {
    Type origField = fileSchema.getFields().get( fileSchema.getFieldIndex( f.getFormatFieldName() ) );
    newFields.add( origField );
  }
  if ( newFields.isEmpty() ) {
    throw new RuntimeException( "Fields should be declared" );
  }
  MessageType newSchema = new MessageType( fileSchema.getName(), newFields );

  return new ReadContext( newSchema, new HashMap<>() );
}
 
Example 6
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath) {
  String name = Strings.repeat(".", depth) + type.getName();
  OriginalType otype = type.getOriginalType();
  Repetition rep = type.getRepetition();
  PrimitiveTypeName ptype = type.getPrimitiveTypeName();

  out.format("%s: %s %s", name, rep, ptype);
  if (otype != null) out.format(" O:%s", otype);

  if (container != null) {
    cpath.add(type.getName());
    String[] paths = cpath.toArray(new String[cpath.size()]);
    cpath.remove(cpath.size() - 1);

    ColumnDescriptor desc = container.getColumnDescription(paths);

    int defl = desc.getMaxDefinitionLevel();
    int repl = desc.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", repl, defl);
  }
  out.println();
}
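
Example 6 resolves a ColumnDescriptor from the containing MessageType to report the column's maximum repetition and definition levels. Here is a hedged sketch of the same lookup in isolation; the schema and column path are made up for illustration:

import parquet.column.ColumnDescriptor;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public class LevelsDemo {
    public static void main(String[] args) {
        MessageType schema = MessageTypeParser.parseMessageType(
                "message doc { optional group links { repeated binary backward; } }");
        // The path segments name each level of nesting down to the leaf column.
        ColumnDescriptor desc = schema.getColumnDescription(new String[] {"links", "backward"});
        System.out.println("R:" + desc.getMaxRepetitionLevel()
                + " D:" + desc.getMaxDefinitionLevel()); // prints "R:1 D:2"
    }
}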
 
Example 7
public ParquetReader(MessageType fileSchema,
                     MessageType requestedSchema,
                     List<BlockMetaData> blocks,
                     ParquetDataSource dataSource,
                     TypeManager typeManager)
{
    this.fileSchema = fileSchema;
    this.requestedSchema = requestedSchema;
    this.blocks = blocks;
    this.dataSource = dataSource;
    this.typeManager = typeManager;
    initializeColumnReaders();
}
 
Example 8
private static MessageType readParquetSchema(List<SchemaElement> schema)
{
    Iterator<SchemaElement> schemaIterator = schema.iterator();
    SchemaElement rootSchema = schemaIterator.next();
    Types.MessageTypeBuilder builder = Types.buildMessage();
    readTypeSchema(builder, schemaIterator, rootSchema.getNum_children());
    return builder.named(rootSchema.name);
}
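
Example 8 feeds Thrift SchemaElements into a Types.MessageTypeBuilder. The same builder can also be driven directly; a small hedged sketch, assuming the parquet.schema.Types fluent API of the same Parquet version (field names are illustrative):

import parquet.schema.MessageType;
import parquet.schema.Types;
import static parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;

public class BuilderDemo {
    public static void main(String[] args) {
        // Fluent equivalent of what readTypeSchema assembles element by element.
        MessageType schema = Types.buildMessage()
                .required(INT64).named("id")
                .optional(BINARY).named("payload")
                .named("root");
        System.out.println(schema);
    }
}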
 
Example 9
private Type getParquetType(ParaflowColumnHandle column, MessageType messageType)
{
    if (messageType.containsField(column.getName())) {
        return messageType.getType(column.getName());
    }
    // Parquet matching is case-insensitive; all HDFS columns are converted to lowercase
    for (Type type : messageType.getFields()) {
        if (type.getName().equalsIgnoreCase(column.getName())) {
            return type;
        }
    }
    return null;
}
 
Example 10
public ParaflowPageSource(
        ParquetReader parquetReader,
        ParquetDataSource dataSource,
        MessageType fileSchema,
        MessageType requestedSchema,
        long totalBytes,
        List<ParaflowColumnHandle> columns,
        TypeManager typeManager)
{
    checkArgument(totalBytes >= 0, "totalBytes is negative");

    this.parquetReader = requireNonNull(parquetReader, "parquetReader is null");
    this.dataSource = requireNonNull(dataSource, "dataSource is null");
    this.fileSchema = requireNonNull(fileSchema, "fileSchema is null");
    this.requestedSchema = requireNonNull(requestedSchema, "requestedSchema is null");
    this.totalBytes = totalBytes;

    this.columnSize = columns.size();
    this.constantBlocks = new Block[columnSize];
    ImmutableList.Builder<String> namesBuilder = ImmutableList.builder();
    ImmutableList.Builder<Type> typesBuilder = ImmutableList.builder();
    for (int columnIndex = 0; columnIndex < columnSize; columnIndex++) {
        ParaflowColumnHandle column = columns.get(columnIndex);
        String name = column.getName();
        Type type = typeManager.getType(column.getType().getTypeSignature());

        namesBuilder.add(name);
        typesBuilder.add(type);

        if (getParquetType(column, fileSchema) == null) {
            constantBlocks[columnIndex] = RunLengthEncodedBlock.create(type, null, MAX_VECTOR_LENGTH);
        }
    }
    columnNames = namesBuilder.build();
    types = typesBuilder.build();
}
 
Example 11
private parquet.schema.Type getParquetType(ParaflowColumnHandle column, MessageType messageType)
{
    if (messageType.containsField(column.getName())) {
        return messageType.getType(column.getName());
    }
    // Parquet matching is case-insensitive; all HDFS columns are converted to lowercase
    for (parquet.schema.Type type : messageType.getFields()) {
        if (type.getName().equalsIgnoreCase(column.getName())) {
            return type;
        }
    }
    return null;
}
 
Example 12
public int getFieldIndex(MessageType fileSchema, String name)
{
    try {
        return fileSchema.getFieldIndex(name);
    }
    catch (InvalidRecordException e) {
        for (parquet.schema.Type type : fileSchema.getFields()) {
            if (type.getName().equalsIgnoreCase(name)) {
                return fileSchema.getFieldIndex(type.getName());
            }
        }
        return -1;
    }
}
 
Example 13
private DatasetDescriptor getDatasetDescriptorFromParquetFile(Job job, FileSystem fs, String uri)
    throws IOException {

  ArrayList<FileStatus> files = new ArrayList<FileStatus>();
  FileStatus[] dirs;
  dirs = fs.globStatus(fs.makeQualified(getInputPath()));
  for (int i = 0; (dirs != null && i < dirs.length); i++) {
    files.addAll(Arrays.asList(fs.listStatus(dirs[i].getPath(), HIDDEN_FILES_PATH_FILTER)));
    // We only check one file, so exit the loop once we have at least one.
    if (files.size() > 0) {
      break;
    }
  }

  ParquetMetadata parquetMetadata;
  try {
    parquetMetadata =
        ParquetFileReader.readFooter(job.getConfiguration(),
            fs.makeQualified(files.get(0).getPath()));
  } catch (IOException e) {
    LOG.error("Wrong file format. Please check the export file's format.", e);
    throw e;
  }
  MessageType schema = parquetMetadata.getFileMetaData().getSchema();
  Schema avroSchema = new AvroSchemaConverter().convert(schema);
  DatasetDescriptor descriptor =
      new DatasetDescriptor.Builder().schema(avroSchema).format(Formats.PARQUET)
          .compressionType(ParquetJob.getCompressionType(job.getConfiguration())).build();
  return descriptor;
}
 
Example 14
@Override
public MessageType convertSchema(JsonArray inputSchema, WorkUnitState workUnit)
    throws SchemaConversionException {
  String fieldName = workUnit.getExtract().getTable();
  JsonSchema jsonSchema = new JsonSchema(inputSchema);
  jsonSchema.setColumnName(fieldName);
  recordConverter = new RecordConverter(jsonSchema, ROOT);
  return (MessageType) recordConverter.schema();
}
 
Example 15
private void testCase(String testCaseName)
    throws SchemaConversionException, DataConversionException {
  JsonObject test = testCases.get(testCaseName).getAsJsonObject();
  parquetConverter = new JsonIntermediateToParquetGroupConverter();

  MessageType schema = parquetConverter.convertSchema(test.get("schema").getAsJsonArray(), workUnit);
  Group record =
      parquetConverter.convertRecord(schema, test.get("record").getAsJsonObject(), workUnit).iterator().next();
  assertEqualsIgnoreSpaces(schema.toString(), test.get("expectedSchema").getAsString());
  assertEqualsIgnoreSpaces(record.toString(), test.get("expectedRecord").getAsString());
}
 
Example 16
@Test(expectedExceptions = RuntimeException.class, expectedExceptionsMessageRegExp = "Symbol .* does not belong to set \\[.*?\\]")
public void testEnumTypeBelongsToEnumSet()
    throws Exception {
  JsonObject test = deepCopy(testCases.get("enum").getAsJsonObject(), JsonObject.class);
  parquetConverter = new JsonIntermediateToParquetGroupConverter();

  MessageType schema = parquetConverter.convertSchema(test.get("schema").getAsJsonArray(), workUnit);
  JsonObject jsonRecord = test.get("record").getAsJsonObject();
  jsonRecord.addProperty("some_enum", "HELL");

  parquetConverter.convertRecord(schema, jsonRecord, workUnit).iterator().next();
}
 
Example 17
private MessageType createParquetSchema() {
  List<Type> types = new ArrayList<>();

  for ( IParquetOutputField outputField : outputFields ) {
    types.add( convertToPrimitiveType( outputField ) );
  }

  if ( types.isEmpty() ) {
    throw new IllegalArgumentException( "Schema should contain at least one field" );
  }

  return new MessageType( "parquet-schema", types );
}
 
Example 18
public static List<IParquetInputField> buildInputFields( MessageType schema ) {
  List<IParquetInputField> inputFields = new ArrayList<>();

  for ( Type type : schema.getFields() ) {
    if ( type.isPrimitive() ) {
      inputFields.add( convertField( type ) );
    }
  }

  return inputFields;
}
 
Example 19
private static void showDetails(PrettyPrintWriter out, GroupType type, int depth, MessageType container, List<String> cpath) {
  String name = Strings.repeat(".", depth) + type.getName();
  Repetition rep = type.getRepetition();
  int fcount = type.getFieldCount();
  out.format("%s: %s F:%d%n", name, rep, fcount);

  cpath.add(type.getName());
  for (Type ftype : type.getFields()) {
    showDetails(out, ftype, depth + 1, container, cpath);
  }
  cpath.remove(cpath.size() - 1);
}
 
Example 20
private static void showDetails(PrettyPrintWriter out, Type type, int depth, MessageType container, List<String> cpath) {
  if (type instanceof GroupType) {
    showDetails(out, type.asGroupType(), depth, container, cpath);
    return;
  } else if (type instanceof PrimitiveType) {
    showDetails(out, type.asPrimitiveType(), depth, container, cpath);
    return;
  }
}
 
Example 21
@Override
public void execute(CommandLine options) throws Exception {
    super.execute(options);

    String[] args = options.getArgs();
    String input = args[0];

    Configuration conf = new Configuration();
    Path inpath = new Path(input);

    ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inpath);
    MessageType schema = metaData.getFileMetaData().getSchema();

    PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter()
                                             .withAutoColumn()
                                             .withAutoCrop()
                                             .withWhitespaceHandler(WhiteSpaceHandler.ELIMINATE_NEWLINES)
                                             .withColumnPadding(1)
                                             .withMaxBufferedLines(1000000)
                                             .withFlushOnTab()
                                             .build();

    boolean showmd = !options.hasOption('m');
    boolean showdt = !options.hasOption('d');

    Set<String> showColumns = null;
    if (options.hasOption('c')) {
        String[] cols = options.getOptionValues('c');
        showColumns = new HashSet<String>(Arrays.asList(cols));
    }

    dump(out, metaData, schema, inpath, showmd, showdt, showColumns);
}
 
Example 22
public static ParquetMetadata readFooter(FileSystem fileSystem, Path file)
        throws IOException
{
    FileStatus fileStatus = fileSystem.getFileStatus(file);
    try (FSDataInputStream inputStream = fileSystem.open(file)) {
        // Parquet File Layout:
        //
        // MAGIC
        // variable: Data
        // variable: Metadata
        // 4 bytes: MetadataLength
        // MAGIC

        long length = fileStatus.getLen();
        validateParquet(length >= MAGIC.length + PARQUET_METADATA_LENGTH + MAGIC.length, "%s is not a valid Parquet File", file);
        long metadataLengthIndex = length - PARQUET_METADATA_LENGTH - MAGIC.length;

        inputStream.seek(metadataLengthIndex);
        int metadataLength = readIntLittleEndian(inputStream);

        byte[] magic = new byte[MAGIC.length];
        inputStream.readFully(magic);
        validateParquet(Arrays.equals(MAGIC, magic), "Not valid Parquet file: %s expected magic number: %s got: %s", file, Arrays.toString(MAGIC), Arrays.toString(magic));

        long metadataIndex = metadataLengthIndex - metadataLength;
        validateParquet(
                metadataIndex >= MAGIC.length && metadataIndex < metadataLengthIndex,
                "Corrupted Parquet file: %s metadata index: %s out of range",
                file,
                metadataIndex);
        inputStream.seek(metadataIndex);
        FileMetaData fileMetaData = readFileMetaData(inputStream);
        List<SchemaElement> schema = fileMetaData.getSchema();
        validateParquet(!schema.isEmpty(), "Empty Parquet schema in file: %s", file);

        MessageType messageType = readParquetSchema(schema);
        List<BlockMetaData> blocks = new ArrayList<>();
        List<RowGroup> rowGroups = fileMetaData.getRow_groups();
        if (rowGroups != null) {
            for (RowGroup rowGroup : rowGroups) {
                BlockMetaData blockMetaData = new BlockMetaData();
                blockMetaData.setRowCount(rowGroup.getNum_rows());
                blockMetaData.setTotalByteSize(rowGroup.getTotal_byte_size());
                List<ColumnChunk> columns = rowGroup.getColumns();
                validateParquet(!columns.isEmpty(), "No columns in row group: %s", rowGroup);
                String filePath = columns.get(0).getFile_path();
                for (ColumnChunk columnChunk : columns) {
                    validateParquet(
                            (filePath == null && columnChunk.getFile_path() == null)
                                    || (filePath != null && filePath.equals(columnChunk.getFile_path())),
                            "all column chunks of the same row group must be in the same file");
                    ColumnMetaData metaData = columnChunk.meta_data;
                    String[] path = metaData.path_in_schema.toArray(new String[metaData.path_in_schema.size()]);
                    ColumnPath columnPath = ColumnPath.get(path);
                    ColumnChunkMetaData column = ColumnChunkMetaData.get(
                            columnPath,
                            messageType.getType(columnPath.toArray()).asPrimitiveType().getPrimitiveTypeName(),
                            CompressionCodecName.fromParquet(metaData.codec),
                            readEncodings(metaData.encodings),
                            readStats(metaData.statistics, messageType.getType(columnPath.toArray()).asPrimitiveType().getPrimitiveTypeName()),
                            metaData.data_page_offset,
                            metaData.dictionary_page_offset,
                            metaData.num_values,
                            metaData.total_compressed_size,
                            metaData.total_uncompressed_size);
                    blockMetaData.addColumn(column);
                }
                blockMetaData.setPath(filePath);
                blocks.add(blockMetaData);
            }
        }

        Map<String, String> keyValueMetaData = new HashMap<>();
        List<KeyValue> keyValueList = fileMetaData.getKey_value_metadata();
        if (keyValueList != null) {
            for (KeyValue keyValue : keyValueList) {
                keyValueMetaData.put(keyValue.key, keyValue.value);
            }
        }
        return new ParquetMetadata(new parquet.hadoop.metadata.FileMetaData(messageType, keyValueMetaData, fileMetaData.getCreated_by()), blocks);
    }
}
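
Example 22 parses the footer by hand instead of going through ParquetFileReader. For context, here is a hedged usage sketch mirroring how Example 23 consumes this method; ParquetMetadataReader is the class these examples belong to (its package is not shown, so the import is assumed):

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import parquet.hadoop.metadata.ParquetMetadata;
import parquet.schema.MessageType;

public class FooterDemo {
    // Fetch the footer once, then pull the file schema and
    // row-group metadata out of it, as Example 23 does.
    static MessageType schemaOf(FileSystem fs, Path file) throws IOException {
        ParquetMetadata metadata = ParquetMetadataReader.readFooter(fs, file);
        System.out.println("row groups: " + metadata.getBlocks().size());
        return metadata.getFileMetaData().getSchema();
    }
}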
 
Example 23
private Optional<ConnectorPageSource> createParaflowPageSource(
        Path path,
        long start,
        long length,
        List<ParaflowColumnHandle> columns)
{
    Optional<FileSystem> fileSystemOptional = fsFactory.getFileSystem();
    FileSystem fileSystem;
    ParquetDataSource dataSource;
    if (fileSystemOptional.isPresent()) {
        fileSystem = fileSystemOptional.get();
    }
    else {
        throw new RuntimeException("Could not find filesystem for path " + path);
    }
    try {
        dataSource = buildHdfsParquetDataSource(fileSystem, path, start, length);
        // default length is file size, which means whole file is a split
        length = dataSource.getSize();
        ParquetMetadata parquetMetadata = ParquetMetadataReader.readFooter(fileSystem, path);
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();

        List<Type> fields = columns.stream()
                .filter(column -> column.getColType() != ParaflowColumnHandle.ColumnType.NOTVALID)
                .map(column -> getParquetType(column, fileSchema))
                .filter(Objects::nonNull)
                .collect(Collectors.toList());
        MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);

        List<BlockMetaData> blocks = new ArrayList<>();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + length) {
                blocks.add(block);
            }
        }

        ParquetReader parquetReader = new ParquetReader(
                fileSchema,
                requestedSchema,
                blocks,
                dataSource,
                typeManager);
        return Optional.of(new ParaflowPageSource(
                parquetReader,
                dataSource,
                fileSchema,
                requestedSchema,
                length,
                columns,
                typeManager));
    }
    catch (IOException e) {
        log.error(e);
        return Optional.empty();
    }
}
 
Example 24
public static LocalMetrics execute(FileStatus[] fileStatuses, ParquetMetadata[] metadatas, String[] columnNames, Configuration conf) throws IOException
{
    boolean printColumns = true;
    List<ParquetFileReader> readers = new ArrayList<ParquetFileReader>();
    List<Column> columns = new ArrayList<Column>();
    for (int i = 0; i < fileStatuses.length; ++i)
    {
        FileStatus status = fileStatuses[i];
        ParquetMetadata metadata = metadatas[i];

        MessageType schema = metadata.getFileMetaData().getSchema();

        List<ColumnDescriptor> columnDescriptors = new ArrayList<ColumnDescriptor>();

        for (String columnName : columnNames)
        {
            int fieldIndex = schema.getFieldIndex(columnName.toLowerCase());
            ColumnDescriptor descriptor = schema.getColumns().get(fieldIndex);

            columnDescriptors.add(descriptor);

            if (printColumns)
            {
                Column column = new Column();
                column.setIndex(fieldIndex);
                column.setName(schema.getFieldName(column.getIndex()));
                column.setDescriptor(descriptor);
                columns.add(column);
            }
        }
        printColumns = false;

        readers.add(new ParquetFileReader(conf, status.getPath(), metadata.getBlocks(), columnDescriptors));
    }

    long time = System.currentTimeMillis();
    long rowCount = 0;
    long rowGroupCount = 0;
    long readerCount = readers.size();
    for (ParquetFileReader reader : readers)
    {
        PageReadStore pageReadStore;
        while ((pageReadStore = reader.readNextRowGroup()) != null)
        {
            rowGroupCount++;
            rowCount += pageReadStore.getRowCount();
        }
        reader.close();
    }
    LocalMetrics metrics = new LocalMetrics(columns, readerCount, rowGroupCount, rowCount, System.currentTimeMillis() - time);
    return metrics;
}
 
Example 25
public static void setSchema(MessageType schema, Configuration configuration) {
    configuration.set(PARQUET_SCHEMA_PROPERTY_NAME, schema.toString());
}
 
Example 26
public static MessageType getSchema(Configuration configuration) {
    return MessageTypeParser.parseMessageType(configuration.get(PARQUET_SCHEMA_PROPERTY_NAME));
}
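
Examples 25 and 26 round-trip a schema through a Hadoop Configuration as its string form. A self-contained sketch of that round-trip; the property key below is made up for illustration, since the value of PARQUET_SCHEMA_PROPERTY_NAME is not shown above:

import org.apache.hadoop.conf.Configuration;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public class SchemaRoundTripDemo {
    // Illustrative key only; the original constant's value is not shown.
    private static final String PARQUET_SCHEMA_PROPERTY_NAME = "parquet.schema.demo";

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        MessageType schema = MessageTypeParser.parseMessageType(
                "message example { required int32 id; }");
        conf.set(PARQUET_SCHEMA_PROPERTY_NAME, schema.toString());  // as in setSchema
        MessageType restored = MessageTypeParser.parseMessageType(  // as in getSchema
                conf.get(PARQUET_SCHEMA_PROPERTY_NAME));
        System.out.println(schema.equals(restored)); // prints "true"
    }
}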
 
Example 27
@Override
public Iterable<Group> convertRecord(MessageType outputSchema, JsonObject inputRecord, WorkUnitState workUnit)
    throws DataConversionException {
  return new SingleRecordIterable<>((Group) recordConverter.convert(inputRecord));
}
 
Example 28
/**
 * Build a version-specific {@link ParquetWriter} for a given {@link ParquetWriterConfiguration}.
 * @param writerConfiguration the configuration carrying the codec, block/page sizes, and record format
 * @return a {@link ParquetWriterShim} wrapping the constructed writer
 * @throws IOException if the underlying writer cannot be created
 */
@Override
public ParquetWriterShim getVersionSpecificWriter(ParquetWriterConfiguration writerConfiguration)
    throws IOException {

  CompressionCodecName codecName = CompressionCodecName.fromConf(writerConfiguration.getCodecName());
  ParquetProperties.WriterVersion writerVersion = ParquetProperties.WriterVersion
      .fromString(writerConfiguration.getWriterVersion());

  Configuration conf = new Configuration();
  ParquetWriter versionSpecificWriter = null;
  switch (writerConfiguration.getRecordFormat()) {
    case GROUP: {
      GroupWriteSupport.setSchema((MessageType) this.schema, conf);
      WriteSupport support = new GroupWriteSupport();
      versionSpecificWriter = new ParquetWriter<Group>(
          writerConfiguration.getAbsoluteStagingFile(),
          support,
          codecName,
          writerConfiguration.getBlockSize(),
          writerConfiguration.getPageSize(),
          writerConfiguration.getDictPageSize(),
          writerConfiguration.isDictionaryEnabled(),
          writerConfiguration.isValidate(),
          writerVersion,
          conf);
      break;
    }
    case AVRO:  {
      versionSpecificWriter = new AvroParquetWriter(
          writerConfiguration.getAbsoluteStagingFile(),
          (Schema) this.schema,
          codecName,
          writerConfiguration.getBlockSize(),
          writerConfiguration.getPageSize(),
          writerConfiguration.isDictionaryEnabled(),
          conf);
      break;
    }
    case PROTOBUF: {
      versionSpecificWriter = new ProtoParquetWriter(
          writerConfiguration.getAbsoluteStagingFile(),
          (Class<? extends Message>) this.schema,
          codecName,
          writerConfiguration.getBlockSize(),
          writerConfiguration.getPageSize(),
          writerConfiguration.isDictionaryEnabled(),
          writerConfiguration.isValidate());
      break;
    }
    default: throw new RuntimeException("Record format not supported");
  }
  ParquetWriter finalVersionSpecificWriter = versionSpecificWriter;

  return new ParquetWriterShim() {
    @Override
    public void write(Object record)
        throws IOException {
      finalVersionSpecificWriter.write(record);
    }

    @Override
    public void close()
        throws IOException {
      finalVersionSpecificWriter.close();
    }
  };
}
 
Example 29
@Override
public RecordMaterializer<Group> prepareForRead(Configuration conf, Map<String, String> metaData,
    MessageType schema, ReadContext context) {
  return new GroupRecordConverter(schema);
}
 
Example 30
@Override
public RecordMaterializer<RowMetaAndData> prepareForRead( Configuration configuration,
                                                          Map<String, String> keyValueMetaData,
                                                          MessageType fileSchema, ReadContext readContext ) {
  return new ParquetConverter.MyRecordMaterializer( converter );
}