public class ParquetFileWriter extends Object
| Modifier and Type | Class and Description |
|---|---|
static class |
ParquetFileWriter.Mode |
| Modifier and Type | Field and Description |
|---|---|
static int |
CURRENT_VERSION |
static byte[] |
MAGIC |
static String |
PARQUET_COMMON_METADATA_FILE |
static String |
PARQUET_METADATA_FILE |
| Constructor and Description |
|---|
ParquetFileWriter(org.apache.hadoop.conf.Configuration configuration,
MessageType schema,
org.apache.hadoop.fs.Path file) |
ParquetFileWriter(org.apache.hadoop.conf.Configuration configuration,
MessageType schema,
org.apache.hadoop.fs.Path file,
ParquetFileWriter.Mode mode) |
ParquetFileWriter(org.apache.hadoop.conf.Configuration configuration,
MessageType schema,
org.apache.hadoop.fs.Path file,
ParquetFileWriter.Mode mode,
long rowGroupSize,
int maxPaddingSize) |
| Modifier and Type | Method and Description |
|---|---|
void |
appendFile(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file) |
void |
appendRowGroup(org.apache.hadoop.fs.FSDataInputStream from,
BlockMetaData rowGroup,
boolean dropColumns) |
void |
appendRowGroup(SeekableInputStream from,
BlockMetaData rowGroup,
boolean dropColumns) |
void |
appendRowGroups(org.apache.hadoop.fs.FSDataInputStream file,
List<BlockMetaData> rowGroups,
boolean dropColumns) |
void |
appendRowGroups(SeekableInputStream file,
List<BlockMetaData> rowGroups,
boolean dropColumns) |
void |
end(Map<String,String> extraMetaData)
ends a file once all blocks have been written.
|
void |
endBlock()
ends a block once all column chunks have been written
|
void |
endColumn()
end a column (once all rep, def and data have been written)
|
long |
getNextRowGroupSize() |
long |
getPos() |
void |
start()
start the file
|
void |
startBlock(long recordCount)
start a block
|
void |
startColumn(ColumnDescriptor descriptor,
long valueCount,
CompressionCodecName compressionCodecName)
start a column inside a block
|
void |
writeDataPage(int valueCount,
int uncompressedPageSize,
org.apache.parquet.bytes.BytesInput bytes,
Encoding rlEncoding,
Encoding dlEncoding,
Encoding valuesEncoding)
Deprecated.
|
void |
writeDataPage(int valueCount,
int uncompressedPageSize,
org.apache.parquet.bytes.BytesInput bytes,
Statistics statistics,
Encoding rlEncoding,
Encoding dlEncoding,
Encoding valuesEncoding)
writes a single page
|
void |
writeDictionaryPage(DictionaryPage dictionaryPage)
writes a dictionary page
|
static void |
writeMetadataFile(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path outputPath,
List<Footer> footers)
writes a _metadata and _common_metadata file
|
public static final String PARQUET_METADATA_FILE
public static final String PARQUET_COMMON_METADATA_FILE
public static final byte[] MAGIC
public static final int CURRENT_VERSION
public ParquetFileWriter(org.apache.hadoop.conf.Configuration configuration,
MessageType schema,
org.apache.hadoop.fs.Path file)
throws IOException
configuration - Hadoop configuration
schema - the schema of the data
file - the file to write to
IOException - if the file cannot be created
public ParquetFileWriter(org.apache.hadoop.conf.Configuration configuration,
MessageType schema,
org.apache.hadoop.fs.Path file,
ParquetFileWriter.Mode mode)
throws IOException
configuration - Hadoop configuration
schema - the schema of the data
file - the file to write to
mode - file creation mode
IOException - if the file cannot be created
public ParquetFileWriter(org.apache.hadoop.conf.Configuration configuration,
MessageType schema,
org.apache.hadoop.fs.Path file,
ParquetFileWriter.Mode mode,
long rowGroupSize,
int maxPaddingSize)
throws IOException
configuration - Hadoop configuration
schema - the schema of the data
file - the file to write to
mode - file creation mode
rowGroupSize - the row group size
IOException - if the file cannot be created
public void start()
throws IOException
IOException
public void startBlock(long recordCount)
throws IOException
recordCount - the record count in this block
IOException
public void startColumn(ColumnDescriptor descriptor, long valueCount, CompressionCodecName compressionCodecName) throws IOException
descriptor - the column descriptor
valueCount - the value count in this column
compressionCodecName - the name of the compression codec
IOException
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException
dictionaryPage - the dictionary page
IOException
@Deprecated
public void writeDataPage(int valueCount, int uncompressedPageSize, org.apache.parquet.bytes.BytesInput bytes, Encoding rlEncoding, Encoding dlEncoding, Encoding valuesEncoding) throws IOException
valueCount - count of values
uncompressedPageSize - the size of the data once uncompressed
bytes - the compressed data for the page without header
rlEncoding - encoding of the repetition level
dlEncoding - encoding of the definition level
valuesEncoding - encoding of values
IOException
public void writeDataPage(int valueCount,
int uncompressedPageSize,
org.apache.parquet.bytes.BytesInput bytes,
Statistics statistics,
Encoding rlEncoding,
Encoding dlEncoding,
Encoding valuesEncoding)
throws IOException
valueCount - count of values
uncompressedPageSize - the size of the data once uncompressed
bytes - the compressed data for the page without header
rlEncoding - encoding of the repetition level
dlEncoding - encoding of the definition level
valuesEncoding - encoding of values
IOException
public void endColumn()
throws IOException
IOException
public void endBlock()
throws IOException
IOException
public void appendFile(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file)
throws IOException
IOException
public void appendRowGroups(org.apache.hadoop.fs.FSDataInputStream file,
List<BlockMetaData> rowGroups,
boolean dropColumns)
throws IOException
IOException
public void appendRowGroups(SeekableInputStream file, List<BlockMetaData> rowGroups, boolean dropColumns) throws IOException
IOException
public void appendRowGroup(org.apache.hadoop.fs.FSDataInputStream from,
BlockMetaData rowGroup,
boolean dropColumns)
throws IOException
IOException
public void appendRowGroup(SeekableInputStream from, BlockMetaData rowGroup, boolean dropColumns) throws IOException
IOException
public void end(Map<String,String> extraMetaData) throws IOException
extraMetaData - the extra meta data to write in the footer
IOException
public static void writeMetadataFile(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path outputPath,
List<Footer> footers)
throws IOException
configuration - the configuration to use to get the FileSystem
outputPath - the directory to write the _metadata file to
footers - the list of footers to merge
IOException
public long getPos()
throws IOException
IOException
public long getNextRowGroupSize()
throws IOException
IOException
Copyright © 2018 The Apache Software Foundation. All rights reserved.