public class ParquetFileReader extends Object implements Closeable
| Modifier and Type | Field and Description |
|---|---|
static String |
PARQUET_READ_PARALLELISM |
| Constructor and Description |
|---|
ParquetFileReader(org.apache.hadoop.conf.Configuration configuration,
FileMetaData fileMetaData,
org.apache.hadoop.fs.Path filePath,
List<BlockMetaData> blocks,
List<ColumnDescriptor> columns)
Deprecated.
|
ParquetFileReader(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path filePath,
List<BlockMetaData> blocks,
List<ColumnDescriptor> columns)
Deprecated.
use ParquetFileReader(Configuration configuration, FileMetaData fileMetaData,
Path filePath, List<BlockMetaData> blocks, List<ColumnDescriptor> columns) instead.
|
ParquetFileReader(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file,
ParquetMetadata footer) |
ParquetFileReader(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file,
ParquetMetadataConverter.MetadataFilter filter) |
| Modifier and Type | Method and Description |
|---|---|
void |
appendTo(ParquetFileWriter writer) |
void |
close() |
org.apache.parquet.hadoop.DictionaryPageReader |
getDictionaryReader(BlockMetaData block) |
FileMetaData |
getFileMetaData() |
ParquetMetadata |
getFooter() |
DictionaryPageReadStore |
getNextDictionaryReader()
Returns a
DictionaryPageReadStore for the row group that would be
returned by calling readNextRowGroup() or skipped by calling
skipNextRowGroup(). |
org.apache.hadoop.fs.Path |
getPath() |
long |
getRecordCount() |
List<BlockMetaData> |
getRowGroups() |
static ParquetFileReader |
open(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file) |
static ParquetFileReader |
open(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file,
ParquetMetadata footer) |
static ParquetFileReader |
open(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file,
ParquetMetadataConverter.MetadataFilter filter) |
static List<Footer> |
readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus fileStatus)
Read the footers of all the files under that path (recursively)
not using summary files.
|
static List<Footer> |
readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration,
List<org.apache.hadoop.fs.FileStatus> partFiles)
Deprecated.
|
static List<Footer> |
readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration,
List<org.apache.hadoop.fs.FileStatus> partFiles,
boolean skipRowGroups)
read all the footers of the files provided
(not using summary files)
|
static List<Footer> |
readAllFootersInParallelUsingSummaryFiles(org.apache.hadoop.conf.Configuration configuration,
Collection<org.apache.hadoop.fs.FileStatus> partFiles,
boolean skipRowGroups)
for files provided, check if there's a summary file.
|
static List<Footer> |
readAllFootersInParallelUsingSummaryFiles(org.apache.hadoop.conf.Configuration configuration,
List<org.apache.hadoop.fs.FileStatus> partFiles)
Deprecated.
|
static ParquetMetadata |
readFooter(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus file)
Deprecated.
use
ParquetFileReader#readFooter(Configuration, FileStatus, MetadataFilter) |
static ParquetMetadata |
readFooter(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus file,
ParquetMetadataConverter.MetadataFilter filter)
Reads the meta data block in the footer of the file
|
static ParquetMetadata |
readFooter(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path file)
Deprecated.
|
static ParquetMetadata |
readFooter(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path file,
ParquetMetadataConverter.MetadataFilter filter)
Reads the meta data in the footer of the file.
|
static ParquetMetadata |
readFooter(InputFile file,
ParquetMetadataConverter.MetadataFilter filter)
Reads the meta data block in the footer of the file using provided input stream
|
static List<Footer> |
readFooters(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus pathStatus)
Deprecated.
|
static List<Footer> |
readFooters(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus pathStatus,
boolean skipRowGroups)
Read the footers of all the files under that path (recursively)
using summary files if possible
|
static List<Footer> |
readFooters(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path path)
Deprecated.
|
PageReadStore |
readNextRowGroup()
Reads all the columns requested from the row group at the current file position.
|
static List<Footer> |
readSummaryFile(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus summaryStatus)
Specifically reads a given summary file
|
void |
setRequestedSchema(MessageType projection) |
boolean |
skipNextRowGroup() |
public static String PARQUET_READ_PARALLELISM
public ParquetFileReader(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path filePath,
List<BlockMetaData> blocks,
List<ColumnDescriptor> columns)
throws IOException
IOException
@Deprecated public ParquetFileReader(org.apache.hadoop.conf.Configuration configuration, FileMetaData fileMetaData, org.apache.hadoop.fs.Path filePath, List<BlockMetaData> blocks, List<ColumnDescriptor> columns) throws IOException
configuration - the Hadoop conf
fileMetaData - fileMetaData for parquet file
blocks - the blocks to read
columns - the columns to read (their path)
IOException - if the file cannot be opened
public ParquetFileReader(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file,
ParquetMetadataConverter.MetadataFilter filter)
throws IOException
conf - the Hadoop Configuration
file - Path to a parquet file
filter - a ParquetMetadataConverter.MetadataFilter for selecting row groups
IOException - if the file cannot be opened
public ParquetFileReader(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file,
ParquetMetadata footer)
throws IOException
conf - the Hadoop Configuration
file - Path to a parquet file
footer - a ParquetMetadata footer already read from the file
IOException - if the file cannot be opened
@Deprecated public static List<Footer> readAllFootersInParallelUsingSummaryFiles(org.apache.hadoop.conf.Configuration configuration, List<org.apache.hadoop.fs.FileStatus> partFiles) throws IOException
configuration - the hadoop conf to connect to the file system
partFiles - the part files to read
IOException
public static List<Footer> readAllFootersInParallelUsingSummaryFiles(org.apache.hadoop.conf.Configuration configuration, Collection<org.apache.hadoop.fs.FileStatus> partFiles, boolean skipRowGroups) throws IOException
configuration - the hadoop conf to connect to the file system
partFiles - the part files to read
skipRowGroups - whether to skip row groups in the footers
IOException
@Deprecated public static List<Footer> readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration, List<org.apache.hadoop.fs.FileStatus> partFiles) throws IOException
IOException
public static List<Footer> readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration, List<org.apache.hadoop.fs.FileStatus> partFiles, boolean skipRowGroups) throws IOException
configuration - the conf to access the File System
partFiles - the files to read
skipRowGroups - whether to skip the row group info
IOException
public static List<Footer> readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus fileStatus) throws IOException
configuration - the configuration to access the FS
fileStatus - the root dir
IOException
@Deprecated public static List<Footer> readFooters(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path path) throws IOException
IOException
@Deprecated public static List<Footer> readFooters(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus pathStatus) throws IOException
configuration -
pathStatus -
IOException
public static List<Footer> readFooters(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus pathStatus, boolean skipRowGroups) throws IOException
configuration - the configuration to access the FS
pathStatus - the root dir
IOException
public static List<Footer> readSummaryFile(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus summaryStatus) throws IOException
configuration -
summaryStatus -
IOException
@Deprecated public static final ParquetMetadata readFooter(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path file) throws IOException
configuration -
file - the parquet File
IOException - if an error occurs while reading the file
public static ParquetMetadata readFooter(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path file, ParquetMetadataConverter.MetadataFilter filter) throws IOException
configuration -
file - the Parquet File
filter - the filter to apply to row groups
IOException - if an error occurs while reading the file
@Deprecated public static final ParquetMetadata readFooter(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus file) throws IOException
ParquetFileReader#readFooter(Configuration, FileStatus, MetadataFilter)
IOException
public static final ParquetMetadata readFooter(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus file, ParquetMetadataConverter.MetadataFilter filter) throws IOException
configuration -
file - the parquet File
filter - the filter to apply to row groups
IOException - if an error occurs while reading the file
public static final ParquetMetadata readFooter(InputFile file, ParquetMetadataConverter.MetadataFilter filter) throws IOException
file - an InputFile to read
filter - the filter to apply to row groups
IOException - if an error occurs while reading the file
public static ParquetFileReader open(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path file) throws IOException
IOException
public static ParquetFileReader open(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path file, ParquetMetadataConverter.MetadataFilter filter) throws IOException
IOException
public static ParquetFileReader open(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path file, ParquetMetadata footer) throws IOException
IOException
public ParquetMetadata getFooter()
public FileMetaData getFileMetaData()
public long getRecordCount()
public org.apache.hadoop.fs.Path getPath()
public List<BlockMetaData> getRowGroups()
public void setRequestedSchema(MessageType projection)
public void appendTo(ParquetFileWriter writer) throws IOException
IOException
public PageReadStore readNextRowGroup() throws IOException
IOException - if an error occurs while reading
public boolean skipNextRowGroup()
public DictionaryPageReadStore getNextDictionaryReader()
Returns a DictionaryPageReadStore for the row group that would be
returned by calling readNextRowGroup() or skipped by calling
skipNextRowGroup().
public org.apache.parquet.hadoop.DictionaryPageReader getDictionaryReader(BlockMetaData block)
public void close()
throws IOException
close in interface Closeable
close in interface AutoCloseable
IOException
Copyright © 2018 The Apache Software Foundation. All rights reserved.