Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion core/src/main/java/org/apache/iceberg/CatalogUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,16 @@

public class CatalogUtil {
private static final Logger LOG = LoggerFactory.getLogger(CatalogUtil.class);

/**
* Shortcut catalog property to load a catalog implementation through a short type name,
* instead of specifying a full java class through {@link CatalogProperties#CATALOG_IMPL}.
* Currently the following type to implementation mappings are supported:
* <ul>
* <li>hive: org.apache.iceberg.hive.HiveCatalog</li>
* <li>hadoop: org.apache.iceberg.hadoop.HadoopCatalog</li>
* </ul>
*/
public static final String ICEBERG_CATALOG_TYPE = "type";
public static final String ICEBERG_CATALOG_TYPE_HADOOP = "hadoop";
public static final String ICEBERG_CATALOG_TYPE_HIVE = "hive";
Expand Down Expand Up @@ -184,10 +194,22 @@ public static Catalog loadCatalog(
return catalog;
}

/**
* Build an Iceberg {@link Catalog} based on a map of catalog properties and optional Hadoop configuration.
* <p>
* This method examines both the {@link #ICEBERG_CATALOG_TYPE} and {@link CatalogProperties#CATALOG_IMPL} properties
* to determine the catalog implementation to load.
* If nothing is specified for both properties, Hive catalog will be loaded by default.
*
* @param name catalog name
* @param options catalog properties
* @param conf Hadoop configuration
* @return initialized catalog
*/
public static Catalog buildIcebergCatalog(String name, Map<String, String> options, Configuration conf) {
String catalogImpl = options.get(CatalogProperties.CATALOG_IMPL);
if (catalogImpl == null) {
String catalogType = options.getOrDefault(ICEBERG_CATALOG_TYPE, ICEBERG_CATALOG_TYPE_HIVE);
String catalogType = PropertyUtil.propertyAsString(options, ICEBERG_CATALOG_TYPE, ICEBERG_CATALOG_TYPE_HIVE);
switch (catalogType.toLowerCase(Locale.ENGLISH)) {
case ICEBERG_CATALOG_TYPE_HIVE:
catalogImpl = ICEBERG_CATALOG_HIVE;
Expand Down
69 changes: 39 additions & 30 deletions mr/src/main/java/org/apache/iceberg/mr/Catalogs.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import org.apache.iceberg.Table;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.exceptions.NoSuchNamespaceException;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
Expand All @@ -45,20 +44,35 @@
/**
* Class for catalog resolution and accessing the common functions for {@link Catalog} API.
* <p>
* See {@link Catalogs#getCatalogType(Configuration, String)} for catalog type resolution strategy.
* If the catalog name is provided, get the catalog type from iceberg.catalog.<code>catalogName</code>.type config.
* <p>
* In case the catalog name is {@link #ICEBERG_HADOOP_TABLE_NAME location_based_table},
* type is ignored and tables will be loaded using {@link HadoopTables}.
* <p>
* In case the value of catalog type is null, iceberg.catalog.<code>catalogName</code>.catalog-impl config
* is used to determine the catalog implementation class.
* <p>
* If catalog name is null, get the catalog type from {@link InputFormatConfig#CATALOG iceberg.mr.catalog} config:
* <ul>
* <li>hive: HiveCatalog</li>
* <li>location: HadoopTables</li>
* <li>hadoop: HadoopCatalog</li>
* </ul>
* <p>
* In case the value of catalog type is null,
* {@link InputFormatConfig#CATALOG_LOADER_CLASS iceberg.mr.catalog.loader.class} is used to determine
* the catalog implementation class.
* <p>
* Note: null catalog name mode is only supported for backwards compatibility. Using this mode is NOT RECOMMENDED.
*/
public final class Catalogs {

public static final String ICEBERG_DEFAULT_CATALOG_NAME = "default_iceberg";
public static final String ICEBERG_HADOOP_TABLE_NAME = "location_based_table";

private static final String HIVE_CATALOG_TYPE = "hive";
private static final String HADOOP_CATALOG_TYPE = "hadoop";
private static final String NO_CATALOG_TYPE = "no catalog";

public static final String NAME = "name";
public static final String LOCATION = "location";

private static final String NO_CATALOG_TYPE = "no catalog";
private static final Set<String> PROPERTIES_TO_REMOVE =
ImmutableSet.of(InputFormatConfig.TABLE_SCHEMA, InputFormatConfig.PARTITION_SPEC, LOCATION, NAME,
InputFormatConfig.CATALOG_NAME);
Expand Down Expand Up @@ -194,10 +208,6 @@ public static boolean hiveCatalog(Configuration conf, Properties props) {
@VisibleForTesting
static Optional<Catalog> loadCatalog(Configuration conf, String catalogName) {
String catalogType = getCatalogType(conf, catalogName);
if (catalogType == null) {
throw new NoSuchNamespaceException("Catalog definition for %s is not found.", catalogName);
}

if (NO_CATALOG_TYPE.equalsIgnoreCase(catalogType)) {
return Optional.empty();
} else {
Expand Down Expand Up @@ -233,45 +243,44 @@ private static Map<String, String> getCatalogProperties(Configuration conf, Stri
*/
private static Map<String, String> addCatalogPropertiesIfMissing(Configuration conf, String catalogType,
Map<String, String> catalogProperties) {
catalogProperties.putIfAbsent(CatalogUtil.ICEBERG_CATALOG_TYPE, catalogType);
if (catalogType.equalsIgnoreCase(HADOOP_CATALOG_TYPE)) {
catalogProperties.putIfAbsent(CatalogProperties.WAREHOUSE_LOCATION,
conf.get(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION));
if (catalogType != null) {
catalogProperties.putIfAbsent(CatalogUtil.ICEBERG_CATALOG_TYPE, catalogType);
}

String legacyCatalogImpl = conf.get(InputFormatConfig.CATALOG_LOADER_CLASS);
if (legacyCatalogImpl != null) {
catalogProperties.putIfAbsent(CatalogProperties.CATALOG_IMPL, legacyCatalogImpl);
}

String legacyWarehouseLocation = conf.get(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION);
if (legacyWarehouseLocation != null) {
catalogProperties.putIfAbsent(CatalogProperties.WAREHOUSE_LOCATION, legacyWarehouseLocation);
}
return catalogProperties;
}

/**
* Return the catalog type based on the catalog name.
* <p>
* If the catalog name is provided get the catalog type from 'iceberg.catalog.<code>catalogName</code>.type' config.
* In case the value of this property is null, return with no catalog definition (Hadoop Table)
* </p>
* <p>
* If catalog name is null, check the global conf for 'iceberg.mr.catalog' property. If the value of the property is:
* <ul>
* <li>null/hive -> Hive Catalog</li>
* <li>location -> Hadoop Table</li>
* <li>hadoop -> Hadoop Catalog</li>
* <li>any other value -> Custom Catalog</li>
* </ul>
* </p>
* See {@link Catalogs} documentation for catalog type resolution strategy.
*
* @param conf global hive configuration
* @param catalogName name of the catalog
* @return type of the catalog, can be null
*/
private static String getCatalogType(Configuration conf, String catalogName) {
if (catalogName != null) {
String catalogType = conf.get(String.format(InputFormatConfig.CATALOG_TYPE_TEMPLATE, catalogName));
if (catalogName.equals(ICEBERG_HADOOP_TABLE_NAME) || catalogType == null) {
String catalogType = conf.get(InputFormatConfig.catalogPropertyConfigKey(
catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE));
if (catalogName.equals(ICEBERG_HADOOP_TABLE_NAME)) {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to update the javadocs of this method according to this change?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved the doc of this method to Catalogs class level, because private method docs are not generated in html.

return NO_CATALOG_TYPE;
} else {
return catalogType;
}
} else {
String catalogType = conf.get(InputFormatConfig.CATALOG);
if (catalogType == null) {
return HIVE_CATALOG_TYPE;
return CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE;
} else if (catalogType.equals(LOCATION)) {
return NO_CATALOG_TYPE;
} else {
Expand Down
27 changes: 20 additions & 7 deletions mr/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,20 +51,24 @@ private InputFormatConfig() {
public static final String LOCALITY = "iceberg.mr.locality";

/**
* @deprecated please use {@link InputFormatConfig#CATALOG_TYPE_TEMPLATE} to specify the type of a catalog,
* and set {@link InputFormatConfig#CATALOG_NAME} in table property.
* @deprecated please use {@link #catalogPropertyConfigKey(String, String)}
* with config key {@link org.apache.iceberg.CatalogUtil#ICEBERG_CATALOG_TYPE} to specify the type of a catalog.
*/
@Deprecated
public static final String CATALOG = "iceberg.mr.catalog";

/**
* @deprecated please use {@link InputFormatConfig#CATALOG_WAREHOUSE_TEMPLATE} to specify the warehouse location.
* @deprecated please use {@link #catalogPropertyConfigKey(String, String)}
* with config key {@link org.apache.iceberg.CatalogProperties#WAREHOUSE_LOCATION}
* to specify the warehouse location of a catalog.
*/
@Deprecated
public static final String HADOOP_CATALOG_WAREHOUSE_LOCATION = "iceberg.mr.catalog.hadoop.warehouse.location";

/**
* @deprecated please use {@link InputFormatConfig#CATALOG_CLASS_TEMPLATE} to set catalog implementation.
* @deprecated please use {@link #catalogPropertyConfigKey(String, String)}
* with config key {@link org.apache.iceberg.CatalogProperties#CATALOG_IMPL}
* to specify the implementation of a catalog.
*/
@Deprecated
public static final String CATALOG_LOADER_CLASS = "iceberg.mr.catalog.loader.class";
Expand All @@ -91,9 +95,6 @@ private InputFormatConfig() {
public static final String SNAPSHOT_TABLE_SUFFIX = "__snapshots";

public static final String CATALOG_CONFIG_PREFIX = "iceberg.catalog.";
public static final String CATALOG_TYPE_TEMPLATE = "iceberg.catalog.%s.type";
public static final String CATALOG_WAREHOUSE_TEMPLATE = "iceberg.catalog.%s.warehouse";
public static final String CATALOG_CLASS_TEMPLATE = "iceberg.catalog.%s.catalog-impl";

public enum InMemoryDataModel {
PIG,
Expand Down Expand Up @@ -220,6 +221,18 @@ public static String[] selectedColumns(Configuration conf) {
return readColumns != null && readColumns.length > 0 ? readColumns : null;
}

/**
* Get Hadoop config key of a catalog property based on catalog name
* @param catalogName catalog name
* @param catalogProperty catalog property, can be any custom property,
* a commonly used list of properties can be found
* at {@link org.apache.iceberg.CatalogProperties}
* @return Hadoop config key of a catalog property for the catalog name
*/
public static String catalogPropertyConfigKey(String catalogName, String catalogProperty) {
return String.format("%s%s.%s", CATALOG_CONFIG_PREFIX, catalogName, catalogProperty);
}

private static Schema schema(Configuration conf, String key) {
String json = conf.get(key);
return json == null ? null : SchemaParser.fromJson(json);
Expand Down
Loading