diff --git a/assembly/pom.xml b/assembly/pom.xml index 885319bab45..0dff52c07e3 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -58,12 +58,12 @@ org.apache.carbondata - carbondata-spark_${spark.binary.version} + carbondata-spark ${project.version} org.apache.carbondata - carbondata-mv-plan_${spark.binary.version} + carbondata-mv-plan ${project.version} diff --git a/common/pom.xml b/common/pom.xml index b4be1c5c912..d6b33868a33 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -51,7 +51,8 @@ org.apache.hadoop - hadoop-common + hadoop-client-api + ${hadoop.version} diff --git a/core/pom.xml b/core/pom.xml index cd0ee2cfbc3..03a21cf269e 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -52,7 +52,38 @@ org.apache.hadoop - hadoop-client + hadoop-client-runtime + ${hadoop.version} + + + commons-codec + commons-codec + 1.16.0 + + + commons-io + commons-io + 2.13.0 + + + com.google.guava + guava + 14.0.1 + + + commons-collections + commons-collections + 3.2.2 + + + org.apache.commons + commons-compress + 1.23.0 + + + com.google.code.findbugs + jsr305 + 3.0.0 org.xerial.snappy @@ -67,7 +98,7 @@ com.github.luben zstd-jni - 1.3.2-2 + 1.5.5-4 org.jmockit @@ -82,7 +113,7 @@ org.apache.zookeeper zookeeper - 3.4.7 + 3.6.3 jline @@ -97,7 +128,7 @@ org.apache.commons commons-lang3 - 3.5 + 3.12.0 org.roaringbitmap @@ -112,12 +143,12 @@ io.netty netty-all - 4.1.17.Final + 4.1.96.Final org.lz4 lz4-java - 1.4.0 + 1.8.0 diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index 023137e815f..73c9339964d 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -2659,18 +2659,6 @@ private CarbonCommonConstants() { public static final String CARBON_SDK_EMPTY_METADATA_PATH = "emptyMetadataFolder"; - /** - * Property to identify 
if the spark version is above 3.x version - */ - public static final String CARBON_SPARK_VERSION_SPARK3 = "carbon.spark.version.spark3"; - - public static final String CARBON_SPARK_VERSION_SPARK3_DEFAULT = "false"; - - /** - * Carbon Spark 3.x supported data file written version - */ - public static final String CARBON_SPARK3_VERSION = "2.2.0"; - /** * This property is to enable the min max pruning of target carbon table based on input/source * data diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/DimensionChunkReaderV3.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/DimensionChunkReaderV3.java index b42e0b2661c..02c3ea3f62e 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/DimensionChunkReaderV3.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/DimensionChunkReaderV3.java @@ -49,7 +49,7 @@ import org.apache.carbondata.format.DataChunk3; import org.apache.carbondata.format.Encoding; -import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang3.ArrayUtils; /** * Dimension column V3 Reader class which will be used to read and uncompress diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/MeasureChunkReaderV3.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/MeasureChunkReaderV3.java index 81e5cdad422..bef84ab2937 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/MeasureChunkReaderV3.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/MeasureChunkReaderV3.java @@ -38,7 +38,7 @@ import org.apache.carbondata.format.DataChunk3; import org.apache.carbondata.format.Encoding; -import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang3.ArrayUtils; /** * Measure column V3 Reader class which will be used to read and 
uncompress diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileFactory.java b/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileFactory.java index b2d6e8309ca..cc77cac552b 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileFactory.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileFactory.java @@ -45,7 +45,7 @@ import org.apache.carbondata.core.util.ThreadLocalSessionInfo; import org.apache.commons.io.FileUtils; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; diff --git a/core/src/main/java/org/apache/carbondata/core/index/IndexStoreManager.java b/core/src/main/java/org/apache/carbondata/core/index/IndexStoreManager.java index cdf960e9224..38bf6649fb7 100644 --- a/core/src/main/java/org/apache/carbondata/core/index/IndexStoreManager.java +++ b/core/src/main/java/org/apache/carbondata/core/index/IndexStoreManager.java @@ -45,7 +45,7 @@ import org.apache.carbondata.core.util.CarbonProperties; import org.apache.commons.collections.CollectionUtils; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.Path; import org.apache.log4j.Logger; diff --git a/core/src/main/java/org/apache/carbondata/core/locks/CarbonLockUtil.java b/core/src/main/java/org/apache/carbondata/core/locks/CarbonLockUtil.java index 6b6e607f4a6..90453f9ed54 100644 --- a/core/src/main/java/org/apache/carbondata/core/locks/CarbonLockUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/locks/CarbonLockUtil.java @@ -27,7 +27,7 @@ import org.apache.carbondata.core.util.CarbonProperties; import org.apache.carbondata.core.util.path.CarbonTablePath; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; /** diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/IndexSchema.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/IndexSchema.java index c128271e633..f00886dbb0f 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/IndexSchema.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/IndexSchema.java @@ -31,7 +31,7 @@ import static org.apache.carbondata.core.constants.CarbonCommonConstants.INDEX_COLUMNS; import com.google.gson.annotations.SerializedName; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * It is the new schema of index and it has less fields compare to {{@link IndexSchema}} diff --git a/core/src/main/java/org/apache/carbondata/core/reader/CarbonIndexFileReader.java b/core/src/main/java/org/apache/carbondata/core/reader/CarbonIndexFileReader.java index b99b13bfe1e..9639e3b1597 100644 --- a/core/src/main/java/org/apache/carbondata/core/reader/CarbonIndexFileReader.java +++ b/core/src/main/java/org/apache/carbondata/core/reader/CarbonIndexFileReader.java @@ -87,7 +87,7 @@ public void openThriftReader(String filePath) throws IOException { * * @param fileData */ - public void openThriftReader(byte[] fileData) { + public void openThriftReader(byte[] fileData) throws IOException { thriftReader = new ThriftReader(fileData); } diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/ImplicitExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/ImplicitExpression.java index ca58c53bf7b..7a388d63fa9 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/ImplicitExpression.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/ImplicitExpression.java @@ -32,7 +32,7 @@ import org.apache.carbondata.core.scan.filter.intf.RowIntf; import org.apache.carbondata.core.util.path.CarbonTablePath; -import 
org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * Custom class to handle filter values for Implicit filter diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java index bf01c81795f..6905f6dd02d 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java @@ -117,9 +117,9 @@ import com.google.gson.GsonBuilder; import org.apache.commons.codec.binary.Base64; import org.apache.commons.io.input.ClassLoaderObjectInputStream; -import org.apache.commons.lang.ArrayUtils; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringEscapeUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java index 17d560169fd..4c82a4407fd 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java @@ -544,50 +544,10 @@ private static long createTimeInstant(String dimensionValue, String dateFormat) } private static Object parseTimestamp(String dimensionValue, String dateFormat) { - Date dateToStr; - DateFormat dateFormatter = null; - long timeValue; try { - if (Boolean.parseBoolean(CarbonProperties.getInstance() - .getProperty(CarbonCommonConstants.CARBON_SPARK_VERSION_SPARK3, - CarbonCommonConstants.CARBON_SPARK_VERSION_SPARK3_DEFAULT))) { - try { - return createTimeInstant(dimensionValue, dateFormat.trim()); - } catch (DateTimeParseException e) { - throw new NumberFormatException(e.getMessage()); - } - } - if (null != dateFormat && 
!dateFormat.trim().isEmpty()) { - dateFormatter = new SimpleDateFormat(dateFormat); - dateFormatter.setLenient(false); - } else { - dateFormatter = timestampFormatter.get(); - } - dateToStr = dateFormatter.parse(dimensionValue); - timeValue = dateToStr.getTime(); - validateTimeStampRange(timeValue); - return timeValue; - } catch (ParseException e) { - // If the parsing fails, try to parse again with setLenient to true if the property is set - // (example: 1941-03-15 00:00:00 is invalid data and will fail to parse in Asia/Shanghai zone - // as DST is observed and clocks were turned forward 1 hour to 1941-03-15 01:00:00) - if (CarbonProperties.getInstance().isSetLenientEnabled()) { - try { - dateFormatter.setLenient(true); - dateToStr = dateFormatter.parse(dimensionValue); - timeValue = dateToStr.getTime(); - validateTimeStampRange(timeValue); - LOGGER.info("Parsed data with lenience as true, setting back to default mode"); - return timeValue; - } catch (ParseException ex) { - LOGGER.info("Failed to parse data with lenience as true, setting back to default mode"); - throw new NumberFormatException(ex.getMessage()); - } finally { - dateFormatter.setLenient(false); - } - } else { - throw new NumberFormatException(e.getMessage()); - } + return createTimeInstant(dimensionValue, dateFormat.trim()); + } catch (DateTimeParseException e) { + throw new NumberFormatException(e.getMessage()); } } diff --git a/docs/sdk-guide.md b/docs/sdk-guide.md index fdd689cfef5..1f50139943b 100644 --- a/docs/sdk-guide.md +++ b/docs/sdk-guide.md @@ -95,7 +95,7 @@ import org.apache.carbondata.sdk.file.CarbonWriter; import org.apache.carbondata.core.metadata.datatype.Field; import org.apache.avro.generic.GenericData; -import org.apache.commons.lang.CharEncoding; +import org.apache.commons.lang3.CharEncoding; import tech.allegro.schema.json2avro.converter.JsonAvroConverter; diff --git a/examples/flink/pom.xml b/examples/flink/pom.xml index a8f5bba38d0..035a9352838 100644 --- 
a/examples/flink/pom.xml +++ b/examples/flink/pom.xml @@ -63,7 +63,7 @@ org.apache.carbondata - carbondata-spark_${spark.binary.version} + carbondata-spark ${project.version} diff --git a/examples/spark/pom.xml b/examples/spark/pom.xml index 9a891487c24..aa6c6c95ac3 100644 --- a/examples/spark/pom.xml +++ b/examples/spark/pom.xml @@ -36,7 +36,7 @@ org.apache.carbondata - carbondata-spark_${spark.binary.version} + carbondata-spark ${project.version} @@ -157,6 +157,7 @@ com.ning.maven.plugins maven-duplicate-finder-plugin + 1.0.9 true @@ -198,27 +199,5 @@ true - - spark-2.3 - - 2.3 - - - - spark-2.4 - - true - - - 2.4 - - - - spark-3.1 - - 3.1 - 2.10.0 - - diff --git a/geo/pom.xml b/geo/pom.xml index 342d737c6b0..545ced50666 100644 --- a/geo/pom.xml +++ b/geo/pom.xml @@ -44,7 +44,7 @@ org.apache.commons commons-lang3 - 3.5 + 3.12.0 org.scalatest @@ -126,6 +126,7 @@ com.ning.maven.plugins maven-duplicate-finder-plugin + 1.0.9 true diff --git a/geo/src/main/java/org/apache/carbondata/geo/scan/expression/PolygonListExpression.java b/geo/src/main/java/org/apache/carbondata/geo/scan/expression/PolygonListExpression.java index 3e22dd3f136..9ff64cff44c 100644 --- a/geo/src/main/java/org/apache/carbondata/geo/scan/expression/PolygonListExpression.java +++ b/geo/src/main/java/org/apache/carbondata/geo/scan/expression/PolygonListExpression.java @@ -28,7 +28,7 @@ import org.apache.carbondata.geo.GeoHashUtils; import org.apache.carbondata.geo.GeoOperationType; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * InPolygonList expression processor. 
It inputs the InPolygonList string to the Geo diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonFileInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonFileInputFormat.java index 13e1ef1a1ad..ea111af99cc 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonFileInputFormat.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonFileInputFormat.java @@ -48,7 +48,7 @@ import org.apache.carbondata.hadoop.CarbonInputSplit; import org.apache.commons.collections.CollectionUtils; -import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.JobContext; diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java index fe9dc1e3cf8..70f363bbcc6 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java @@ -67,7 +67,7 @@ import org.apache.carbondata.hadoop.CarbonInputSplit; import com.google.common.collect.Sets; -import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; diff --git a/index/bloom/pom.xml b/index/bloom/pom.xml index 632ce015490..6bfe4831ace 100644 --- a/index/bloom/pom.xml +++ b/index/bloom/pom.xml @@ -24,7 +24,7 @@ com.google.guava guava - 14.0.1 + 20.0 org.scalatest diff --git a/index/examples/pom.xml b/index/examples/pom.xml index 80e778b9db4..d43075ccbc7 100644 --- a/index/examples/pom.xml +++ b/index/examples/pom.xml @@ -36,7 +36,7 @@ org.apache.carbondata - carbondata-spark_${spark.binary.version} + carbondata-spark ${project.version} 
@@ -77,26 +77,4 @@ - - - - spark-2.3 - - 2.3 - - - - spark-2.4 - - 2.4 - - - - spark-3.1 - - 3.1 - - - - \ No newline at end of file diff --git a/index/lucene/pom.xml b/index/lucene/pom.xml index 412197cac6c..048cf42f207 100644 --- a/index/lucene/pom.xml +++ b/index/lucene/pom.xml @@ -26,7 +26,7 @@ org.apache.commons commons-lang3 - 3.5 + 3.12.0 org.apache.lucene diff --git a/index/secondary-index/pom.xml b/index/secondary-index/pom.xml index 15cb7a7b849..94dc4ec3870 100644 --- a/index/secondary-index/pom.xml +++ b/index/secondary-index/pom.xml @@ -37,7 +37,7 @@ org.apache.carbondata - carbondata-spark_${spark.binary.version} + carbondata-spark ${project.version} test @@ -156,27 +156,6 @@ true - - spark-2.3 - - 2.3 - - - - spark-2.4 - - true - - - 2.4 - - - - spark-3.1 - - 3.1 - - diff --git a/integration/flink/pom.xml b/integration/flink/pom.xml index ba82c3302ce..bc2bce5da3c 100644 --- a/integration/flink/pom.xml +++ b/integration/flink/pom.xml @@ -22,12 +22,12 @@ org.apache.carbondata - carbondata-flink-proxy + carbondata-format ${project.version} org.apache.carbondata - carbondata-format + carbondata-flink-proxy ${project.version} @@ -215,72 +215,21 @@ 4.1.17.Final test - - - - - spark-2.3 - - 2.3 - - - - org.apache.carbondata - carbondata-spark_${spark.binary.version} - ${project.version} - test - - - org.apache.hive - hive-exec - - - - - - - spark-2.4 - - true - - - 2.4 - - - - org.apache.carbondata - carbondata-spark_${spark.binary.version} - ${project.version} - test - - - org.apache.hive - hive-exec - - - - - - - spark-3.1 - - 3.1 - - - + org.apache.carbondata - carbondata-spark_${spark.binary.version} + carbondata-spark ${project.version} test - - - com.thoughtworks.paranamer - paranamer - - - - - + + + com.thoughtworks.paranamer + paranamer + + + + + + sdvtest diff --git a/integration/hive/pom.xml b/integration/hive/pom.xml index f2984b2a668..ebfc472436a 100644 --- a/integration/hive/pom.xml +++ b/integration/hive/pom.xml @@ -36,6 +36,26 @@ + + 
hadoop-yarn-registry + org.apache.hadoop + ${hadoop.version} + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + ${hadoop.version} + + + javax.servlet + servlet-api + + + org.slf4j + slf4j-log4j12 + + + org.apache.hive hive-exec @@ -61,6 +81,10 @@ kryo com.esotericsoftware.kryo + + hadoop-yarn-registry + org.apache.hadoop + compile @@ -109,6 +133,12 @@ tez-dag 0.9.1 test + + + hadoop-yarn-client + org.apache.hadoop + + org.apache.spark diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/test/server/HiveEmbeddedServer2.java b/integration/hive/src/main/java/org/apache/carbondata/hive/test/server/HiveEmbeddedServer2.java index 314c8933c2e..0e1b8dbc489 100644 --- a/integration/hive/src/main/java/org/apache/carbondata/hive/test/server/HiveEmbeddedServer2.java +++ b/integration/hive/src/main/java/org/apache/carbondata/hive/test/server/HiveEmbeddedServer2.java @@ -32,7 +32,6 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hive.service.Service; import org.apache.hive.service.cli.CLIService; @@ -47,7 +46,7 @@ * a child JVM (which Hive calls local) or external. 
*/ public class HiveEmbeddedServer2 { - private String SCRATCH_DIR = ""; + private String STORE_DIR = ""; private static final Logger log = LogServiceFactory.getLogService(Hive.class.getName()); private HiveServer2 hiveServer; private HiveConf config; @@ -55,7 +54,7 @@ public class HiveEmbeddedServer2 { public void start(String storePath) throws Exception { log.info("Starting Hive Local/Embedded Server..."); - SCRATCH_DIR = storePath; + STORE_DIR = storePath; if (hiveServer == null) { System.setProperty("datanucleus.schema.autoCreateAll", "true"); System.setProperty("hive.metastore.schema.verification", "false"); @@ -114,20 +113,17 @@ private CLIService getServiceClientInternal() { private HiveConf configure() throws Exception { log.info("Setting The Hive Conf Variables"); - String scratchDir = SCRATCH_DIR; - Configuration cfg = new Configuration(); HiveConf conf = new HiveConf(cfg, HiveConf.class); conf.addToRestrictList("columns.comments"); conf.set("hive.scratch.dir.permission", "777"); conf.setVar(ConfVars.SCRATCHDIRPERMISSION, "777"); - - conf.set("hive.metastore.warehouse.dir", scratchDir + "/warehouse"); - conf.set("hive.metastore.metadb.dir", scratchDir + "/metastore_db"); - conf.set("hive.exec.scratchdir", scratchDir); + conf.set("hive.metastore.warehouse.dir", STORE_DIR + "/warehouse"); + conf.set("hive.metastore.metadb.dir", STORE_DIR + "/metastore_db"); + conf.set("hive.exec.scratchdir", STORE_DIR + "/scratch"); conf.set("fs.permissions.umask-mode", "000"); conf.set("javax.jdo.option.ConnectionURL", - "jdbc:derby:;databaseName=" + scratchDir + "/metastore_db" + ";create=true"); + "jdbc:derby:;databaseName=" + STORE_DIR + "/metastore_db" + ";create=true"); conf.set("hive.metastore.local", "true"); conf.set("hive.aux.jars.path", ""); conf.set("hive.added.jars.path", ""); @@ -146,9 +142,6 @@ private HiveConf configure() throws Exception { props.remove("mapreduce.framework.name"); props.setProperty("fs.default.name", "file:///"); - // intercept SessionState 
to clean the threadlocal - Field tss = SessionState.class.getDeclaredField("tss"); - tss.setAccessible(true); return conf; } diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/util/DataTypeUtil.java b/integration/hive/src/main/java/org/apache/carbondata/hive/util/DataTypeUtil.java index 64a4376b2af..d6693571865 100644 --- a/integration/hive/src/main/java/org/apache/carbondata/hive/util/DataTypeUtil.java +++ b/integration/hive/src/main/java/org/apache/carbondata/hive/util/DataTypeUtil.java @@ -26,7 +26,7 @@ import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.metadata.datatype.StructField; -import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang3.ArrayUtils; public class DataTypeUtil { diff --git a/integration/hive/src/test/java/org/apache/carbondata/hive/HiveCarbonTest.java b/integration/hive/src/test/java/org/apache/carbondata/hive/HiveCarbonTest.java index ccec18eb6a4..21c9cdaa40f 100644 --- a/integration/hive/src/test/java/org/apache/carbondata/hive/HiveCarbonTest.java +++ b/integration/hive/src/test/java/org/apache/carbondata/hive/HiveCarbonTest.java @@ -83,7 +83,7 @@ public void verifyLocalDictionaryValues() throws Exception { statement.execute("CREATE TABLE hive_carbon_table(shortField SMALLINT , intField INT, bigintField BIGINT , doubleField DOUBLE, stringField STRING, timestampField TIMESTAMP, decimalField DECIMAL(18,2), dateField DATE, charField CHAR(5), floatField FLOAT) stored by 'org.apache.carbondata.hive.CarbonStorageHandler' TBLPROPERTIES ('local_dictionary_enable'='true','local_dictionary_include'='stringField')"); statement.execute("insert into hive_carbon_table select * from hive_table"); File rootPath = new File(HiveTestUtils.class.getResource("/").getPath() + "../../../.."); - String storePath = rootPath.getAbsolutePath() + "/integration/hive/target/warehouse/warehouse/hive_carbon_table/"; + String storePath = rootPath.getAbsolutePath() + 
"/integration/hive/target/warehouse/hive_carbon_table/"; ArrayList dimRawChunk = CarbonTestUtil.getDimRawChunk(storePath, 0); String[] dictionaryData = new String[]{"hive", "impala", "flink", "spark"}; assert(CarbonTestUtil.validateDictionary(dimRawChunk.get(0), dictionaryData)); diff --git a/integration/hive/src/test/java/org/apache/carbondata/hive/HiveTestUtils.java b/integration/hive/src/test/java/org/apache/carbondata/hive/HiveTestUtils.java index db49f3e1f54..ff108aae22e 100644 --- a/integration/hive/src/test/java/org/apache/carbondata/hive/HiveTestUtils.java +++ b/integration/hive/src/test/java/org/apache/carbondata/hive/HiveTestUtils.java @@ -45,8 +45,8 @@ public HiveTestUtils() { static { try { File rootPath = new File(HiveTestUtils.class.getResource("/").getPath() + "../../../.."); - String targetLoc = rootPath.getAbsolutePath() + "/integration/hive/target/warehouse"; - String metadatadbLoc = rootPath.getAbsolutePath() + "/integration/hive/target/metastore_db"; + String targetLoc = rootPath.getCanonicalPath() + "/integration/hive/target"; + String metadatadbLoc = targetLoc + "/metastore_db"; File file = new File(metadatadbLoc); if (file.exists()) { file.delete(); diff --git a/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoTestNonTransactionalTableFiles.scala b/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoTestNonTransactionalTableFiles.scala index c56a77c387c..7276a583c84 100644 --- a/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoTestNonTransactionalTableFiles.scala +++ b/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoTestNonTransactionalTableFiles.scala @@ -23,7 +23,7 @@ import java.util import org.apache.commons.codec.binary.Hex import org.apache.commons.io.FileUtils -import org.apache.commons.lang.RandomStringUtils +import org.apache.commons.lang3.RandomStringUtils import 
org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuiteLike} import org.apache.carbondata.common.logging.LogServiceFactory diff --git a/integration/spark-common-cluster-test/pom.xml b/integration/spark-common-cluster-test/pom.xml index c520ef73dde..b26dcb5ff22 100644 --- a/integration/spark-common-cluster-test/pom.xml +++ b/integration/spark-common-cluster-test/pom.xml @@ -37,7 +37,7 @@ org.apache.carbondata - carbondata-spark_${spark.binary.version} + carbondata-spark ${project.version} test diff --git a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/datasource/CreateTableUsingSparkCarbonFileFormatTestCase.scala b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/datasource/CreateTableUsingSparkCarbonFileFormatTestCase.scala index 6fda1945a61..3ed6545b984 100644 --- a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/datasource/CreateTableUsingSparkCarbonFileFormatTestCase.scala +++ b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/datasource/CreateTableUsingSparkCarbonFileFormatTestCase.scala @@ -22,7 +22,7 @@ import java.text.SimpleDateFormat import java.util.{Date, Random} import org.apache.commons.io.FileUtils -import org.apache.commons.lang.RandomStringUtils +import org.apache.commons.lang3.RandomStringUtils import org.apache.spark.sql.Row import org.apache.spark.sql.common.util.DataSourceTestUtil._ import org.apache.spark.sql.test.TestQueryExecutor diff --git a/integration/spark-common-cluster-test/src/test/scala/org/apache/spark/sql/common/util/QueryTest.scala b/integration/spark-common-cluster-test/src/test/scala/org/apache/spark/sql/common/util/QueryTest.scala index 68a908137fd..c05b60cc707 100644 --- a/integration/spark-common-cluster-test/src/test/scala/org/apache/spark/sql/common/util/QueryTest.scala +++ 
b/integration/spark-common-cluster-test/src/test/scala/org/apache/spark/sql/common/util/QueryTest.scala @@ -25,7 +25,7 @@ import scala.collection.JavaConverters._ import scala.util.{Failure, Success, Try} import io.prestosql.jdbc.PrestoStatement -import org.apache.commons.lang.StringUtils +import org.apache.commons.lang3.StringUtils import org.apache.spark.sql.{CarbonToSparkAdapter, DataFrame, Row, SQLContext} import org.apache.spark.sql.carbondata.execution.datasources.CarbonFileIndexReplaceRule import org.apache.spark.sql.catalyst.plans._ diff --git a/integration/spark/pom.xml b/integration/spark/pom.xml index 2a9648dc83b..a55b57c0d9e 100644 --- a/integration/spark/pom.xml +++ b/integration/spark/pom.xml @@ -26,7 +26,7 @@ ../../pom.xml - carbondata-spark_${spark.binary.version} + carbondata-spark Apache CarbonData :: Spark @@ -88,41 +88,11 @@ antlr4-runtime ${antlr4.version} - - org.apache.carbondata - carbondata-hive - ${project.version} - - - org.apache.commons - * - - - org.apache.hive - * - - - org.spark-project.hive - * - - - com.beust jcommander 1.72 - - org.apache.carbondata - carbondata-cli - ${project.version} - - - org.apache.hive - hive-exec - - - org.apache.carbondata carbondata-lucene @@ -145,8 +115,7 @@ - - - + org.apache.carbondata - carbondata-streaming_${spark.binary.version} + carbondata-streaming ${project.version} @@ -171,7 +139,7 @@ org.apache.carbondata - carbondata-mv-plan_${spark.binary.version} + carbondata-mv-plan ${project.version} @@ -243,21 +211,10 @@ - - com.databricks - spark-avro_2.11 - 4.0.0 - - - org.apache.avro - avro - - - org.apache.spark spark-avro_${scala.binary.version} - 2.4.5 + ${spark.version} org.apache.avro @@ -279,6 +236,28 @@ + + org.apache.commons + commons-configuration2 + 2.1.1 + + + org.apache.commons + commons-lang3 + + + + + commons-httpclient + commons-httpclient + 3.1 + + + commons-codec + commons-codec + + + org.apache.httpcomponents httpclient @@ -288,6 +267,46 @@ net.java.dev.jets3t jets3t 0.9.0 + + + 
commons-codec + commons-codec + + + + + org.apache.carbondata + carbondata-cli + ${project.version} + + + org.apache.hive + hive-exec + + + + + org.apache.carbondata + carbondata-hive + ${project.version} + + + org.apache.commons + * + + + org.apache.hive + * + + + org.apache.hadoop + * + + + org.spark-project.hive + * + + @@ -445,13 +464,12 @@ ${project.build.directory}/surefire-reports . CarbonTestSuite.txt - ${argLine} -ea -Xmx3g -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m - + -ea -Xmx3g -Xss4m -XX:MaxMetaspaceSize=2g -XX:ReservedCodeCacheSize=512m - true + false ${carbon.hive.based.metastore} @@ -608,131 +626,5 @@ true - - spark-2.3 - - 2.3 - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - src/main/spark3.1 - src/main/spark2.4 - src/main/common2.4and3.1 - - - - - org.codehaus.mojo - build-helper-maven-plugin - 3.0.0 - - - add-source - generate-sources - - add-source - - - - src/main/spark2.3 - src/main/common2.3and2.4 - - - - - - - - - - spark-2.4 - - true - - - 2.4 - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - src/main/spark3.1 - src/main/spark2.3 - - - - - org.codehaus.mojo - build-helper-maven-plugin - 3.0.0 - - - add-source - generate-sources - - add-source - - - - src/main/spark2.4 - src/main/common2.3and2.4 - src/main/common2.4and3.1 - - - - - - - - - - spark-3.1 - - 3.1 - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - src/main/spark2.4 - src/main/spark2.3 - src/main/common2.3and2.4 - - - - - org.codehaus.mojo - build-helper-maven-plugin - 3.0.0 - - - add-source - generate-sources - - add-source - - - - src/main/spark3.1 - src/main/common2.4and3.1 - - - - - - - - diff --git a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/CarbonDataSourceScanHelper.scala b/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/CarbonDataSourceScanHelper.scala deleted file mode 100644 index 22183dec0c4..00000000000 --- 
a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/CarbonDataSourceScanHelper.scala +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql - -import org.apache.spark.CarbonInputMetrics -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.catalog.CatalogTablePartition -import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression => SparkExpression} -import org.apache.spark.sql.catalyst.plans.QueryPlan -import org.apache.spark.sql.execution.{ColumnarBatchScan, DataSourceScanExec} -import org.apache.spark.sql.execution.strategy.CarbonPlanHelper -import org.apache.spark.sql.optimizer.CarbonFilters - -import org.apache.carbondata.core.index.IndexFilter -import org.apache.carbondata.core.indexstore.PartitionSpec -import org.apache.carbondata.core.scan.expression.Expression -import org.apache.carbondata.core.scan.expression.logical.AndExpression -import org.apache.carbondata.hadoop.CarbonProjection -import org.apache.carbondata.spark.rdd.CarbonScanRDD - -abstract class CarbonDataSourceScanHelper(relation: CarbonDatasourceHadoopRelation, - output: Seq[Attribute], - 
partitionFilters: Seq[SparkExpression], - pushedDownFilters: Seq[Expression], - pushedDownProjection: CarbonProjection, - directScanSupport: Boolean, - extraRDD: Option[(RDD[InternalRow], Boolean)], - selectedCatalogPartitions: Seq[CatalogTablePartition], - partitionFilterWithDpp: Seq[SparkExpression], - segmentIds: Option[String]) - extends DataSourceScanExec with ColumnarBatchScan { - - override lazy val supportsBatch: Boolean = { - CarbonPlanHelper.supportBatchedDataSource(sqlContext, output, extraRDD) - } - - lazy val supportsBatchOrColumnar: Boolean = supportsBatch - - val outputAttibutesAfterNormalizingExpressionIds: Seq[Attribute] = output - .map(QueryPlan.normalizeExprId(_, output)) - - @transient lazy val indexFilter: IndexFilter = { - val filter = pushedDownFilters.reduceOption(new AndExpression(_, _)) - .map(new IndexFilter(relation.carbonTable, _, true)).orNull - if (filter != null && pushedDownFilters.length == 1) { - // push down the limit if only one filter - filter.setLimit(relation.limit) - } - filter - } - - @transient lazy val selectedPartitions: Seq[PartitionSpec] = { - CarbonFilters - .getPartitions(partitionFilters, relation.sparkSession, relation.carbonTable) - .orNull - } - - lazy val inputRDD: RDD[InternalRow] = { - val carbonRdd = new CarbonScanRDD[InternalRow]( - relation.sparkSession, - pushedDownProjection, - indexFilter, - relation.identifier, - relation.carbonTable.getTableInfo.serialize(), - relation.carbonTable.getTableInfo, - new CarbonInputMetrics, - selectedPartitions, - segmentIds = segmentIds) - carbonRdd.setVectorReaderSupport(supportsBatch) - carbonRdd.setDirectScanSupport(supportsBatch && directScanSupport) - extraRDD.map(_._1.union(carbonRdd)).getOrElse(carbonRdd) - } -} diff --git a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/SparkVersionAdapter.scala b/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/SparkVersionAdapter.scala deleted file mode 100644 index e5cb5e0b13b..00000000000 --- 
a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/SparkVersionAdapter.scala +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql - -import scala.collection.mutable - -import org.antlr.v4.runtime.tree.TerminalNode -import org.apache.spark.{SparkContext, TaskContext} -import org.apache.spark.rdd.RDD -import org.apache.spark.serializer.Serializer -import org.apache.spark.sql.catalyst.{CarbonParserUtil, InternalRow, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{Analyzer, UnresolvedRelation} -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSeq, Expression, InterpretedPredicate, NamedExpression, SortOrder} -import org.apache.spark.sql.catalyst.expressions.codegen.GeneratePredicate -import org.apache.spark.sql.catalyst.parser.ParserUtils.operationNotAllowed -import org.apache.spark.sql.catalyst.parser.SqlBaseParser.{BucketSpecContext, ColTypeListContext, CreateTableHeaderContext, LocationSpecContext, QueryContext, SkewSpecContext, TablePropertyListContext} -import org.apache.spark.sql.catalyst.plans.{logical, JoinType, QueryPlan} -import 
org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, Join, LogicalPlan, OneRowRelation} -import org.apache.spark.sql.catalyst.plans.physical.SinglePartition -import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.execution.{QueryExecution, ShuffledRowRDD, SparkPlan, SQLExecution, UnaryExecNode} -import org.apache.spark.sql.execution.command.{ExplainCommand, Field, PartitionerField, TableModel, TableNewProcessor} -import org.apache.spark.sql.execution.command.table.{CarbonCreateTableAsSelectCommand, CarbonCreateTableCommand} -import org.apache.spark.sql.execution.datasources.{CreateTable, DataSourceStrategy, RefreshTable} -import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec -import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight, BuildSide} -import org.apache.spark.sql.execution.strategy.CarbonDataSourceScan -import org.apache.spark.sql.internal.{SessionState, SharedState} -import org.apache.spark.sql.parser.CarbonSpark2SqlParser -import org.apache.spark.sql.parser.CarbonSparkSqlParserUtil.{checkIfDuplicateColumnExists, convertDbNameToLowerCase, validateStreamingProperty} -import org.apache.spark.sql.sources.Filter -import org.apache.spark.sql.types.{DataType, StructField} -import org.apache.spark.unsafe.types.UTF8String - -import org.apache.carbondata.common.exceptions.DeprecatedFeatureException -import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException -import org.apache.carbondata.core.constants.CarbonCommonConstants -import org.apache.carbondata.core.datastore.impl.FileFactory -import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier -import org.apache.carbondata.core.metadata.datatype.DataTypes -import org.apache.carbondata.core.metadata.schema.SchemaReader -import org.apache.carbondata.core.util.CarbonProperties -import org.apache.carbondata.core.util.path.CarbonTablePath -import org.apache.carbondata.spark.CarbonOption -import 
org.apache.carbondata.spark.util.CarbonScalaUtil - -trait SparkVersionAdapter { - - def getPredicate(inputSchema: Seq[Attribute], - condition: Option[Expression]): InternalRow => Boolean = { - GeneratePredicate.generate(condition.get, inputSchema).eval(_) - } - - def stringToTimestamp(timestamp: String): Option[Long] = { - DateTimeUtils.stringToTimestamp(UTF8String.fromString(timestamp)) - } - - def getTableIdentifier(u: UnresolvedRelation): Some[TableIdentifier] = { - Some(u.tableIdentifier) - } - - def dateToString(date: Int): String = { - DateTimeUtils.dateToString(date.toString.toInt) - } - - def timeStampToString(timeStamp: Long): String = { - DateTimeUtils.timestampToString(timeStamp) - } - - def rebaseTime(timestamp: Long, carbonWrittenVersion: String = null): Long = { - // From spark 3.1, spark will store gregorian micros value for timestamp, hence - // rebase is required. For 2.x versions, no need rebase - timestamp - } - - def stringToTime(value: String): java.util.Date = { - DateTimeUtils.stringToTime(value) - } - - def addTaskCompletionListener[U](f: => U) { - TaskContext.get().addTaskCompletionListener { context => - f - } - } - - def createShuffledRowRDD(sparkContext: SparkContext, localTopK: RDD[InternalRow], - child: SparkPlan, serializer: Serializer): ShuffledRowRDD = { - new ShuffledRowRDD( - ShuffleExchangeExec.prepareShuffleDependency( - localTopK, child.output, SinglePartition, serializer)) - } - - def getInsertIntoCommand(table: LogicalPlan, - partition: Map[String, Option[String]], - query: LogicalPlan, - overwrite: Boolean, - ifPartitionNotExists: Boolean): InsertIntoTable = { - InsertIntoTable( - table, - partition, - query, - overwrite, - ifPartitionNotExists) - } - - def getExplainCommandObj(logicalPlan: LogicalPlan = OneRowRelation(), - mode: Option[String]) : ExplainCommand = { - ExplainCommand(logicalPlan, mode.isDefined) - } - - def getExplainCommandObj(mode: Option[String]) : ExplainCommand = { - ExplainCommand(OneRowRelation(), 
mode.isDefined) - } - - def invokeAnalyzerExecute(analyzer: Analyzer, - plan: LogicalPlan): LogicalPlan = { - analyzer.executeAndCheck(plan) - } - - def normalizeExpressions(r: NamedExpression, attrs: AttributeSeq): NamedExpression = { - QueryPlan.normalizeExprId(r, attrs) - } - - def getBuildRight: BuildSide = { - BuildRight - } - - def getBuildLeft: BuildSide = { - BuildLeft - } - - type CarbonBuildSideType = BuildSide - type InsertIntoStatementWrapper = InsertIntoTable - - def withNewExecutionId[T](sparkSession: SparkSession, queryExecution: QueryExecution): T => T = { - SQLExecution.withNewExecutionId(sparkSession, queryExecution)(_) - } - - def getTableIdentifier(parts: TableIdentifier): TableIdentifier = { - parts - } - - def createJoinNode(child: LogicalPlan, - targetTable: LogicalPlan, - joinType: JoinType, - condition: Option[Expression]): Join = { - Join(child, targetTable, joinType, condition) - } - - def getPartitionsFromInsert(x: InsertIntoStatementWrapper): Map[String, Option[String]] = { - x.partition - } - - def createRefreshTableCommand(tableIdentifier: TableIdentifier): RefreshTable = { - RefreshTable(tableIdentifier) - } - - type RefreshTables = RefreshTable - - - /** - * Validates the partition columns and return's A tuple of partition columns and partitioner - * fields. - * - * @param partitionColumns An instance of ColTypeListContext having parser rules for - * column. - * @param colNames Sequence of Table column names. - * @param tableProperties Table property map. - * @param partitionByStructFields Seq[StructField] Sequence of partition fields. - * @return A Seq of partitioner fields. 
- */ - def validatePartitionFields( - partitionColumns: ColTypeListContext, - colNames: Seq[String], - tableProperties: mutable.Map[String, String], - partitionByStructFields: Seq[StructField]): Seq[PartitionerField] = { - - val partitionerFields = partitionByStructFields.map { structField => - PartitionerField(structField.name, Some(structField.dataType.toString), null) - } - // validate partition clause - if (partitionerFields.nonEmpty) { - // partition columns should not be part of the schema - val badPartCols = partitionerFields.map(_.partitionColumn.toLowerCase).toSet - .intersect(colNames.map(_.toLowerCase).toSet) - if (badPartCols.nonEmpty) { - operationNotAllowed(s"Partition columns should not be specified in the schema: " + - badPartCols.map("\"" + _ + "\"").mkString("[", ",", "]") - , partitionColumns: ColTypeListContext) - } - } - partitionerFields - } - - - /** - * The method validates the create table command and returns the create table or - * ctas table LogicalPlan. - * - * @param createTableTuple a tuple of (CreateTableHeaderContext, SkewSpecContext, - * BucketSpecContext, ColTypeListContext, ColTypeListContext, - * TablePropertyListContext, - * LocationSpecContext, Option[String], TerminalNode, QueryContext, - * String) - * @param extraTableTuple A tuple of (Seq[StructField], Boolean, TableIdentifier, Boolean, - * Seq[String], - * Option[String], mutable.Map[String, String], Map[String, String], - * Seq[StructField], - * Seq[PartitionerField], CarbonSpark2SqlParser, SparkSession, - * Option[LogicalPlan]) - * @return of create table or ctas table - * - */ - def createCarbonTable(createTableTuple: (CreateTableHeaderContext, SkewSpecContext, - BucketSpecContext, ColTypeListContext, ColTypeListContext, TablePropertyListContext, - LocationSpecContext, Option[String], TerminalNode, QueryContext, String), - extraTableTuple: (Seq[StructField], Boolean, TableIdentifier, Boolean, Seq[String], - Option[String], mutable.Map[String, String], Map[String, 
String], Seq[StructField], - Seq[PartitionerField], CarbonSpark2SqlParser, SparkSession, - Option[LogicalPlan])): LogicalPlan = { - val (tableHeader, skewSpecContext, bucketSpecContext, partitionColumns, columns, - tablePropertyList, locationSpecContext, tableComment, ctas, query, provider) = createTableTuple - val (cols, external, tableIdentifier, ifNotExists, colNames, tablePath, - tableProperties, properties, partitionByStructFields, partitionFields, - parser, sparkSession, selectQuery) = extraTableTuple - val options = new CarbonOption(properties) - // validate streaming property - validateStreamingProperty(options) - var fields = parser.getFields(cols ++ partitionByStructFields) - // validate for create table as select - selectQuery match { - case Some(q) => - // create table as select does not allow creation of partitioned table - if (partitionFields.nonEmpty) { - val errorMessage = "A Create Table As Select (CTAS) statement is not allowed to " + - "create a partitioned table using Carbondata file formats." 
- operationNotAllowed(errorMessage, partitionColumns) - } - // create table as select does not allow to explicitly specify schema - if (fields.nonEmpty) { - operationNotAllowed( - "Schema may not be specified in a Create Table As Select (CTAS) statement", columns) - } - // external table is not allow - if (external) { - operationNotAllowed("Create external table as select", tableHeader) - } - fields = parser - .getFields(CarbonEnv.getInstance(sparkSession).carbonMetaStore - .getSchemaFromUnresolvedRelation(sparkSession, Some(q).get)) - case _ => - // ignore this case - } - val columnNames = fields.map(_.name.get) - checkIfDuplicateColumnExists(columns, tableIdentifier, columnNames) - if (partitionFields.nonEmpty && options.isStreaming) { - operationNotAllowed("Streaming is not allowed on partitioned table", partitionColumns) - } - - if (!external && fields.isEmpty) { - throw new MalformedCarbonCommandException("Creating table without column(s) is not supported") - } - if (external && fields.isEmpty && tableProperties.nonEmpty) { - // as fields are always zero for external table, cannot validate table properties. 
- operationNotAllowed( - "Table properties are not supported for external table", tablePropertyList) - } - - // Global dictionary is deprecated since 2.0 - if (tableProperties.contains(CarbonCommonConstants.DICTIONARY_INCLUDE) || - tableProperties.contains(CarbonCommonConstants.DICTIONARY_EXCLUDE)) { - DeprecatedFeatureException.globalDictNotSupported() - } - - val bucketFields = parser.getBucketFields(tableProperties, fields, options) - var isTransactionalTable: Boolean = true - - val tableInfo = if (external) { - if (fields.nonEmpty) { - // user provided schema for this external table, this is not allow currently - // see CARBONDATA-2866 - operationNotAllowed( - "Schema must not be specified for external table", columns) - } - if (partitionByStructFields.nonEmpty) { - operationNotAllowed( - "Partition is not supported for external table", partitionColumns) - } - // read table info from schema file in the provided table path - // external table also must convert table name to lower case - val identifier = AbsoluteTableIdentifier.from( - tablePath.get, - CarbonEnv.getDatabaseName(tableIdentifier.database)(sparkSession).toLowerCase(), - tableIdentifier.table.toLowerCase()) - val table = try { - val schemaPath = CarbonTablePath.getSchemaFilePath(identifier.getTablePath) - if (!FileFactory.isFileExist(schemaPath)) { - if (provider.equalsIgnoreCase("'carbonfile'")) { - SchemaReader.inferSchema(identifier, true) - } else { - isTransactionalTable = false - SchemaReader.inferSchema(identifier, false) - } - } else { - SchemaReader.getTableInfo(identifier) - } - } catch { - case e: Throwable => - operationNotAllowed(s"Invalid table path provided: ${ tablePath.get } ", tableHeader) - } - - // set "_external" property, so that DROP TABLE will not delete the data - if (provider.equalsIgnoreCase("'carbonfile'")) { - table.getFactTable.getTableProperties.put("_filelevelformat", "true") - table.getFactTable.getTableProperties.put("_external", "false") - } else { - 
table.getFactTable.getTableProperties.put("_external", "true") - table.getFactTable.getTableProperties.put("_filelevelformat", "false") - } - var isLocalDic_enabled = table.getFactTable.getTableProperties - .get(CarbonCommonConstants.LOCAL_DICTIONARY_ENABLE) - if (null == isLocalDic_enabled) { - table.getFactTable.getTableProperties - .put(CarbonCommonConstants.LOCAL_DICTIONARY_ENABLE, - CarbonProperties.getInstance() - .getProperty(CarbonCommonConstants.LOCAL_DICTIONARY_SYSTEM_ENABLE, - CarbonCommonConstants.LOCAL_DICTIONARY_ENABLE_DEFAULT)) - } - isLocalDic_enabled = table.getFactTable.getTableProperties - .get(CarbonCommonConstants.LOCAL_DICTIONARY_ENABLE) - if (CarbonScalaUtil.validateLocalDictionaryEnable(isLocalDic_enabled) && - isLocalDic_enabled.toBoolean) { - val allColumns = table.getFactTable.getListOfColumns - for (i <- 0 until allColumns.size()) { - val cols = allColumns.get(i) - if (cols.getDataType == DataTypes.STRING || cols.getDataType == DataTypes.VARCHAR) { - cols.setLocalDictColumn(true) - } - } - table.getFactTable.setListOfColumns(allColumns) - } - table - } else { - // prepare table model of the collected tokens - val tableModel: TableModel = CarbonParserUtil.prepareTableModel( - ifNotExists, - convertDbNameToLowerCase(tableIdentifier.database), - tableIdentifier.table.toLowerCase, - fields, - partitionFields, - tableProperties, - bucketFields, - isAlterFlow = false, - tableComment) - TableNewProcessor(tableModel) - } - tableInfo.setTransactionalTable(isTransactionalTable) - selectQuery match { - case query@Some(q) => - CarbonCreateTableAsSelectCommand( - tableInfo = tableInfo, - query = query.get, - ifNotExistsSet = ifNotExists, - tableLocation = tablePath) - case _ => - CarbonCreateTableCommand( - tableInfo = tableInfo, - ifNotExistsSet = ifNotExists, - tableLocation = tablePath, - external) - } - } - - def getField(parser: CarbonSpark2SqlParser, - schema: Seq[StructField], - isExternal: Boolean = false): Seq[Field] = { - schema.map { col 
=> - parser.getFields(col.getComment, col.name, col.dataType, isExternal) - } - } - - def supportsBatchOrColumnar(scan: CarbonDataSourceScan): Boolean = { - scan.supportsBatch - } - - def createDataset(sparkSession: SparkSession, qe: QueryExecution) : Dataset[Row] = { - new Dataset[Row](sparkSession, qe, RowEncoder(qe.analyzed.schema)) - } - - def createSharedState(sparkContext: SparkContext) : SharedState = { - new SharedState(sparkContext) - } - - def translateFilter(dataFilters: Seq[Expression]) : Seq[Filter] = { - dataFilters.flatMap(DataSourceStrategy.translateFilter) - } - - def getCarbonOptimizer(session : SparkSession, sessionState: SessionState) : CarbonOptimizer = { - new CarbonOptimizer(session, sessionState.catalog, sessionState.optimizer) - } - - def isCharType(dataType: DataType): Boolean = { - false - } - - def isVarCharType(dataType: DataType): Boolean = { - false - } - - def getTypeName(s: DataType): String = { - s.typeName - } - - def evaluateWithPredicate(exp: Expression, schema: Seq[Attribute], row: InternalRow): Any = { - InterpretedPredicate.create(exp, schema).expression.eval(row) - } - - def getUpdatedPlan(plan: LogicalPlan, sqlText: String): LogicalPlan = { - plan match { - case create@CreateTable(tableDesc, mode, query) => - if ( tableDesc.storage.locationUri.isDefined && - !sqlText.toUpperCase.startsWith("CREATE EXTERNAL TABLE ")) { - // add a property to differentiate if create table statement has external keyword or not - val newProperties = tableDesc.properties. 
+("hasexternalkeyword" -> "false") - val updatedTableDesc = tableDesc.copy(properties = newProperties) - CreateTable(updatedTableDesc, mode, query) - } else if (tableDesc.storage.properties.contains("latestversion")) { - val newProperties = tableDesc.storage - .properties.filterNot(_._1.equalsIgnoreCase("latestversion")) - val updatedStorage = tableDesc.storage.copy(properties = newProperties) - CreateTable(tableDesc.copy(storage = updatedStorage), mode, query) - } else { - create - } - case others => others - } - } -} - -case class CarbonBuildSide(buildSide: BuildSide) { - def isRight: Boolean = buildSide.isInstanceOf[BuildRight.type] - def isLeft: Boolean = buildSide.isInstanceOf[BuildLeft.type] -} - -abstract class CarbonTakeOrderedAndProjectExecHelper(sortOrder: Seq[SortOrder], - limit: Int, skipMapOrder: Boolean, readFromHead: Boolean) extends UnaryExecNode { - override def simpleString: String = { - val orderByString = sortOrder.mkString("[", ",", "]") - val outputString = output.mkString("[", ",", "]") - - s"CarbonTakeOrderedAndProjectExec(limit=$limit, orderBy=$orderByString, " + - s"skipMapOrder=$skipMapOrder, readFromHead=$readFromHead, output=$outputString)" - } -} diff --git a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/execution/CarbonCodegenSupport.scala b/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/execution/CarbonCodegenSupport.scala deleted file mode 100644 index 4fceb8b183b..00000000000 --- a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/execution/CarbonCodegenSupport.scala +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.execution - -import org.apache.spark.sql.execution.joins.HashJoin - -trait CarbonCodegenSupport extends SparkPlan with HashJoin { - -} diff --git a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/hive/CarbonAnalyzer.scala b/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/hive/CarbonAnalyzer.scala deleted file mode 100644 index bfa2e98f705..00000000000 --- a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/hive/CarbonAnalyzer.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.hive - -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.analysis.Analyzer -import org.apache.spark.sql.catalyst.catalog.SessionCatalog -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.util.CarbonReflectionUtils - -class CarbonAnalyzer(catalog: SessionCatalog, - conf: SQLConf, - sparkSession: SparkSession, - analyzer: Analyzer) extends Analyzer(catalog, conf) { - - val mvPlan = try { - CarbonReflectionUtils.createObject( - "org.apache.spark.sql.optimizer.MVRewriteRule", - sparkSession)._1.asInstanceOf[Rule[LogicalPlan]] - } catch { - case e: Exception => - null - } - - override def execute(plan: LogicalPlan): LogicalPlan = { - val logicalPlan = analyzer.execute(plan) - if (mvPlan != null) { - mvPlan.apply(logicalPlan) - } else { - logicalPlan - } - } -} diff --git a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/hive/CarbonSqlAstBuilder.scala b/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/hive/CarbonSqlAstBuilder.scala deleted file mode 100644 index 36ec2c806e1..00000000000 --- a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/hive/CarbonSqlAstBuilder.scala +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive - -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.parser.ParserUtils.string -import org.apache.spark.sql.catalyst.parser.SqlBaseParser.{AddTableColumnsContext, CreateHiveTableContext} -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.execution.SparkSqlAstBuilder -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.parser.{CarbonHelperSqlAstBuilder, CarbonSpark2SqlParser, CarbonSparkSqlParserUtil} - -class CarbonSqlAstBuilder(conf: SQLConf, parser: CarbonSpark2SqlParser, sparkSession: SparkSession) - extends SparkSqlAstBuilder(conf) with SqlAstBuilderHelper { - - val helper = new CarbonHelperSqlAstBuilder(conf, parser, sparkSession) - - override def visitCreateHiveTable(ctx: CreateHiveTableContext): LogicalPlan = { - val fileStorage = CarbonSparkSqlParserUtil.getFileStorage(ctx.createFileFormat(0)) - - if (fileStorage.equalsIgnoreCase("'carbondata'") || - fileStorage.equalsIgnoreCase("carbondata") || - fileStorage.equalsIgnoreCase("'carbonfile'") || - fileStorage.equalsIgnoreCase("'org.apache.carbondata.format'")) { - val createTableTuple = (ctx.createTableHeader, ctx.skewSpec(0), - ctx.bucketSpec(0), ctx.partitionColumns, ctx.columns, ctx.tablePropertyList(0), - ctx.locationSpec(0), Option(ctx.STRING(0)).map(string), ctx.AS, ctx.query, fileStorage) - helper.createCarbonTable(createTableTuple) - } else { - super.visitCreateHiveTable(ctx) - } - } - - override def visitAddTableColumns(ctx: AddTableColumnsContext): 
LogicalPlan = { - visitAddTableColumns(parser, ctx) - } -} diff --git a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/hive/SqlAstBuilderHelper.scala b/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/hive/SqlAstBuilderHelper.scala deleted file mode 100644 index a8cbbafb0f5..00000000000 --- a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/hive/SqlAstBuilderHelper.scala +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.hive - -import org.apache.spark.sql.CarbonToSparkAdapter -import org.apache.spark.sql.catalyst.CarbonParserUtil -import org.apache.spark.sql.catalyst.parser.SqlBaseParser -import org.apache.spark.sql.catalyst.parser.SqlBaseParser.{AddTableColumnsContext, ChangeColumnContext, CreateTableContext} -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.execution.SparkSqlAstBuilder -import org.apache.spark.sql.execution.command.{AlterTableAddColumnsModel, AlterTableDataTypeChangeModel} -import org.apache.spark.sql.execution.command.schema.{CarbonAlterTableAddColumnCommand, CarbonAlterTableColRenameDataTypeChangeCommand} -import org.apache.spark.sql.execution.command.table.CarbonExplainCommand -import org.apache.spark.sql.parser.CarbonSpark2SqlParser -import org.apache.spark.sql.types.DecimalType - -trait SqlAstBuilderHelper extends SparkSqlAstBuilder { - - override def visitChangeColumn(ctx: ChangeColumnContext): LogicalPlan = { - - val newColumn = visitColType(ctx.colType) - val isColumnRename = !ctx.identifier.getText.equalsIgnoreCase(newColumn.name) - - val (typeString, values): (String, Option[List[(Int, Int)]]) = newColumn.dataType match { - case d: DecimalType => ("decimal", Some(List((d.precision, d.scale)))) - case _ => (newColumn.dataType.typeName.toLowerCase, None) - } - - val alterTableColRenameAndDataTypeChangeModel = - AlterTableDataTypeChangeModel( - CarbonParserUtil.parseDataType(newColumn.name, typeString, values), - CarbonParserUtil.convertDbNameToLowerCase(Option(ctx.tableIdentifier().db).map(_.getText)), - ctx.tableIdentifier().table.getText.toLowerCase, - ctx.identifier.getText.toLowerCase, - newColumn.name.toLowerCase, - isColumnRename) - - CarbonAlterTableColRenameDataTypeChangeCommand(alterTableColRenameAndDataTypeChangeModel) - } - - - def visitAddTableColumns(parser: CarbonSpark2SqlParser, - ctx: AddTableColumnsContext): LogicalPlan = { - val cols = 
Option(ctx.columns).toSeq.flatMap(visitColTypeList) - val fields = CarbonToSparkAdapter.getField(parser, cols) - val tblProperties = scala.collection.mutable.Map.empty[String, String] - val tableModel = CarbonParserUtil.prepareTableModel(false, - CarbonParserUtil.convertDbNameToLowerCase(Option(ctx.tableIdentifier().db).map(_.getText)), - ctx.tableIdentifier.table.getText.toLowerCase, - fields, - Seq.empty, - tblProperties, - None, - true) - - val alterTableAddColumnsModel = AlterTableAddColumnsModel( - Option(ctx.tableIdentifier().db).map(_.getText), - ctx.tableIdentifier.table.getText, - tblProperties.toMap, - tableModel.dimCols, - tableModel.msrCols, - tableModel.highCardinalityDims.getOrElse(Seq.empty)) - - CarbonAlterTableAddColumnCommand(alterTableAddColumnsModel) - } - - override def visitCreateTable(ctx: CreateTableContext): LogicalPlan = { - super.visitCreateTable(ctx) - } - - override def visitExplain(ctx: SqlBaseParser.ExplainContext): LogicalPlan = { - CarbonExplainCommand(super.visitExplain(ctx)) - } -} diff --git a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/hive/execution/command/CarbonResetCommand.scala b/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/hive/execution/command/CarbonResetCommand.scala deleted file mode 100644 index eab772d9fb6..00000000000 --- a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/hive/execution/command/CarbonResetCommand.scala +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive.execution.command - -import org.apache.spark.sql.{CarbonEnv, Row, SparkSession} -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.execution.command.{ResetCommand, RunnableCommand} - -case class CarbonResetCommand() - extends RunnableCommand { - override val output = ResetCommand.output - - override def run(sparkSession: SparkSession): Seq[Row] = { - CarbonEnv.getInstance(sparkSession).carbonSessionInfo.getSessionParams.clear() - ResetCommand.run(sparkSession) - } -} - -/** - * This method matches the reset command based on the spark version - */ -object MatchResetCommand { - def unapply(plan: LogicalPlan): Option[LogicalPlan] = { - plan match { - case r@ResetCommand => - Some(plan) - case _ => - None - } - } -} diff --git a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/parser/CarbonExtensionSqlParser.scala b/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/parser/CarbonExtensionSqlParser.scala deleted file mode 100644 index 18d0f1bdf95..00000000000 --- a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/parser/CarbonExtensionSqlParser.scala +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.parser - -import org.apache.spark.sql.{CarbonEnv, CarbonThreadUtil, CarbonToSparkAdapter, SparkSession} -import org.apache.spark.sql.catalyst.parser.ParserInterface -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.execution.SparkSqlParser -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.util.CarbonException - -import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException -import org.apache.carbondata.spark.util.CarbonScalaUtil - -/** - * parser order: carbon parser => spark parser - */ -class CarbonExtensionSqlParser( - conf: SQLConf, - sparkSession: SparkSession, - initialParser: ParserInterface -) extends SparkSqlParser(conf) { - - val parser = new CarbonExtensionSpark2SqlParser - val antlrParser = new CarbonAntlrParser(this) - - override def parsePlan(sqlText: String): LogicalPlan = { - parser.synchronized { - CarbonEnv.getInstance(sparkSession) - } - CarbonThreadUtil.updateSessionInfoToCurrentThread(sparkSession) - try { - parser.parse(sqlText) - } catch { - case ce: MalformedCarbonCommandException => - throw ce - case ct: Throwable => - try { - antlrParser.parse(sqlText) - } catch { - case ce: MalformedCarbonCommandException => - throw ce - case at: Throwable => - try { - val parsedPlan = CarbonToSparkAdapter.getUpdatedPlan(initialParser.parsePlan(sqlText), - 
sqlText) - CarbonScalaUtil.cleanParserThreadLocals - parsedPlan - } catch { - case mce: MalformedCarbonCommandException => - throw mce - case st: Throwable => - st.printStackTrace(System.err) - CarbonScalaUtil.cleanParserThreadLocals - CarbonException.analysisException( - s"""== Spark Parser: ${initialParser.getClass.getName} == - |${st.getMessage} - |== Carbon Parser: ${ parser.getClass.getName } == - |${ct.getMessage} - |== Antlr Parser: ${antlrParser.getClass.getName} == - |${at.getMessage} - """.stripMargin.trim) - } - } - } - } -} diff --git a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala b/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala deleted file mode 100644 index 6fbd4b62d3d..00000000000 --- a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.sql.parser - -import scala.collection.mutable - -import org.antlr.v4.runtime.tree.TerminalNode -import org.apache.spark.sql.{CarbonThreadUtil, CarbonToSparkAdapter, SparkSession} -import org.apache.spark.sql.catalyst.parser.{AbstractSqlParser, SqlBaseParser} -import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.execution.{SparkSqlAstBuilder, SparkSqlParser} -import org.apache.spark.sql.internal.{SQLConf, VariableSubstitution} -import org.apache.spark.sql.parser.CarbonSparkSqlParserUtil.convertPropertiesToLowercase -import org.apache.spark.sql.types.StructField -import org.apache.spark.sql.util.CarbonException -import org.apache.spark.util.CarbonReflectionUtils - -import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException -import org.apache.carbondata.spark.util.CarbonScalaUtil - -/** - * Concrete parser for Spark SQL statements and carbon specific - * statements - */ -class CarbonSparkSqlParser(conf: SQLConf, sparkSession: SparkSession) extends SparkSqlParser(conf) { - - val parser = new CarbonSpark2SqlParser - - override val astBuilder = CarbonReflectionUtils.getAstBuilder(conf, parser, sparkSession) - - private val substitutor = new VariableSubstitution(conf) - - override def parsePlan(sqlText: String): LogicalPlan = { - CarbonThreadUtil.updateSessionInfoToCurrentThread(sparkSession) - try { - val parsedPlan = super.parsePlan(sqlText) - CarbonScalaUtil.cleanParserThreadLocals - parsedPlan - } catch { - case ce: MalformedCarbonCommandException => - CarbonScalaUtil.cleanParserThreadLocals - throw ce - case ex: Throwable => - try { - parser.parse(sqlText) - } catch { - case mce: MalformedCarbonCommandException => - throw mce - case e: Throwable => - CarbonException.analysisException( - s"""== Parse1 == - |${ex.getMessage} - |== Parse2 == - |${e.getMessage} - """.stripMargin.trim) - } - } - } - - protected override 
def parse[T](command: String)(toResult: SqlBaseParser => T): T = { - super.parse(substitutor.substitute(command))(toResult) - } -} - -class CarbonHelperSqlAstBuilder(conf: SQLConf, - parser: CarbonSpark2SqlParser, - sparkSession: SparkSession) - extends SparkSqlAstBuilderWrapper(conf) { - /** - * Parse a key-value map from a [[TablePropertyListContext]], assuming all values are specified. - */ - override def visitPropertyKeyValues(ctx: TablePropertyListContext): Map[String, String] = { - val props = visitTablePropertyList(ctx) - CarbonSparkSqlParserUtil.visitPropertyKeyValues(ctx, props) - } - - def getPropertyKeyValues(ctx: TablePropertyListContext): Map[String, String] - = { - Option(ctx).map(visitPropertyKeyValues) - .getOrElse(Map.empty) - } - - def createCarbonTable(createTableTuple: (CreateTableHeaderContext, SkewSpecContext, - BucketSpecContext, ColTypeListContext, ColTypeListContext, TablePropertyListContext, - LocationSpecContext, Option[String], TerminalNode, QueryContext, String)): LogicalPlan = { - // val parser = new CarbonSpark2SqlParser - - val (tableHeader, skewSpecContext, - bucketSpecContext, - partitionColumns, - columns, - tablePropertyList, - locationSpecContext, - tableComment, - ctas, - query, - provider) = createTableTuple - - val (tableIdent, temp, ifNotExists, external) = visitCreateTableHeader(tableHeader) - val tableIdentifier = CarbonToSparkAdapter.getTableIdentifier(tableIdent) - val cols: Seq[StructField] = Option(columns).toSeq.flatMap(visitColTypeList) - val colNames: Seq[String] = CarbonSparkSqlParserUtil - .validateCreateTableReqAndGetColumns(tableHeader, - skewSpecContext, - bucketSpecContext, - columns, - cols, - tableIdentifier, - temp) - val tablePath: Option[String] = if (locationSpecContext != null) { - Some(visitLocationSpec(locationSpecContext)) - } else { - None - } - - val properties: Map[String, String] = getPropertyKeyValues(tablePropertyList) - val tableProperties = convertPropertiesToLowercase(properties) - // 
validate partition clause - val partitionByStructFields = Option(partitionColumns).toSeq.flatMap(visitColTypeList) - val partitionFields = CarbonToSparkAdapter. - validatePartitionFields(partitionColumns, colNames, tableProperties, - partitionByStructFields) - - // validate for create table as select - val selectQuery = Option(query).map(plan) - val extraTableTuple = (cols, external, tableIdentifier, ifNotExists, colNames, tablePath, - tableProperties, properties, partitionByStructFields, partitionFields, - parser, sparkSession, selectQuery) - CarbonToSparkAdapter.createCarbonTable(createTableTuple, extraTableTuple) - } -} - -trait CarbonAstTrait { - def getFileStorage (createFileFormat : CreateFileFormatContext): String -} - - diff --git a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/parser/SparkSqlAstBuilderWrapper.scala b/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/parser/SparkSqlAstBuilderWrapper.scala deleted file mode 100644 index 5c4de438ada..00000000000 --- a/integration/spark/src/main/common2.3and2.4/org/apache/spark/sql/parser/SparkSqlAstBuilderWrapper.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.parser - -import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ -import org.apache.spark.sql.execution.SparkSqlAstBuilder -import org.apache.spark.sql.internal.SQLConf - -/** - * use this wrapper to adapter multiple spark versions - */ -abstract class SparkSqlAstBuilderWrapper(conf: SQLConf) - extends SparkSqlAstBuilder(conf) { - - def visitPropertyKeyValues(ctx: TablePropertyListContext): Map[String, String] -} diff --git a/integration/spark/src/main/common2.4and3.1/org/apache/spark/sql/CarbonBoundReference.scala b/integration/spark/src/main/common2.4and3.1/org/apache/spark/sql/CarbonBoundReference.scala deleted file mode 100644 index 8efdb25ecf1..00000000000 --- a/integration/spark/src/main/common2.4and3.1/org/apache/spark/sql/CarbonBoundReference.scala +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql - -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{Attribute, ExprId, LeafExpression, NamedExpression} -import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback -import org.apache.spark.sql.types.DataType - -import org.apache.carbondata.core.scan.expression.ColumnExpression - -case class CarbonBoundReference(colExp: ColumnExpression, dataType: DataType, nullable: Boolean) - extends LeafExpression with NamedExpression with CodegenFallback { - - type EvaluatedType = Any - - override def toString: String = s"input[" + colExp.getColIndex + "]" - - override def eval(input: InternalRow): Any = input.get(colExp.getColIndex, dataType) - - override def name: String = colExp.getColumnName - - override def toAttribute: Attribute = throw new UnsupportedOperationException - - override def exprId: ExprId = throw new UnsupportedOperationException - - override def qualifier: Seq[String] = null - - override def newInstance(): NamedExpression = throw new UnsupportedOperationException -} diff --git a/integration/spark/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java b/integration/spark/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java index e7b74c7b2e5..1fe166ab514 100644 --- a/integration/spark/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java +++ b/integration/spark/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java @@ -24,7 +24,6 @@ import org.apache.carbondata.core.scan.result.vector.CarbonDictionary; import org.apache.carbondata.core.scan.scanner.LazyPageLoader; -import org.apache.spark.sql.CarbonToSparkAdapter; import org.apache.spark.sql.CarbonVectorProxy; import org.apache.spark.sql.carbondata.execution.datasources.CarbonSparkDataSourceUtil; import org.apache.spark.sql.types.Decimal; @@ -122,8 +121,7 @@ public void putInts(int rowId, int count, 
int value) { @Override public void putLong(int rowId, long value) { if (!filteredRows[rowId]) { - sparkColumnVectorProxy - .putLong(counter++, CarbonToSparkAdapter.rebaseTime(value, carbonDataFileWrittenVersion)); + sparkColumnVectorProxy.putLong(counter++, value); } } diff --git a/integration/spark/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapperDirect.java b/integration/spark/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapperDirect.java index 15766d51e08..ef5846aa6d3 100644 --- a/integration/spark/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapperDirect.java +++ b/integration/spark/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapperDirect.java @@ -24,7 +24,6 @@ import org.apache.carbondata.core.scan.result.vector.CarbonDictionary; import org.apache.carbondata.core.scan.scanner.LazyPageLoader; -import org.apache.spark.sql.CarbonToSparkAdapter; import org.apache.spark.sql.CarbonVectorProxy; import org.apache.spark.sql.carbondata.execution.datasources.CarbonSparkDataSourceUtil; import org.apache.spark.sql.types.Decimal; @@ -96,8 +95,7 @@ public void putInts(int rowId, int count, int value) { @Override public void putLong(int rowId, long value) { - sparkColumnVectorProxy - .putLong(rowId, CarbonToSparkAdapter.rebaseTime(value, carbonDataFileWrittenVersion)); + sparkColumnVectorProxy.putLong(rowId, value); } @Override diff --git a/integration/spark/src/main/java/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java b/integration/spark/src/main/java/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java index 5b85f78e59e..3dc6877b6ab 100644 --- a/integration/spark/src/main/java/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java +++ b/integration/spark/src/main/java/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java @@ -53,7 +53,7 @@ import org.apache.spark.sql.CarbonVectorProxy; 
import org.apache.spark.sql.carbondata.execution.datasources.CarbonSparkDataSourceUtil; import org.apache.spark.sql.catalyst.InternalRow; -import org.apache.spark.sql.execution.vectorized.ColumnVectorUtils; +import org.apache.spark.sql.execution.vectorized.CarbonColumnVectorUtils; import org.apache.spark.sql.types.DecimalType; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; @@ -274,7 +274,7 @@ public void initBatch(MemoryMode memMode, StructType partitionColumns, if (partitionColumns != null) { int partitionIdx = fields.length; for (int i = 0; i < partitionColumns.fields().length; i++) { - ColumnVectorUtils.populate(vectorProxy.column(i + partitionIdx), partitionValues, i); + CarbonColumnVectorUtils.populate(vectorProxy.column(i + partitionIdx), partitionValues, i); vectorProxy.column(i + partitionIdx).setIsConstant(); } } diff --git a/integration/spark/src/main/java/org/apache/spark/sql/CarbonMergeIntoSQLCommand.scala b/integration/spark/src/main/java/org/apache/spark/sql/CarbonMergeIntoSQLCommand.scala index 67611fdf726..99e265c2f0a 100644 --- a/integration/spark/src/main/java/org/apache/spark/sql/CarbonMergeIntoSQLCommand.scala +++ b/integration/spark/src/main/java/org/apache/spark/sql/CarbonMergeIntoSQLCommand.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.command.AtomicRunnableCommand import org.apache.spark.sql.execution.command.mutation.merge._ import org.apache.spark.sql.functions.col @@ -116,4 +117,9 @@ case class CarbonMergeIntoSQLCommand(mergeInto: CarbonMergeIntoModel) } override protected def opName: String = "MERGE SQL COMMAND" + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + this + } } diff --git a/integration/spark/src/main/java/org/apache/spark/sql/CarbonVectorProxy.java 
b/integration/spark/src/main/java/org/apache/spark/sql/CarbonVectorProxy.java index ab310bbca0c..50c25720677 100644 --- a/integration/spark/src/main/java/org/apache/spark/sql/CarbonVectorProxy.java +++ b/integration/spark/src/main/java/org/apache/spark/sql/CarbonVectorProxy.java @@ -25,7 +25,11 @@ import org.apache.spark.memory.MemoryMode; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.execution.vectorized.WritableColumnVector; -import org.apache.spark.sql.types.*; +import org.apache.spark.sql.types.CalendarIntervalType; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.Decimal; +import org.apache.spark.sql.types.DecimalType; +import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.vectorized.ColumnVector; import org.apache.spark.sql.vectorized.ColumnarArray; import org.apache.spark.sql.vectorized.ColumnarBatch; diff --git a/integration/spark/src/main/java/org/apache/spark/sql/execution/vectorized/CarbonColumnVectorUtils.java b/integration/spark/src/main/java/org/apache/spark/sql/execution/vectorized/CarbonColumnVectorUtils.java new file mode 100644 index 00000000000..7af769bc790 --- /dev/null +++ b/integration/spark/src/main/java/org/apache/spark/sql/execution/vectorized/CarbonColumnVectorUtils.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.vectorized; + +import java.math.BigInteger; + +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.types.CalendarIntervalType; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.DateType; +import org.apache.spark.sql.types.DayTimeIntervalType; +import org.apache.spark.sql.types.Decimal; +import org.apache.spark.sql.types.DecimalType; +import org.apache.spark.sql.types.TimestampNTZType; +import org.apache.spark.sql.types.TimestampType; +import org.apache.spark.sql.types.YearMonthIntervalType; +import org.apache.spark.unsafe.types.CalendarInterval; +import org.apache.spark.unsafe.types.UTF8String; + +public class CarbonColumnVectorUtils { + + /** + * Populates the entire `col` with `row[fieldIdx]` + */ + public static void populate(WritableColumnVector col, InternalRow row, int fieldIdx) { + int capacity = col.capacity; + DataType t = col.dataType(); + + if (row.isNullAt(fieldIdx)) { + col.putNulls(0, capacity); + } else { + if (t == DataTypes.BooleanType) { + col.putBooleans(0, capacity, row.getBoolean(fieldIdx)); + } else if (t == DataTypes.BinaryType) { + col.putByteArray(0, row.getBinary(fieldIdx)); + } else if (t == DataTypes.ByteType) { + col.putBytes(0, capacity, row.getByte(fieldIdx)); + } else if (t == DataTypes.ShortType) { + col.putShorts(0, capacity, row.getShort(fieldIdx)); + } else if (t == DataTypes.IntegerType) { + col.putInts(0, capacity, row.getInt(fieldIdx)); + } else if (t == 
DataTypes.LongType) { + col.putLongs(0, capacity, row.getLong(fieldIdx)); + } else if (t == DataTypes.FloatType) { + col.putFloats(0, capacity, row.getFloat(fieldIdx)); + } else if (t == DataTypes.DoubleType) { + col.putDoubles(0, capacity, row.getDouble(fieldIdx)); + } else if (t == DataTypes.StringType) { + UTF8String v = row.getUTF8String(fieldIdx); + byte[] bytes = v.getBytes(); + for (int i = 0; i < capacity; i++) { + col.putByteArray(i, bytes); + } + } else if (t instanceof DecimalType) { + DecimalType dt = (DecimalType)t; + Decimal d = row.getDecimal(fieldIdx, dt.precision(), dt.scale()); + if (dt.precision() <= Decimal.MAX_INT_DIGITS()) { + col.putInts(0, capacity, (int)d.toUnscaledLong()); + } else if (dt.precision() <= Decimal.MAX_LONG_DIGITS()) { + col.putLongs(0, capacity, d.toUnscaledLong()); + } else { + final BigInteger integer = d.toJavaBigDecimal().unscaledValue(); + byte[] bytes = integer.toByteArray(); + for (int i = 0; i < capacity; i++) { + col.putByteArray(i, bytes, 0, bytes.length); + } + } + } else if (t instanceof CalendarIntervalType) { + CalendarInterval c = (CalendarInterval)row.get(fieldIdx, t); + col.getChild(0).putInts(0, capacity, c.months); + col.getChild(1).putInts(0, capacity, c.days); + col.getChild(2).putLongs(0, capacity, c.microseconds); + } else if (t instanceof DateType || t instanceof YearMonthIntervalType) { + col.putInts(0, capacity, row.getInt(fieldIdx)); + } else if (t instanceof TimestampType || t instanceof TimestampNTZType || + t instanceof DayTimeIntervalType) { + col.putLongs(0, capacity, row.getLong(fieldIdx)); + } else { + throw new RuntimeException(String.format("DataType %s is not supported" + + " in column vectorized reader.", t.sql())); + } + } + } +} diff --git a/integration/spark/src/main/scala/org/apache/carbondata/spark/load/CsvRDDHelper.scala b/integration/spark/src/main/scala/org/apache/carbondata/spark/load/CsvRDDHelper.scala index 4867e5f7ded..778cb13516b 100644 --- 
a/integration/spark/src/main/scala/org/apache/carbondata/spark/load/CsvRDDHelper.scala +++ b/integration/spark/src/main/scala/org/apache/carbondata/spark/load/CsvRDDHelper.scala @@ -25,6 +25,7 @@ import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.{TaskAttemptID, TaskType} import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat, FileSplit} import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl +import org.apache.spark.paths.SparkPath import org.apache.spark.rdd.RDD import org.apache.spark.sql.{CarbonToSparkAdapter, SparkSession} import org.apache.spark.sql.catalyst.InternalRow @@ -70,7 +71,7 @@ object CsvRDDHelper { totalLength = totalLength + fileSplit.getLength PartitionedFile( InternalRow.empty, - fileSplit.getPath.toString, + SparkPath.fromPath(fileSplit.getPath), fileSplit.getStart, fileSplit.getLength, fileSplit.getLocations) @@ -111,7 +112,7 @@ object CsvRDDHelper { // 2. read function val readFunction = getReadFunction(hadoopConf) - new FileScanRDD(spark, readFunction, partitions) + new FileScanRDD(spark, readFunction, partitions, null) } /** @@ -132,7 +133,7 @@ object CsvRDDHelper { val tableBlock = distributable.asInstanceOf[TableBlockInfo] PartitionedFile( InternalRow.empty, - tableBlock.getFilePath, + SparkPath.fromPathString(tableBlock.getFilePath), tableBlock.getBlockOffset, tableBlock.getBlockLength, tableBlock.getLocations) @@ -146,7 +147,7 @@ object CsvRDDHelper { // 2. 
read function val readFunction = getReadFunction(hadoopConf) - new FileScanRDD(spark, readFunction, partitions) + new FileScanRDD(spark, readFunction, partitions, null) } private def getReadFunction(configuration: Configuration): (PartitionedFile => @@ -161,7 +162,7 @@ object CsvRDDHelper { val hadoopAttemptContext = new TaskAttemptContextImpl(FileFactory.getConfiguration, attemptId) val inputSplit = - new FileSplit(new Path(file.filePath), file.start, file.length, file.locations) + new FileSplit(file.filePath.toPath, file.start, file.length, file.locations) var finished = false val inputFormat = new CSVInputFormat() val reader = inputFormat.createRecordReader(inputSplit, hadoopAttemptContext) diff --git a/integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala b/integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala index 90f1f8b5d3a..c48ed4c2c17 100644 --- a/integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala +++ b/integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala @@ -570,27 +570,7 @@ class CarbonScanRDD[T: ClassTag]( throw new java.util.NoSuchElementException("End of stream") } havePair = false - val value = reader.getCurrentValue - if (CarbonProperties.getInstance() - .getProperty(CarbonCommonConstants.CARBON_SPARK_VERSION_SPARK3, - CarbonCommonConstants.CARBON_SPARK_VERSION_SPARK3_DEFAULT).toBoolean && - timeStampProjectionColumns.nonEmpty) { - value match { - case row: GenericInternalRow if needRebaseTimeValue(reader) => - // rebase timestamp data by converting julian to Gregorian time - timeStampProjectionColumns.foreach { - projectionColumnWithIndex => - val timeStampData = row.get(projectionColumnWithIndex._2, - org.apache.spark.sql.types.DataTypes.TimestampType) - if (null != timeStampData) { - row.update(projectionColumnWithIndex._2, - CarbonToSparkAdapter.rebaseTime(timeStampData.asInstanceOf[Long])) - } - } - case _ => - } - } 
- value + reader.getCurrentValue } } @@ -622,17 +602,6 @@ class CarbonScanRDD[T: ClassTag]( } } - def needRebaseTimeValue(reader: RecordReader[Void, Object]): Boolean = { - // carbonDataFileWrittenVersion will be in the format x.x.x-SNAPSHOT - // (eg., 2.1.0-SNAPSHOT), get the version name and check if the data file is - // written before 2.2.0 version, then rebase timestamp value - reader.isInstanceOf[CarbonRecordReader[T]] && - null != reader.asInstanceOf[CarbonRecordReader[T]].getCarbonDataFileWrittenVersion && - reader.asInstanceOf[CarbonRecordReader[T]].getCarbonDataFileWrittenVersion - .split(CarbonCommonConstants.HYPHEN).head - .compareTo(CarbonCommonConstants.CARBON_SPARK3_VERSION) < 0 - } - private def addTaskCompletionListener(split: Partition, context: TaskContext, queryStartTime: Long, @@ -648,9 +617,9 @@ class CarbonScanRDD[T: ClassTag]( context.getClass.getDeclaredField("onCompleteCallbacks") onCompleteCallbacksField.setAccessible(true) val listeners = onCompleteCallbacksField.get(context) - .asInstanceOf[ArrayBuffer[TaskCompletionListener]] + .asInstanceOf[java.util.Stack[TaskCompletionListener]] - val isAdded = listeners.exists(p => p.isInstanceOf[CarbonLoadTaskCompletionListener]) + val isAdded = listeners.asScala.exists(p => p.isInstanceOf[CarbonLoadTaskCompletionListener]) model.setFreeUnsafeMemory(!isAdded) // add task completion before calling initialize as initialize method will internally // call for usage of unsafe method for processing of one blocklet and if there is any diff --git a/integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala b/integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala index 5b3914b9b58..8293c91932a 100644 --- a/integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala +++ b/integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala @@ -35,21 +35,17 @@ import org.apache.spark.{SparkContext, SparkEnv} import 
org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.expressions.{UnsafeArrayData, UnsafeMapData, UnsafeRow} import org.apache.spark.sql.execution.command.{ColumnProperty, Field, PartitionerField} -import org.apache.spark.sql.types.{ArrayType, DataType, DateType, DecimalType, MapType, StringType, StructField, StructType, TimestampType} +import org.apache.spark.sql.types._ import org.apache.spark.util.FileUtils import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException import org.apache.carbondata.common.logging.LogServiceFactory import org.apache.carbondata.core.constants.CarbonCommonConstants -import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.keygenerator.directdictionary.timestamp.DateDirectDictionaryGenerator import org.apache.carbondata.core.memory.{UnsafeMemoryManager, UnsafeSortMemoryManager} -import org.apache.carbondata.core.metadata.CarbonMetadata import org.apache.carbondata.core.metadata.datatype.DataTypes import org.apache.carbondata.core.metadata.schema.table.CarbonTable -import org.apache.carbondata.core.statusmanager.SegmentStatusManager import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataTypeUtil, ThreadLocalTaskInfo} -import org.apache.carbondata.core.util.path.CarbonTablePath import org.apache.carbondata.processing.datatypes.{ArrayDataType, GenericDataType, StructDataType} import org.apache.carbondata.processing.loading.CarbonDataLoadConfiguration import org.apache.carbondata.processing.loading.complexobjects.{ArrayObject, StructObject} diff --git a/integration/spark/src/main/scala/org/apache/carbondata/streamer/Source.scala b/integration/spark/src/main/scala/org/apache/carbondata/streamer/Source.scala index 6103f57f7be..2f721c491c5 100644 --- a/integration/spark/src/main/scala/org/apache/carbondata/streamer/Source.scala +++ b/integration/spark/src/main/scala/org/apache/carbondata/streamer/Source.scala @@ -23,10 
+23,10 @@ import scala.collection.JavaConverters._ import org.apache.avro.Schema import org.apache.avro.Schema.Type import org.apache.avro.generic.GenericRecord -import org.apache.spark.sql.{CarbonEnv, Row, SparkSession} -import org.apache.spark.sql.avro.{AvroDeserializer, SchemaConverters} +import org.apache.spark.sql.{CarbonEnv, Encoders, Row, SparkSession} +import org.apache.spark.sql.avro.{AvroFileFormatFactory, SchemaConverters} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder +import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.execution.command.mutation.merge.CarbonMergeDataSetUtil import org.apache.spark.sql.functions.col @@ -162,7 +162,7 @@ abstract class Source { }.map { case (field, i) => (field.name(), field.defaultVal(), i) } - val encoder = RowEncoder.apply(sparkDataTypes).resolveAndBind() + val encoder = Encoders.row(sparkDataTypes).asInstanceOf[ExpressionEncoder[Row]] new Iterator[Row] { override def hasNext: Boolean = { iterator.hasNext @@ -174,7 +174,8 @@ abstract class Source { } val record = iterator.next() val avroWriteSchema = record.getSchema - var sparkAvroDeserializer = new AvroDeserializer(avroWriteSchema, sparkDataTypes) + var sparkAvroDeserializer = + AvroFileFormatFactory.getAvroDeserializer(avroWriteSchema, sparkDataTypes) val internalRow = sparkAvroDeserializer.deserialize(record).asInstanceOf[InternalRow] // update with the default values if the value is null if (avroWriteSchema.getFields.size() != sparkDataTypes.fields.length) { diff --git a/integration/spark/src/main/scala/org/apache/carbondata/trash/DataTrashManager.scala b/integration/spark/src/main/scala/org/apache/carbondata/trash/DataTrashManager.scala index 7b024204e65..5a528a8aa1a 100644 --- a/integration/spark/src/main/scala/org/apache/carbondata/trash/DataTrashManager.scala +++ 
b/integration/spark/src/main/scala/org/apache/carbondata/trash/DataTrashManager.scala @@ -19,7 +19,7 @@ package org.apache.carbondata.trash import scala.collection.JavaConverters._ -import org.apache.commons.lang.StringUtils +import org.apache.commons.lang3.StringUtils import org.apache.spark.sql.SparkSession import org.apache.spark.sql.hive.CarbonHiveIndexMetadataUtil diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonBoundReference.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonBoundReference.scala index 589e49c71f0..2acc1fc55e0 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonBoundReference.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonBoundReference.scala @@ -17,7 +17,32 @@ package org.apache.spark.sql -import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, ExprId, LeafExpression, NamedExpression} +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.types.DataType + +import org.apache.carbondata.core.scan.expression.ColumnExpression + +case class CarbonBoundReference(colExp: ColumnExpression, dataType: DataType, nullable: Boolean) + extends LeafExpression with NamedExpression with CodegenFallback { + + type EvaluatedType = Any + + override def toString: String = s"input[" + colExp.getColIndex + "]" + + override def eval(input: InternalRow): Any = input.get(colExp.getColIndex, dataType) + + override def name: String = colExp.getColumnName + + override def toAttribute: Attribute = throw new UnsupportedOperationException + + override def exprId: ExprId = throw new UnsupportedOperationException + + override def qualifier: Seq[String] = null + + override def newInstance(): NamedExpression = throw new UnsupportedOperationException +} object ExtractReferences { def apply(expr: Expression): 
Array[String] = { diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala index 4813aa561d9..d66f9d357db 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala @@ -38,6 +38,12 @@ case class ProjectForUpdate( columns: List[String], children: Seq[LogicalPlan]) extends LogicalPlan { override def output: Seq[Attribute] = Seq.empty + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + copy( + children = newChildren) + } } case class UpdateTable( @@ -48,6 +54,11 @@ case class UpdateTable( filer: String) extends LogicalPlan { override def children: Seq[LogicalPlan] = Seq.empty override def output: Seq[Attribute] = Seq.empty + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + this + } } case class DeleteRecords( @@ -56,6 +67,11 @@ case class DeleteRecords( table: UnresolvedRelation) extends LogicalPlan { override def children: Seq[LogicalPlan] = Seq.empty override def output: Seq[AttributeReference] = Seq.empty + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + this + } } /** @@ -78,6 +94,14 @@ case class InsertIntoCarbonTable (table: CarbonDatasourceHadoopRelation, // This is the expected schema of the table prepared to be inserted into // including dynamic partition columns. 
val tableOutput = table.carbonRelation.output + + override def children: Seq[LogicalPlan] = Seq(child) + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + copy( + child = newChildren.head) + } } /** diff --git a/integration/spark/src/main/spark3.1/org/apache/spark/sql/CarbonDataSourceScanHelper.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonDataSourceScanHelper.scala similarity index 96% rename from integration/spark/src/main/spark3.1/org/apache/spark/sql/CarbonDataSourceScanHelper.scala rename to integration/spark/src/main/scala/org/apache/spark/sql/CarbonDataSourceScanHelper.scala index 20a8f5bf30a..8077a94000d 100644 --- a/integration/spark/src/main/spark3.1/org/apache/spark/sql/CarbonDataSourceScanHelper.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonDataSourceScanHelper.scala @@ -21,7 +21,10 @@ import org.apache.spark.CarbonInputMetrics import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.{CatalogTablePartition, ExternalCatalogUtils} -import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression => SparkExpression, PlanExpression} +import org.apache.spark.sql.catalyst.expressions.{ + Attribute, Expression => SparkExpression, + PlanExpression +} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.execution.{DataSourceScanExec, WholeStageCodegenExec} import org.apache.spark.sql.execution.metric.SQLMetrics @@ -50,7 +53,7 @@ abstract class CarbonDataSourceScanHelper(relation: CarbonDatasourceHadoopRelati extends DataSourceScanExec { override lazy val supportsColumnar: Boolean = CarbonPlanHelper - .supportBatchedDataSource(sqlContext, output, extraRDD) + .supportBatchedDataSource(output, extraRDD) lazy val supportsBatchOrColumnar: Boolean = supportsColumnar diff --git 
a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonEnv.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonEnv.scala index b73772f0975..b61e314e60f 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonEnv.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonEnv.scala @@ -21,7 +21,7 @@ import java.util.concurrent.ConcurrentHashMap import org.apache.hadoop.fs.Path import org.apache.spark.internal.config.ConfigEntry -import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchTableException} import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.events.{MergeBloomIndexEventListener, MergeIndexEventListener} @@ -85,8 +85,7 @@ class CarbonEnv { sparkSession.udf.register("getBlockPaths", new BlockPathsUDF) // add NI as a temp function, for queries to not hit SI table, it will be added as HiveSimpleUDF CreateFunctionCommand( - databaseName = None, - functionName = "NI", + identifier = FunctionIdentifier("NI", None, None), className = "org.apache.spark.sql.hive.NonIndexUDFExpression", resources = Seq(), isTemp = true, @@ -140,10 +139,6 @@ class CarbonEnv { .addNonSerializableProperty(CarbonCommonConstants.IS_DRIVER_INSTANCE, "true") Profiler.initialize(sparkSession.sparkContext) CarbonToSparkAdapter.addSparkSessionListener(sparkSession) - if(sparkSession.sparkContext.version.startsWith("3.1")) { - CarbonProperties.getInstance().addProperty(CarbonCommonConstants - .CARBON_SPARK_VERSION_SPARK3, "true") - } initialized = true LOGGER.info("Initialize CarbonEnv completed...") } diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSession.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSession.scala index 52e7d514eb3..41c295c80c4 100644 --- 
a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSession.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSession.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql import java.io.File import java.util.concurrent.atomic.AtomicLong -import org.apache.commons.lang.StringUtils +import org.apache.commons.lang3.StringUtils import org.apache.hadoop.conf.Configuration import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.SparkSession.Builder diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSource.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSource.scala index fdf904ccc95..28e2bb573ce 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSource.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSource.scala @@ -24,7 +24,7 @@ import scala.collection.JavaConverters._ import scala.collection.mutable import scala.language.implicitConversions -import org.apache.commons.lang.StringUtils +import org.apache.commons.lang3.StringUtils import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable, CatalogTableType} import org.apache.spark.sql.execution.streaming.Sink import org.apache.spark.sql.hive.CarbonMetaStore @@ -442,7 +442,7 @@ object CarbonSource { } else { throw ex } - case ex => throw ex + case ex: Throwable => throw ex } } } diff --git a/integration/spark/src/main/spark3.1/org/apache/spark/sql/CarbonToSparkAdapter.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonToSparkAdapter.scala similarity index 98% rename from integration/spark/src/main/spark3.1/org/apache/spark/sql/CarbonToSparkAdapter.scala rename to integration/spark/src/main/scala/org/apache/spark/sql/CarbonToSparkAdapter.scala index 9ab78880e2b..690a723a7cf 100644 --- a/integration/spark/src/main/spark3.1/org/apache/spark/sql/CarbonToSparkAdapter.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonToSparkAdapter.scala 
@@ -26,7 +26,8 @@ import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd} import org.apache.spark.sql.carbondata.execution.datasources.CarbonFileIndexReplaceRule import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, ExternalCatalogWithListener} -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, DynamicPruningSubquery, Expression, ExprId, NamedExpression, Predicate, ScalaUDF, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, + AttributeSet, DynamicPruningSubquery, Expression, ExprId, NamedExpression, Predicate, ScalaUDF} import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReference import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CustomDeterministicExpression.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CustomDeterministicExpression.scala index c88828be235..af9949f89b0 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/CustomDeterministicExpression.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/CustomDeterministicExpression.scala @@ -39,4 +39,9 @@ case class CustomDeterministicExpression(nonDt: Expression ) extends Expression override def genCode(ctx: CodegenContext): ExprCode = nonDt.genCode(ctx) override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = ev.copy() + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression + = { + this + } } diff --git a/integration/spark/src/main/spark2.4/org/apache/spark/sql/SparkSqlAdapter.scala b/integration/spark/src/main/scala/org/apache/spark/sql/SparkSqlAdapter.scala similarity index 88% rename from 
integration/spark/src/main/spark2.4/org/apache/spark/sql/SparkSqlAdapter.scala rename to integration/spark/src/main/scala/org/apache/spark/sql/SparkSqlAdapter.scala index e1c804e275d..d3ce3e2d33c 100644 --- a/integration/spark/src/main/spark2.4/org/apache/spark/sql/SparkSqlAdapter.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/SparkSqlAdapter.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.execution.datasources.HadoopFsRelation import org.apache.spark.sql.types.StructType @@ -42,7 +43,12 @@ object SparkSqlAdapter { outputSchema, partitionFilters, None, + None, dataFilters, tableIdentifier) } + + def ofRows(sparkSession: SparkSession, logicalPlan: LogicalPlan): DataFrame = { + Dataset.ofRows(sparkSession, logicalPlan) + } } diff --git a/integration/spark/src/main/spark3.1/org/apache/spark/sql/SparkVersionAdapter.scala b/integration/spark/src/main/scala/org/apache/spark/sql/SparkVersionAdapter.scala similarity index 89% rename from integration/spark/src/main/spark3.1/org/apache/spark/sql/SparkVersionAdapter.scala rename to integration/spark/src/main/scala/org/apache/spark/sql/SparkVersionAdapter.scala index 805af13a683..5d58ac8b27c 100644 --- a/integration/spark/src/main/spark3.1/org/apache/spark/sql/SparkVersionAdapter.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/SparkVersionAdapter.scala @@ -34,9 +34,12 @@ import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSeq, Expression, Predicate, SortOrder} import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} import org.apache.spark.sql.catalyst.parser.ParserUtils.operationNotAllowed -import 
org.apache.spark.sql.catalyst.parser.SqlBaseParser.{BucketSpecContext, ColTypeListContext, CreateTableHeaderContext, LocationSpecContext, PartitionFieldListContext, QueryContext, SkewSpecContext, TablePropertyListContext} +import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.plans.{JoinType, QueryPlan} -import org.apache.spark.sql.catalyst.plans.logical.{CreateTableStatement, InsertIntoStatement, Join, JoinHint, LogicalPlan, OneRowRelation, QualifiedColType} +import org.apache.spark.sql.catalyst.plans.logical.{ + CreateTable, InsertIntoStatement, Join, + JoinHint, LogicalPlan, OneRowRelation, QualifiedColType, TableSpec +} import org.apache.spark.sql.catalyst.plans.physical.SinglePartition import org.apache.spark.sql.catalyst.util.{DateTimeUtils, RebaseDateTime, TimestampFormatter} import org.apache.spark.sql.execution.{ExplainMode, QueryExecution, ShuffledRowRDD, SimpleMode, SparkPlan, SQLExecution, UnaryExecNode} @@ -61,7 +64,7 @@ import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier import org.apache.carbondata.core.metadata.datatype.DataTypes import org.apache.carbondata.core.metadata.schema.SchemaReader -import org.apache.carbondata.core.util.{CarbonProperties, ThreadLocalSessionInfo} +import org.apache.carbondata.core.util.CarbonProperties import org.apache.carbondata.core.util.path.CarbonTablePath import org.apache.carbondata.spark.CarbonOption import org.apache.carbondata.spark.util.CarbonScalaUtil @@ -119,25 +122,6 @@ trait SparkVersionAdapter { DateTimeUtils.daysToLocalDate(date).toString } - /** - * Rebase the timestamp value from julian to gregorian time micros - */ - def rebaseTime(timestamp: Long): Long = { - RebaseDateTime.rebaseJulianToGregorianMicros(timestamp) - } - - def rebaseTime(timestamp: Long, carbonDataFileWrittenVersion: String): Long = { - // carbonDataFileWrittenVersion will be in the format x.x.x-SNAPSHOT(eg., 
2.1.0-SNAPSHOT), - // get the version name and check if the data file is written before 2.2.0 version - if (null != carbonDataFileWrittenVersion && - carbonDataFileWrittenVersion.split(CarbonCommonConstants.HYPHEN).head - .compareTo(CarbonCommonConstants.CARBON_SPARK3_VERSION) < 0) { - RebaseDateTime.rebaseJulianToGregorianMicros(timestamp) - } else { - timestamp - } - } - // Note that due to this scala bug: https://github.com/scala/bug/issues/11016, we need to make // this function polymorphic for every scala version >= 2.12, otherwise an overloaded method // resolution error occurs at compile time. @@ -255,8 +239,8 @@ trait SparkVersionAdapter { * */ def createCarbonTable(createTableTuple: (CreateTableHeaderContext, SkewSpecContext, - BucketSpecContext, PartitionFieldListContext, ColTypeListContext, TablePropertyListContext, - LocationSpecContext, Option[String], TerminalNode, QueryContext, String), + BucketSpecContext, PartitionFieldListContext, CreateOrReplaceTableColTypeListContext, + PropertyListContext, LocationSpecContext, Option[String], TerminalNode, QueryContext, String), extraTableTuple: (Seq[StructField], Boolean, TableIdentifier, Boolean, Seq[String], Option[String], mutable.Map[String, String], Map[String, String], Seq[StructField], Seq[PartitionerField], CarbonSpark2SqlParser, SparkSession, @@ -429,7 +413,7 @@ trait SparkVersionAdapter { } def createDataset(sparkSession: SparkSession, qe: QueryExecution) : Dataset[Row] = { - new Dataset[Row](qe, RowEncoder(qe.analyzed.schema)) + new Dataset[Row](qe, RowEncoder.encoderFor(qe.analyzed.schema)) } def createSharedState(sparkContext: SparkContext) : SharedState = { @@ -474,21 +458,16 @@ trait SparkVersionAdapter { def getUpdatedPlan(plan: LogicalPlan, sqlText: String): LogicalPlan = { plan match { - case create@CreateTableStatement(_, _, _, _, properties, _, _, - location, _, _, _, _) => - if ( location.isDefined && + case create@CreateTable(_, _, _, table@TableSpec(properties, _, _, location, _, _, _), 
_) => + if (location.isDefined && !sqlText.toUpperCase.startsWith("CREATE EXTERNAL TABLE ")) { // add a property to differentiate if create table statement has external keyword or not - val newProperties = properties. +("hasexternalkeyword" -> "false") - CreateTableStatement(create.tableName, create.tableSchema, create.partitioning, - create.bucketSpec, newProperties, create.provider, create.options, - location, create.comment, create.serde, create.external, create.ifNotExists) - } else if (create.options.contains("latestversion")) { + val newProperties = properties + ("hasexternalkeyword" -> "false") + create.copy(tableSpec = table.copy(properties = newProperties)) + } else if (table.options.contains("latestversion")) { // remove latestversion property in options if present - val newOptions = create.options.filterNot(_._1.equalsIgnoreCase("latestversion")) - CreateTableStatement(create.tableName, create.tableSchema, create.partitioning, - create.bucketSpec, properties, create.provider, newOptions, - location, create.comment, create.serde, create.external, create.ifNotExists) + val newOptions = table.options.filterNot(_._1.equalsIgnoreCase("latestversion")) + create.copy(tableSpec = table.copy(options = newOptions)) } else { create } diff --git a/integration/spark/src/main/common2.4and3.1/org/apache/spark/sql/avro/AvroFileFormatFactory.scala b/integration/spark/src/main/scala/org/apache/spark/sql/avro/AvroFileFormatFactory.scala similarity index 87% rename from integration/spark/src/main/common2.4and3.1/org/apache/spark/sql/avro/AvroFileFormatFactory.scala rename to integration/spark/src/main/scala/org/apache/spark/sql/avro/AvroFileFormatFactory.scala index 7fcd5dc16c9..fddd623d788 100644 --- a/integration/spark/src/main/common2.4and3.1/org/apache/spark/sql/avro/AvroFileFormatFactory.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/avro/AvroFileFormatFactory.scala @@ -16,9 +16,11 @@ */ package org.apache.spark.sql.avro +import 
org.apache.avro.Schema import org.apache.spark.rdd.RDD import org.apache.spark.sql.Row import org.apache.spark.sql.execution.datasources.OutputWriterFactory +import org.apache.spark.sql.types.DataType object AvroFileFormatFactory { @@ -45,4 +47,8 @@ object AvroFileFormatFactory { .set("avro.mapred.ignore.inputs.without.extension", "false") spark.read.format("avro").load(s"$deltaPath").rdd } + + def getAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType): AvroDeserializer = { + new AvroDeserializer(rootAvroType, rootCatalystType, "EXCEPTION", false) + } } diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/carbondata/execution/datasources/CarbonFileIndex.scala b/integration/spark/src/main/scala/org/apache/spark/sql/carbondata/execution/datasources/CarbonFileIndex.scala index e6c8546ca84..29e8a9791df 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/carbondata/execution/datasources/CarbonFileIndex.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/carbondata/execution/datasources/CarbonFileIndex.scala @@ -116,7 +116,7 @@ case class CarbonFileIndex( dir.files.filter{f => f.getPath.getName.endsWith(CarbonTablePath.INDEX_FILE_EXT) || f.getPath.getName.endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT)}. 
- map(new HDFSCarbonFile(_)) + map(f => new HDFSCarbonFile(f.fileStatus)) }.toArray.asInstanceOf[Array[CarbonFile]] if (indexFiles.length == 0 && totalFiles > 0) { return directories diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/carbondata/execution/datasources/SparkCarbonFileFormat.scala b/integration/spark/src/main/scala/org/apache/spark/sql/carbondata/execution/datasources/SparkCarbonFileFormat.scala index 30646c32726..73e6f1228c8 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/carbondata/execution/datasources/SparkCarbonFileFormat.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/carbondata/execution/datasources/SparkCarbonFileFormat.scala @@ -36,8 +36,9 @@ import org.apache.spark.sql._ import org.apache.spark.sql.carbondata.execution.datasources.readsupport.SparkUnsafeRowReadSupport import org.apache.spark.sql.carbondata.execution.datasources.tasklisteners.{CarbonLoadTaskCompletionListenerImpl, CarbonQueryTaskCompletionListenerImpl} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.JoinedRow +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, JoinedRow} import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection +import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.sources.{DataSourceRegister, Filter} @@ -191,7 +192,7 @@ class SparkCarbonFileFormat extends FileFormat /** * Writer class for carbondata files */ - private class CarbonOutputWriter(path: String, + private class CarbonOutputWriter(val path: String, context: TaskAttemptContext, fieldTypes: Array[StructField]) extends OutputWriter with AbstractCarbonOutputWriter { @@ -397,10 +398,10 @@ class SparkCarbonFileFormat extends FileFormat file: PartitionedFile => { assert(file.partitionValues.numFields == 
partitionSchema.size) - - if (file.filePath.endsWith(CarbonTablePath.CARBON_DATA_EXT)) { + val filePath = file.filePath.toString + if (filePath.endsWith(CarbonTablePath.CARBON_DATA_EXT)) { val split = new CarbonInputSplit("null", - new Path(new URI(file.filePath)).toString, + new Path(new URI(filePath)).toString, file.start, file.length, file.locations, @@ -455,9 +456,10 @@ class SparkCarbonFileFormat extends FileFormat if (carbonReader.isInstanceOf[VectorizedCarbonRecordReader] && readVector) { iter.asInstanceOf[Iterator[InternalRow]] } else { - val fullSchema = requiredSchema.toAttributes ++ partitionSchema.toAttributes + val fullSchema = StructType(requiredSchema.fields).merge(partitionSchema) + .map(f => DataTypeUtils.toAttribute(f)) val joinedRow = new JoinedRow() - val appendPartitionColumns = GenerateUnsafeProjection.generate(fullSchema, fullSchema) + val appendPartitionColumns = GenerateUnsafeProjection.generate(fullSchema) if (partitionSchema.length == 0) { // There is no partition columns iter.asInstanceOf[Iterator[InternalRow]] @@ -506,13 +508,7 @@ case class CarbonSQLHadoopMapReduceCommitProtocol(jobId: String, path: String, i // Call only in case of carbon flow. if (carbonFlow != null) { val (allAbsPathFiles, allPartitionPaths) = - // spark 2.1 and 2.2 case - if (taskCommits.exists(_.obj.isInstanceOf[Map[String, String]])) { - (taskCommits.map(_.obj.asInstanceOf[Map[String, String]]), null) - } else { - // spark 2.3 and above taskCommits.map(_.obj.asInstanceOf[(Map[String, String], Set[String])]).unzip - } val filesToMove = allAbsPathFiles.foldLeft(Map[String, String]())(_ ++ _) val fs = new Path(path).getFileSystem(jobContext.getConfiguration) // Move files from stage directory to actual location. 
diff --git a/integration/spark/src/main/spark3.1/org/apache/spark/sql/execution/CarbonCodegenSupport.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/CarbonCodegenSupport.scala similarity index 100% rename from integration/spark/src/main/spark3.1/org/apache/spark/sql/execution/CarbonCodegenSupport.scala rename to integration/spark/src/main/scala/org/apache/spark/sql/execution/CarbonCodegenSupport.scala diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/CarbonTakeOrderedAndProjectExec.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/CarbonTakeOrderedAndProjectExec.scala index 8102a0c6aae..d743a7613f0 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/CarbonTakeOrderedAndProjectExec.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/CarbonTakeOrderedAndProjectExec.scala @@ -113,4 +113,7 @@ case class CarbonTakeOrderedAndProjectExec( } } + override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = { + withNewChildren(Seq(newChild)) + } } diff --git a/integration/spark/src/main/common2.4and3.1/org/apache/spark/sql/execution/CreateDataSourceTableCommand.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/CreateDataSourceTableCommand.scala similarity index 92% rename from integration/spark/src/main/common2.4and3.1/org/apache/spark/sql/execution/CreateDataSourceTableCommand.scala rename to integration/spark/src/main/scala/org/apache/spark/sql/execution/CreateDataSourceTableCommand.scala index c1a7d7f2e64..b2a5be48572 100644 --- a/integration/spark/src/main/common2.4and3.1/org/apache/spark/sql/execution/CreateDataSourceTableCommand.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/CreateDataSourceTableCommand.scala @@ -19,7 +19,8 @@ package org.apache.spark.sql.execution import org.apache.log4j.Logger import org.apache.spark.sql.{AnalysisException, Row, SparkSession} -import 
org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, CatalogUtils} +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.util.CreateTableCommonUtil.getCatalogTable @@ -48,6 +49,11 @@ case class CreateDataSourceTableCommand(table: CatalogTable, ignoreIfExists: Boo sessionState.catalog.createTable(newTable, ignoreIfExists = false, validateLocation = false) Seq.empty[Row] } + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + this + } } object CreateDataSourceTableCommand { diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/index/DropIndexCommand.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/index/DropIndexCommand.scala index f73ce3780be..a79c752a05a 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/index/DropIndexCommand.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/index/DropIndexCommand.scala @@ -22,6 +22,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.{CarbonEnv, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchTableException +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.hive.{CarbonHiveIndexMetadataUtil, CarbonMetaStore, CarbonRelation} import org.apache.spark.sql.index.CarbonIndexUtil @@ -263,4 +264,9 @@ private[sql] case class DropIndexCommand( CarbonInternalMetastore .removeTableFromMetadataCache(dbName, parentTableName)(sparkSession) } + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + this + } } diff 
--git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonInsertIntoHadoopFsRelationCommand.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonInsertIntoHadoopFsRelationCommand.scala index cfa19dbd99e..bf698b551c7 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonInsertIntoHadoopFsRelationCommand.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonInsertIntoHadoopFsRelationCommand.scala @@ -63,13 +63,13 @@ case class CarbonInsertIntoHadoopFsRelationCommand( fileIndex: Option[FileIndex], outputColumnNames: Seq[String]) extends DataWritingCommand { + import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.escapePathName override def run(sparkSession: SparkSession, child: SparkPlan): Seq[Row] = { // Most formats don't do well with duplicate columns, so lets not allow that SchemaUtils.checkColumnNameDuplication( outputColumnNames, - s"when inserting into $outputPath", sparkSession.sessionState.conf.caseSensitiveAnalysis) val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(options) @@ -274,4 +274,8 @@ case class CarbonInsertIntoHadoopFsRelationCommand( } }.toMap } + + override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = { + copy(query = newChild) + } } diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala index 1497740be1d..c3cf9eb3c2b 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala @@ -26,6 +26,8 @@ import scala.collection.mutable import org.apache.spark.sql._ 
import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.execution.command.{AtomicRunnableCommand, DataLoadTableFileMapping} import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, SparkCarbonTableFormat} import org.apache.spark.sql.hive.CarbonHiveIndexMetadataUtil @@ -296,7 +298,7 @@ case class CarbonLoadDataCommand(databaseNameOp: Option[String], options = options.toMap)(sparkSession = sparkSession) CarbonReflectionUtils.getLogicalRelation(hdfsRelation, - hdfsRelation.schema.toAttributes, + DataTypeUtils.toAttributes(hdfsRelation.schema), Some(catalogTable), false) } @@ -309,4 +311,9 @@ case class CarbonLoadDataCommand(databaseNameOp: Option[String], "LOAD DATA" } } + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + this + } } diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CommonLoadUtils.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CommonLoadUtils.scala index 9a75895d1e6..0c43d799ea8 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CommonLoadUtils.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CommonLoadUtils.scala @@ -460,7 +460,8 @@ object CommonLoadUtils { isNoRearrangeFlow: Boolean, table: CarbonTable, partition: Map[String, Option[String]]): (LogicalPlan, Int, Option[RDD[InternalRow]]) = { - val catalogAttributes = catalogTable.schema.toAttributes + val catalogAttributes = catalogTable.schema.map( + x => AttributeReference(x.name, x.dataType, x.nullable, x.metadata)()) // Converts the data as per the loading steps before give it to 
writer or sorter var attributes = curAttributes.map(a => { catalogAttributes.find(_.name.equalsIgnoreCase(a.name)).get @@ -746,12 +747,14 @@ object CommonLoadUtils { if (options.contains(DataLoadProcessorConstants.NO_REARRANGE_OF_ROWS)) { CarbonReflectionUtils.getLogicalRelation(hdfsRelation, - metastoreSchema.toAttributes, + metastoreSchema.map( + x => AttributeReference(x.name, x.dataType, x.nullable, x.metadata)()), Some(catalogTable), false) } else { CarbonReflectionUtils.getLogicalRelation(hdfsRelation, - hdfsRelation.schema.toAttributes, + hdfsRelation.schema.map( + x => AttributeReference(x.name, x.dataType, x.nullable, x.metadata)()), Some(catalogTable), false) } @@ -872,7 +875,8 @@ object CommonLoadUtils { CarbonThreadUtil.threadSet("partition.operationcontext", loadParams.operationContext) val attributes = if (loadParams.scanResultRDD.isDefined) { // take the already re-arranged attributes - catalogTable.schema.toAttributes + catalogTable.schema.map( + x => AttributeReference(x.name, x.dataType, x.nullable, x.metadata)()) } else { // input data from csv files. 
Convert to logical plan val allCols = new ArrayBuffer[String]() @@ -881,7 +885,8 @@ object CommonLoadUtils { allCols ++= table.getVisibleMeasures.asScala.map(_.getColName) StructType( allCols.filterNot(_.equals(CarbonCommonConstants.DEFAULT_INVISIBLE_DUMMY_MEASURE)).map( - StructField(_, StringType))).toAttributes + StructField(_, StringType))).map( + x => AttributeReference(x.name, x.dataType, x.nullable, x.metadata)()) } var partitionsLen = 0 val sortScope = CarbonDataProcessorUtil.getSortScope(loadParams.carbonLoadModel.getSortScope) @@ -1093,7 +1098,7 @@ object CommonLoadUtils { overwrite = false, ifPartitionNotExists = false) SparkUtil.setNullExecutionId(loadParams.sparkSession) - Dataset.ofRows(loadParams.sparkSession, convertedPlan).collect() + SparkSqlAdapter.ofRows(loadParams.sparkSession, convertedPlan).collect() } catch { case ex: Throwable => val (executorMessage, errorMessage) = CarbonScalaUtil.retrieveAndLogErrorMsg(ex, LOGGER) diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/mutation/merge/CarbonMergeDataSetCommand.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/mutation/merge/CarbonMergeDataSetCommand.scala index 53f5c26612f..eebb2ed6181 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/mutation/merge/CarbonMergeDataSetCommand.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/mutation/merge/CarbonMergeDataSetCommand.scala @@ -34,7 +34,8 @@ import org.apache.spark.sql.avro.AvroFileFormatFactory import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, GenericInternalRow} -import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} +import org.apache.spark.sql.catalyst.types.DataTypeUtils import 
org.apache.spark.sql.execution.{LogicalRDD, ProjectExec} import org.apache.spark.sql.execution.command.{DataCommand, ExecutionErrors, UpdateTableModel} import org.apache.spark.sql.execution.command.mutation.HorizontalCompaction @@ -462,7 +463,7 @@ case class CarbonMergeDataSetCommand( insertHistOfUpdate, insertHistOfDelete) val loadDF = Dataset.ofRows(sparkSession, - LogicalRDD(targetSchema.toAttributes, + LogicalRDD(targetSchema.map(f => DataTypeUtils.toAttribute(f)), processedRDD)(sparkSession)) loadDF.cache() @@ -870,4 +871,9 @@ case class CarbonMergeDataSetCommand( } override protected def opName: String = "MERGE DATASET" + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + this + } } diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/mutation/merge/MergeProjection.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/mutation/merge/MergeProjection.scala index 96b765a7fe5..adb534660c5 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/mutation/merge/MergeProjection.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/mutation/merge/MergeProjection.scala @@ -17,11 +17,10 @@ package org.apache.spark.sql.execution.command.mutation.merge -import scala.collection.mutable - import org.apache.spark.sql.{CarbonDatasourceHadoopRelation, CarbonToSparkAdapter, Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, InterpretedPredicate} +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} +import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.types.StructType /** @@ -44,7 +43,7 @@ case class MergeProjection( val targetTableAttributes = rltn.carbonRelation.output val indexesToFetch: Seq[(Expression, Int)] = { - val 
existingDsOutput = rltn.carbonRelation.schema.toAttributes + val existingDsOutput = DataTypeUtils.toAttributes(rltn.carbonRelation.schema) val literalToAttributeMap: collection.mutable.Map[Expression, Attribute] = collection.mutable.Map.empty[Expression, Attribute] val colsMap = mergeAction match { diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/package.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/package.scala index d721192a40c..a0c15b68bb0 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/package.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/package.scala @@ -23,6 +23,8 @@ import scala.language.implicitConversions import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchTableException +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.hive.CarbonRelation import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.SparkSQLUtil @@ -54,7 +56,7 @@ object Checker { .carbonMetaStore .lookupRelation(databaseNameOp, tableName)(sparkSession) .asInstanceOf[CarbonRelation] - (StructType.fromAttributes(relation.output), relation.carbonTable) + (DataTypeUtils.fromAttributes(relation.output), relation.carbonTable) } } @@ -136,6 +138,11 @@ abstract class MetadataCommand override def run(sparkSession: SparkSession): Seq[Row] = { runWithAudit(processMetadata, sparkSession) } + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + this + } } /** @@ -145,6 +152,11 @@ abstract class DataCommand extends RunnableCommand with DataProcessOperation wit override def run(sparkSession: SparkSession): Seq[Row] = { runWithAudit(processData, sparkSession) } + + override protected def 
withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + this + } } /** @@ -166,6 +178,7 @@ abstract class AtomicRunnableCommand throw e } }, sparkSession) + } /** @@ -180,4 +193,9 @@ abstract class AtomicRunnableCommand LogServiceFactory.getLogService(this.getClass.getCanonicalName).error(msg) Seq.empty } + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + this + } } diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateDataSourceTableCommand.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateDataSourceTableCommand.scala index 934812b4435..d47beb8c5f5 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateDataSourceTableCommand.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateDataSourceTableCommand.scala @@ -21,6 +21,7 @@ import org.apache.hadoop.hive.metastore.api.AlreadyExistsException import org.apache.spark.sql.{AnalysisException, CarbonEnv, CarbonSource, Row, SparkSession} import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.command.{CreateDataSourceTableCommand, DropTableCommand, MetadataCommand} import org.apache.spark.sql.execution.datasources.PartitioningUtils @@ -92,7 +93,7 @@ case class CarbonCreateDataSourceTableCommand( } else { throw ex } - case ex => + case ex: Throwable => throw ex } @@ -114,4 +115,8 @@ case class CarbonCreateDataSourceTableCommand( rows } + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + this + } } diff --git 
a/integration/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkCarbonTableFormat.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkCarbonTableFormat.scala index b0d1809f3b9..f3afc9cf6ef 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkCarbonTableFormat.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkCarbonTableFormat.scala @@ -472,7 +472,7 @@ private trait AbstractCarbonOutputWriter { def writeCarbon(row: InternalRow): Unit } -private class CarbonOutputWriter(path: String, +private class CarbonOutputWriter(val path: String, context: TaskAttemptContext, nonPartitionFieldTypes: Seq[DataType], taskNo : String, diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/joins/BroadCastPolygonFilterPushJoin.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/joins/BroadCastPolygonFilterPushJoin.scala index 1c083436f25..5940b2aef07 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/joins/BroadCastPolygonFilterPushJoin.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/joins/BroadCastPolygonFilterPushJoin.scala @@ -112,6 +112,11 @@ case class BroadCastPolygonFilterPushJoin( } override def output: Seq[Attribute] = left.output ++ right.output + + override protected def withNewChildrenInternal(newLeft: SparkPlan, + newRight: SparkPlan): SparkPlan = { + copy(left = newLeft, right = newRight) + } } object BroadCastPolygonFilterPushJoin { diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonPlanHelper.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonPlanHelper.scala index 8c7e8251fa6..7f8879f144c 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonPlanHelper.scala +++ 
b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonPlanHelper.scala @@ -266,9 +266,9 @@ object CarbonPlanHelper { vectorizedReader.toBoolean } - def supportBatchedDataSource(sqlContext: SQLContext, - cols: Seq[Attribute], + def supportBatchedDataSource(cols: Seq[Attribute], extraRDD: Option[(RDD[InternalRow], Boolean)]): Boolean = { + val sqlContext = SparkSession.getActiveSession.orNull.sqlContext vectorReaderEnabled() && extraRDD.getOrElse((null, true))._2 && sqlContext.conf.wholeStageEnabled && diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonSourceStrategy.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonSourceStrategy.scala index 9564f446e7d..bdf5d617bed 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonSourceStrategy.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonSourceStrategy.scala @@ -74,7 +74,7 @@ private[sql] object CarbonSourceStrategy extends SparkStrategy { projects foreach { case alias: Alias if alias.child.isInstanceOf[Expression] => alias.child match { - case Cast(s: ScalaUDF, _, _) => validateGeoUtilUDFs(s) + case Cast(s: ScalaUDF, _, _, _) => validateGeoUtilUDFs(s) case s: ScalaUDF => validateGeoUtilUDFs(s) case _ => } @@ -323,8 +323,7 @@ private[sql] object CarbonSourceStrategy extends SparkStrategy { } else { requiredColumns } - val supportBatch = CarbonPlanHelper.supportBatchedDataSource(relation.relation.sqlContext, - updateRequestedColumns, extraRDD) + val supportBatch = CarbonPlanHelper.supportBatchedDataSource(updateRequestedColumns, extraRDD) if (directScanSupport && !supportBatch && filterSet.nonEmpty && !filterSet.toSeq.exists(_.dataType.isInstanceOf[ArrayType])) { // revert for row scan diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DDLHelper.scala 
b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DDLHelper.scala index 1839820b846..52e30f651b6 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DDLHelper.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DDLHelper.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.strategy -import org.apache.commons.lang.StringUtils +import org.apache.commons.lang3.StringUtils import org.apache.hadoop.hive.metastore.api.InvalidOperationException import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.{CarbonParserUtil, TableIdentifier} @@ -315,16 +315,17 @@ object DDLHelper { showPartitionsCommand: ShowPartitionsCommand, sparkSession: SparkSession): RunnableCommand = { val tableName = showPartitionsCommand.tableName + val output = showPartitionsCommand.output val cols = showPartitionsCommand.spec val carbonTable = getTable(tableName, sparkSession) if (!carbonTable.isHivePartitionTable) { showPartitionsCommand } else { if (cols.isDefined) { - ShowPartitionsCommand(tableName, + ShowPartitionsCommand(tableName, output, Option(CarbonSparkSqlParserUtil.copyTablePartition(cols.get))) } else { - ShowPartitionsCommand(tableName, None) + ShowPartitionsCommand(tableName, output, None) } } } diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DDLStrategy.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DDLStrategy.scala index f4f8c793175..91c2c5e0742 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DDLStrategy.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DDLStrategy.scala @@ -134,9 +134,7 @@ object DDLStrategy extends SparkStrategy { if (EnvHelper.isLegacy(sparkSession)) { Nil } else { - DataWritingCommandExec( - DDLHelper.createCarbonFileHiveTableAsSelect(ctas), - planLater(ctas.query)) :: Nil + 
ExecutedCommandExec(DDLHelper.createCarbonFileHiveTableAsSelect(ctas)) :: Nil } case showCreateTable: ShowCreateTableCommand if isCarbonTable(showCreateTable.table) => diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DMLStrategy.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DMLStrategy.scala index 9236e1e1dcb..bcf549341a6 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DMLStrategy.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DMLStrategy.scala @@ -94,7 +94,7 @@ object DMLStrategy extends SparkStrategy { val dataType = StringType var children: Seq[Expression] = mutable.Seq.empty val geoHashColumn = condition.get.children.head match { - case Cast(attr: AttributeReference, _, _) => + case Cast(attr: AttributeReference, _, _, _) => attr case attr: AttributeReference => attr @@ -252,7 +252,7 @@ object DMLStrategy extends SparkStrategy { case join: Join => ExtractEquiJoinKeys.unapply(join) match { // TODO: Spark is using hints now, carbon also should use join hints - case Some(x) => Some(x._1, x._2, x._3, x._4, x._5, x._6) + case Some(x) => Some(x._1, x._2, x._3, x._4, x._6, x._7) case None => None } case _ => None @@ -395,7 +395,7 @@ case class UnionCommandExec(cmd: RunnableCommand) extends LeafExecNode { protected[sql] lazy val sideEffectResult: Seq[InternalRow] = { val converter = CatalystTypeConverters.createToCatalystConverter(schema) - val internalRow = cmd.run(sqlContext.sparkSession).map(converter(_).asInstanceOf[InternalRow]) + val internalRow = cmd.run(session).map(converter(_).asInstanceOf[InternalRow]) val unsafeProjection = UnsafeProjection.create(output.map(_.dataType).toArray) // To make GenericInternalRow to UnsafeRow val row = unsafeProjection(internalRow.head) @@ -405,7 +405,7 @@ case class UnionCommandExec(cmd: RunnableCommand) extends LeafExecNode { override def output: Seq[Attribute] = cmd.output 
protected override def doExecute(): RDD[InternalRow] = { - sqlContext.sparkContext.parallelize(sideEffectResult, 1) + sparkContext.parallelize(sideEffectResult, 1) } } diff --git a/integration/spark/src/main/spark3.1/org/apache/spark/sql/hive/CarbonAnalyzer.scala b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonAnalyzer.scala similarity index 100% rename from integration/spark/src/main/spark3.1/org/apache/spark/sql/hive/CarbonAnalyzer.scala rename to integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonAnalyzer.scala diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonHiveIndexMetadataUtil.scala b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonHiveIndexMetadataUtil.scala index 24a05804e36..857c0badfd5 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonHiveIndexMetadataUtil.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonHiveIndexMetadataUtil.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.hive import scala.collection.JavaConverters._ import org.apache.hadoop.hive.ql.exec.UDF +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF import org.apache.spark.sql.{CarbonEnv, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.Expression @@ -161,14 +162,16 @@ object CarbonHiveIndexMetadataUtil { def transformToRemoveNI(expression: Expression): Expression = { expression.transform { - case hiveUDF: HiveSimpleUDF if hiveUDF.function.isInstanceOf[NonIndexUDFExpression] => - hiveUDF.asInstanceOf[HiveSimpleUDF].children.head + case hiveUDF: HiveSimpleUDF if hiveUDF.funcWrapper + .createFunction[UDF]().isInstanceOf[NonIndexUDFExpression] => + hiveUDF.children.head } } def checkNIUDF(condition: Expression): Boolean = { condition match { - case hiveUDF: HiveSimpleUDF if hiveUDF.function.isInstanceOf[NonIndexUDFExpression] => true + case hiveUDF: HiveSimpleUDF if hiveUDF.funcWrapper + 
.createFunction[UDF]().isInstanceOf[NonIndexUDFExpression] => true case _ => false } } diff --git a/integration/spark/src/main/spark3.1/org/apache/spark/sql/hive/CarbonSessionStateBuilder.scala b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonSessionStateBuilder.scala similarity index 93% rename from integration/spark/src/main/spark3.1/org/apache/spark/sql/hive/CarbonSessionStateBuilder.scala rename to integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonSessionStateBuilder.scala index 5ecc38183c4..a4e6d62092a 100644 --- a/integration/spark/src/main/spark3.1/org/apache/spark/sql/hive/CarbonSessionStateBuilder.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonSessionStateBuilder.scala @@ -23,8 +23,8 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.sql.{CarbonEnv, SparkSession} import org.apache.spark.sql.catalyst.{QualifiedTableName, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry} -import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTablePartition, ExternalCatalogWithListener, FunctionResourceLoader, GlobalTempViewManager} +import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry, TableFunctionRegistry} +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTablePartition, ExternalCatalogWithListener, FunctionExpressionBuilder, FunctionResourceLoader, GlobalTempViewManager} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -53,19 +53,23 @@ class CarbonHiveSessionCatalog( externalCatalog: HiveExternalCatalog, globalTempViewManager: GlobalTempViewManager, functionRegistry: FunctionRegistry, + tableFunctionRegistry: TableFunctionRegistry, sparkSession: SparkSession, conf: SQLConf, hadoopConf: Configuration, parser: 
ParserInterface, - functionResourceLoader: FunctionResourceLoader) + functionResourceLoader: FunctionResourceLoader, + functionExpressionBuilder: FunctionExpressionBuilder) extends HiveSessionCatalog ( () => externalCatalog, () => globalTempViewManager, new HiveMetastoreCatalog(sparkSession), functionRegistry, + tableFunctionRegistry, hadoopConf, parser, - functionResourceLoader + functionResourceLoader, + functionExpressionBuilder ) with CarbonSessionCatalog { private lazy val carbonEnv = { @@ -144,7 +148,7 @@ class CarbonHiveSessionCatalog( */ class CarbonSessionStateBuilder(sparkSession: SparkSession, parentState: Option[SessionState] = None) - extends HiveSessionStateBuilder(sparkSession, parentState, Map.empty) { + extends HiveSessionStateBuilder(sparkSession, parentState) { override lazy val sqlParser: ParserInterface = new CarbonSparkSqlParser(conf, sparkSession) @@ -164,11 +168,13 @@ class CarbonSessionStateBuilder(sparkSession: SparkSession, externalCatalog, session.sharedState.globalTempViewManager, functionRegistry, + tableFunctionRegistry, sparkSession, conf, SessionState.newHadoopConf(session.sparkContext.hadoopConfiguration, conf), sqlParser, - resourceLoader) + resourceLoader, + HiveUDFExpressionBuilder) parentState.foreach(_.catalog.copyStateTo(catalog)) catalog } diff --git a/integration/spark/src/main/spark3.1/org/apache/spark/sql/hive/CarbonSqlAstBuilder.scala b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonSqlAstBuilder.scala similarity index 83% rename from integration/spark/src/main/spark3.1/org/apache/spark/sql/hive/CarbonSqlAstBuilder.scala rename to integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonSqlAstBuilder.scala index fcfa54817cc..0b91b1cb6e1 100644 --- a/integration/spark/src/main/spark3.1/org/apache/spark/sql/hive/CarbonSqlAstBuilder.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonSqlAstBuilder.scala @@ -18,12 +18,15 @@ package org.apache.spark.sql.hive import 
org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.parser.ParserUtils.string -import org.apache.spark.sql.catalyst.parser.SqlBaseParser.{AddTableColumnsContext, CreateTableContext, CreateTableLikeContext} +import org.apache.spark.sql.catalyst.parser.ParserUtils +import org.apache.spark.sql.catalyst.parser.SqlBaseParser.{AddTableColumnsContext, CreateTableContext} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkSqlAstBuilder import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.parser.{CarbonHelperSqlAstBuilder, CarbonSpark2SqlParser, CarbonSparkSqlParserUtil} +import org.apache.spark.sql.parser.{ + CarbonHelperSqlAstBuilder, CarbonSpark2SqlParser, + CarbonSparkSqlParserUtil +} class CarbonSqlAstBuilder(conf: SQLConf, parser: CarbonSpark2SqlParser, sparkSession: SparkSession) extends SparkSqlAstBuilder with SqlAstBuilderHelper { @@ -44,12 +47,13 @@ class CarbonSqlAstBuilder(conf: SQLConf, parser: CarbonSpark2SqlParser, sparkSes val commentSpecContext = if (commentSpec.isEmpty) { null } else { - string(commentSpec.get(0).STRING()) + ParserUtils.string(commentSpec.get(0).COMMENT()) } val createTableTuple = (ctx.createTableHeader, createTableClauses.skewSpec(0), - createTableClauses.bucketSpec(0), createTableClauses.partitioning, ctx.colTypeList(), - createTableClauses.tablePropertyList(0), createTableClauses.locationSpec(0), - Option(commentSpecContext), ctx.AS, ctx.query, fileStorage) + createTableClauses.bucketSpec(0), createTableClauses.partitioning, + ctx.createOrReplaceTableColTypeList(), createTableClauses.tableProps, + createTableClauses.locationSpec(0), Option(commentSpecContext), ctx.AS, ctx.query, + fileStorage) helper.createCarbonTable(createTableTuple) } else { super.visitCreateTable(ctx) diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CreateCarbonSourceTableAsSelectCommand.scala 
b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CreateCarbonSourceTableAsSelectCommand.scala index 675b1b84195..8ecbceb98b6 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CreateCarbonSourceTableAsSelectCommand.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CreateCarbonSourceTableAsSelectCommand.scala @@ -22,7 +22,7 @@ import java.net.URI import org.apache.spark.sql.{AnalysisException, Dataset, Row, SaveMode, SparkSession} import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, CatalogUtils} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.execution.command.{AlterTableRecoverPartitionsCommand, AtomicRunnableCommand} +import org.apache.spark.sql.execution.command.{AtomicRunnableCommand, RepairTableCommand} import org.apache.spark.sql.execution.datasources.{DataSource, HadoopFsRelation} import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.util.CarbonReflectionUtils @@ -85,7 +85,11 @@ case class CreateCarbonSourceTableAsSelectCommand( case fs: HadoopFsRelation if table.partitionColumnNames.nonEmpty && sparkSession.sqlContext.conf.manageFilesourcePartitions => // Need to recover partitions into the metastore so our saved data is visible. 
- sessionState.executePlan(AlterTableRecoverPartitionsCommand(table.identifier)).toRdd + sessionState + .executePlan(RepairTableCommand(table.identifier, + enableAddPartitions = true, + enableDropPartitions = false)) + .toRdd } } @@ -130,4 +134,9 @@ case class CreateCarbonSourceTableAsSelectCommand( } override protected def opName: String = "CREATE TABLE AS SELECT" + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + copy(query = newChildren.head) + } } diff --git a/integration/spark/src/main/spark3.1/org/apache/spark/sql/hive/SqlAstBuilderHelper.scala b/integration/spark/src/main/scala/org/apache/spark/sql/hive/SqlAstBuilderHelper.scala similarity index 90% rename from integration/spark/src/main/spark3.1/org/apache/spark/sql/hive/SqlAstBuilderHelper.scala rename to integration/spark/src/main/scala/org/apache/spark/sql/hive/SqlAstBuilderHelper.scala index 08c053825ff..a38fa572a81 100644 --- a/integration/spark/src/main/spark3.1/org/apache/spark/sql/hive/SqlAstBuilderHelper.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/hive/SqlAstBuilderHelper.scala @@ -17,21 +17,22 @@ package org.apache.spark.sql.hive -import java.util - -import scala.collection.JavaConverters._ - +import collection.JavaConverters._ import org.apache.spark.sql.CarbonToSparkAdapter import org.apache.spark.sql.catalyst.CarbonParserUtil import org.apache.spark.sql.catalyst.parser.SqlBaseParser -import org.apache.spark.sql.catalyst.parser.SqlBaseParser.{AddTableColumnsContext, CreateTableContext, HiveChangeColumnContext} +import org.apache.spark.sql.catalyst.parser.SqlBaseParser.{ + AddTableColumnsContext, + CreateTableContext, HiveChangeColumnContext +} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, QualifiedColType} import org.apache.spark.sql.execution.SparkSqlAstBuilder import org.apache.spark.sql.execution.command.{AlterTableAddColumnsModel, AlterTableDataTypeChangeModel} import 
org.apache.spark.sql.execution.command.schema.{CarbonAlterTableAddColumnCommand, CarbonAlterTableColRenameDataTypeChangeCommand} import org.apache.spark.sql.execution.command.table.CarbonExplainCommand import org.apache.spark.sql.parser.CarbonSpark2SqlParser -import org.apache.spark.sql.types.{DecimalType, StructField} +import org.apache.spark.sql.types.DecimalType + trait SqlAstBuilderHelper extends SparkSqlAstBuilder { @@ -46,7 +47,7 @@ trait SqlAstBuilderHelper extends SparkSqlAstBuilder { case _ => (newColumn.dataType.typeName.toLowerCase, None) } - val fullTableName = visitMultipartIdentifier(ctx.table) + val fullTableName = visitMultipartIdentifier(ctx.table.multipartIdentifier()) val alterTableColRenameAndDataTypeChangeModel = AlterTableDataTypeChangeModel( CarbonParserUtil.parseDataType(newColumn.name, typeString, values), @@ -67,7 +68,7 @@ trait SqlAstBuilderHelper extends SparkSqlAstBuilder { .map(typedVisit[QualifiedColType]).toSeq val fields = CarbonToSparkAdapter.getField(parser, col) val tblProperties = scala.collection.mutable.Map.empty[String, String] - val fullTableName = visitMultipartIdentifier(ctx.multipartIdentifier) + val fullTableName = visitMultipartIdentifier(ctx.identifierReference().multipartIdentifier()) val tableModel = CarbonParserUtil.prepareTableModel(ifNotExistPresent = false, CarbonParserUtil.convertDbNameToLowerCase(Option(fullTableName.head)), fullTableName(1).toLowerCase, diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/hive/execution/command/CarbonHiveCommands.scala b/integration/spark/src/main/scala/org/apache/spark/sql/hive/execution/command/CarbonHiveCommands.scala index a3e3b41c109..27983d75701 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/hive/execution/command/CarbonHiveCommands.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/hive/execution/command/CarbonHiveCommands.scala @@ -22,6 +22,7 @@ import org.apache.spark.sql.{CarbonEnv, Row, SparkSession} import 
org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchTableException} import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.command.table.CarbonDropTableCommand @@ -86,6 +87,11 @@ case class CarbonDropDatabaseCommand(command: DropDatabaseCommand) CarbonUtil.dropDatabaseDirectory(carbonDatabaseLocation) rows } + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + this + } } case class CarbonSetCommand(command: SetCommand) @@ -105,6 +111,10 @@ case class CarbonSetCommand(command: SetCommand) override protected def opName: String = "SET" + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + this + } } object CarbonSetCommand { diff --git a/integration/spark/src/main/spark3.1/org/apache/spark/sql/hive/execution/command/CarbonResetCommand.scala b/integration/spark/src/main/scala/org/apache/spark/sql/hive/execution/command/CarbonResetCommand.scala similarity index 92% rename from integration/spark/src/main/spark3.1/org/apache/spark/sql/hive/execution/command/CarbonResetCommand.scala rename to integration/spark/src/main/scala/org/apache/spark/sql/hive/execution/command/CarbonResetCommand.scala index 784e8c7227d..14c648792b1 100644 --- a/integration/spark/src/main/spark3.1/org/apache/spark/sql/hive/execution/command/CarbonResetCommand.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/hive/execution/command/CarbonResetCommand.scala @@ -29,6 +29,11 @@ case class CarbonResetCommand() CarbonEnv.getInstance(sparkSession).carbonSessionInfo.getSessionParams.clear() ResetCommand(None).run(sparkSession) } + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + this + } } /** diff 
--git a/integration/spark/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala b/integration/spark/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala index 2dd8c234f05..312ddb8b619 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala @@ -89,17 +89,17 @@ object CarbonFilters { translateEqualTo(a.name, v, columnTypes) case EqualTo(Literal(v, _), a: Attribute) => translateEqualTo(a.name, v, columnTypes) - case c@EqualTo(Cast(_: Attribute, _, _), _: Literal) => + case c@EqualTo(Cast(_: Attribute, _, _, _), _: Literal) => CastExpressionOptimization.checkIfCastCanBeRemove(c) - case c@EqualTo(_: Literal, Cast(_: Attribute, _, _)) => + case c@EqualTo(_: Literal, Cast(_: Attribute, _, _, _)) => CastExpressionOptimization.checkIfCastCanBeRemove(c) case Not(EqualTo(a: Attribute, Literal(v, _))) => translateNotEqualTo(a.name, v, columnTypes) case Not(EqualTo(Literal(v, _), a: Attribute)) => translateNotEqualTo(a.name, v, columnTypes) - case c@Not(EqualTo(Cast(_: Attribute, _, _), _: Literal)) => + case c@Not(EqualTo(Cast(_: Attribute, _, _, _), _: Literal)) => CastExpressionOptimization.checkIfCastCanBeRemove(c) - case c@Not(EqualTo(_: Literal, Cast(_: Attribute, _, _))) => + case c@Not(EqualTo(_: Literal, Cast(_: Attribute, _, _, _))) => CastExpressionOptimization.checkIfCastCanBeRemove(c) case IsNotNull(a: Attribute) => translateNotEqualTo(a.name, null, columnTypes, true) @@ -109,9 +109,9 @@ object CarbonFilters { translateNotIn(a.name, list.map(e => e.eval(EmptyRow)), columnTypes) case In(a: Attribute, list) if list.forall(_.isInstanceOf[Literal]) => translateIn(a.name, list.map(e => e.eval(EmptyRow)), columnTypes) - case c@Not(In(Cast(_: Attribute, _, _), list)) if list.forall(_.isInstanceOf[Literal]) => + case c@Not(In(Cast(_: Attribute, _, _, _), list)) if list.forall(_.isInstanceOf[Literal]) => 
Some(transformExpression(c)) - case c@In(Cast(_: Attribute, _, _), list) if list.forall(_.isInstanceOf[Literal]) => + case c@In(Cast(_: Attribute, _, _, _), list) if list.forall(_.isInstanceOf[Literal]) => Some(transformExpression(c)) case InSet(a: Attribute, set) => translateIn(a.name, set.toSeq, columnTypes) @@ -121,33 +121,33 @@ object CarbonFilters { translateGreaterThan(a.name, v, columnTypes) case GreaterThan(Literal(v, _), a: Attribute) => translateLessThan(a.name, v, columnTypes) - case c@GreaterThan(Cast(_: Attribute, _, _), _: Literal) => + case c@GreaterThan(Cast(_: Attribute, _, _, _), _: Literal) => CastExpressionOptimization.checkIfCastCanBeRemove(c) - case c@GreaterThan(_: Literal, Cast(_: Attribute, _, _)) => + case c@GreaterThan(_: Literal, Cast(_: Attribute, _, _, _)) => CastExpressionOptimization.checkIfCastCanBeRemove(c) case LessThan(a: Attribute, Literal(v, _)) => translateLessThan(a.name, v, columnTypes) case LessThan(Literal(v, _), a: Attribute) => translateGreaterThan(a.name, v, columnTypes) - case c@LessThan(Cast(_: Attribute, _, _), _: Literal) => + case c@LessThan(Cast(_: Attribute, _, _, _), _: Literal) => CastExpressionOptimization.checkIfCastCanBeRemove(c) - case c@LessThan(_: Literal, Cast(_: Attribute, _, _)) => + case c@LessThan(_: Literal, Cast(_: Attribute, _, _, _)) => CastExpressionOptimization.checkIfCastCanBeRemove(c) case GreaterThanOrEqual(a: Attribute, Literal(v, _)) => translateGreaterThanEqual(a.name, v, columnTypes) case GreaterThanOrEqual(Literal(v, _), a: Attribute) => translateLessThanEqual(a.name, v, columnTypes) - case c@GreaterThanOrEqual(Cast(_: Attribute, _, _), _: Literal) => + case c@GreaterThanOrEqual(Cast(_: Attribute, _, _, _), _: Literal) => CastExpressionOptimization.checkIfCastCanBeRemove(c) - case c@GreaterThanOrEqual(_: Literal, Cast(_: Attribute, _, _)) => + case c@GreaterThanOrEqual(_: Literal, Cast(_: Attribute, _, _, _)) => CastExpressionOptimization.checkIfCastCanBeRemove(c) case 
LessThanOrEqual(a: Attribute, Literal(v, _)) => translateLessThanEqual(a.name, v, columnTypes) case LessThanOrEqual(Literal(v, _), a: Attribute) => translateGreaterThanEqual(a.name, v, columnTypes) - case c@LessThanOrEqual(Cast(_: Attribute, _, _), Literal(v, t)) => + case c@LessThanOrEqual(Cast(_: Attribute, _, _, _), Literal(v, t)) => CastExpressionOptimization.checkIfCastCanBeRemove(c) - case c@LessThanOrEqual(_: Literal, Cast(_: Attribute, _, _)) => + case c@LessThanOrEqual(_: Literal, Cast(_: Attribute, _, _, _)) => CastExpressionOptimization.checkIfCastCanBeRemove(c) case StartsWith(a: Attribute, Literal(v, _)) if v.toString.nonEmpty => translateStartsWith(a.name, v, columnTypes) @@ -159,7 +159,7 @@ object CarbonFilters { Some(new FalseExpression(null)) case ArrayContains(a: Attribute, Literal(v, _)) => translateArrayContains(a, v, columnTypes) - case ac@ArrayContains(Cast(_: Attribute, _, _), _: Literal) => + case ac@ArrayContains(Cast(_: Attribute, _, _, _), _: Literal) => CastExpressionOptimization.checkIfCastCanBeRemove(EqualTo(ac.left, ac.right)) case _ => None } diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/optimizer/MVMatcher.scala b/integration/spark/src/main/scala/org/apache/spark/sql/optimizer/MVMatcher.scala index 4b64ae357b9..dc6ad05602e 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/optimizer/MVMatcher.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/optimizer/MVMatcher.scala @@ -1569,6 +1569,8 @@ private object SelectSelectGroupbyChildDelta ev: ExprCode): ExprCode = ev override def dataType: DataType = alias.dataType + + override protected def withNewChildInternal(newChild: Expression): Expression = this } } diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/optimizer/MVRewrite.scala b/integration/spark/src/main/scala/org/apache/spark/sql/optimizer/MVRewrite.scala index 48fb93f45c7..45da2dbb73d 100644 --- 
a/integration/spark/src/main/scala/org/apache/spark/sql/optimizer/MVRewrite.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/optimizer/MVRewrite.scala @@ -542,25 +542,28 @@ class MVRewrite(catalog: MVCatalogInSpark, logicalPlan: LogicalPlan, subsumeeTable: LeafNode, subsumerIndex: Int, subsumeeIndex: Int): Boolean = { - (subsumerTable, subsumeeTable) match { - case _: (ModularRelation, ModularRelation) => - val subsumerTableParent = subsumer.find(plan => plan.children.contains(subsumerTable)).get - val subsumeeTableParent = subsumee.find(plan => plan.children.contains(subsumeeTable)).get - (subsumerTableParent, subsumeeTableParent) match { - case (subsumerSelect: Select, subsumeeSelect: Select) => - val intersectJoinEdges = subsumeeSelect.joinEdges intersect subsumerSelect.joinEdges - if (intersectJoinEdges.nonEmpty) { - return intersectJoinEdges.exists( - join => - join.left == subsumerIndex && - join.left == subsumeeIndex || join.right == subsumerIndex && - join.right == subsumeeIndex - ) - } - case _ => return false + // Join edges are compared only when both tables are ModularRelation. + if (!subsumerTable.isInstanceOf[ModularRelation] + || !subsumeeTable.isInstanceOf[ModularRelation]) { + return true + } + + val subsumerTableParent = subsumer.find(plan => plan.children.contains(subsumerTable)).get + val subsumeeTableParent = subsumee.find(plan => plan.children.contains(subsumeeTable)).get + (subsumerTableParent, subsumeeTableParent) match { + case (subsumerSelect: Select, subsumeeSelect: Select) => + val intersectJoinEdges = subsumeeSelect.joinEdges intersect subsumerSelect.joinEdges + if (intersectJoinEdges.nonEmpty) { + return intersectJoinEdges.exists( + join => + join.left == subsumerIndex && + join.left == subsumeeIndex || join.right == subsumerIndex && + join.right == subsumeeIndex + ) } + true + case _ => false } - true } // add Select operator as placeholder on top of subsumee to facilitate matching diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/optimizer/MVRewriteRule.scala
b/integration/spark/src/main/scala/org/apache/spark/sql/optimizer/MVRewriteRule.scala index b5e983fc493..a0ed7a1da70 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/optimizer/MVRewriteRule.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/optimizer/MVRewriteRule.scala @@ -56,7 +56,7 @@ class MVRewriteRule(session: SparkSession) extends Rule[LogicalPlan] { try { tryRewritePlan(logicalPlan) } catch { - case e => + case e: Throwable => // if exception is thrown while rewriting the query, will fallback to original query plan. MVRewriteRule.LOGGER .warn("Failed to rewrite plan with mv. Enable debug log to check the Exception") diff --git a/integration/spark/src/main/spark3.1/org/apache/spark/sql/parser/CarbonExtensionSqlParser.scala b/integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonExtensionSqlParser.scala similarity index 100% rename from integration/spark/src/main/spark3.1/org/apache/spark/sql/parser/CarbonExtensionSqlParser.scala rename to integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonExtensionSqlParser.scala diff --git a/integration/spark/src/main/spark3.1/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala b/integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala similarity index 89% rename from integration/spark/src/main/spark3.1/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala rename to integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala index 49978d4e3c2..d5aa38c9499 100644 --- a/integration/spark/src/main/spark3.1/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala @@ -16,12 +16,11 @@ */ package org.apache.spark.sql.parser -import scala.collection.mutable - +import collection.JavaConverters._ import org.antlr.v4.runtime.tree.TerminalNode import org.apache.spark.sql.{CarbonThreadUtil, CarbonToSparkAdapter, 
SparkSession} -import org.apache.spark.sql.catalyst.parser.{AbstractSqlParser, SqlBaseParser} import org.apache.spark.sql.catalyst.parser.ParserUtils.operationNotAllowed +import org.apache.spark.sql.catalyst.parser.SqlBaseParser import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.{SparkSqlAstBuilder, SparkSqlParser} @@ -81,23 +80,24 @@ class CarbonSparkSqlParser(conf: SQLConf, sparkSession: SparkSession) extends Sp class CarbonHelperSqlAstBuilder(conf: SQLConf, parser: CarbonSpark2SqlParser, sparkSession: SparkSession) - extends SparkSqlAstBuilderWrapper(conf) { + extends SparkSqlAstBuilder { /** * Parse a key-value map from a [[TablePropertyListContext]], assuming all values are specified. */ - override def visitPropertyKeyValues(ctx: TablePropertyListContext): Map[String, String] = { - val props = visitTablePropertyList(ctx) + override def visitPropertyKeyValues(ctx: PropertyListContext): Map[String, String] = { + val props = visitPropertyList(ctx) CarbonSparkSqlParserUtil.visitPropertyKeyValues(ctx, props) } - def getPropertyKeyValues(ctx: TablePropertyListContext): Map[String, String] = { + def getPropertyKeyValues(ctx: PropertyListContext): Map[String, String] = { Option(ctx).map(visitPropertyKeyValues) .getOrElse(Map.empty) } def createCarbonTable(createTableTuple: (CreateTableHeaderContext, SkewSpecContext, - BucketSpecContext, PartitionFieldListContext, ColTypeListContext, TablePropertyListContext, - LocationSpecContext, Option[String], TerminalNode, QueryContext, String)): LogicalPlan = { + BucketSpecContext, PartitionFieldListContext, CreateOrReplaceTableColTypeListContext, + PropertyListContext, LocationSpecContext, Option[String], TerminalNode, QueryContext, String) + ): LogicalPlan = { val (tableHeader, skewSpecContext, bucketSpecContext, @@ -111,8 +111,10 @@ class CarbonHelperSqlAstBuilder(conf: SQLConf, provider) = createTableTuple val
(tableIdent, temp, ifNotExists, external) = visitCreateTableHeader(tableHeader) - val tableIdentifier = CarbonToSparkAdapter.getTableIdentifier(tableIdent) - val cols: Seq[StructField] = Option(columns).toSeq.flatMap(visitColTypeList) + val parts = visitMultipartIdentifier(tableIdent.multipartIdentifier()) + val tableIdentifier = CarbonToSparkAdapter.getTableIdentifier(parts) + val cols: Seq[StructField] = Option(columns).toSeq + .flatMap(_.createOrReplaceTableColType().asScala.map(visitCreateOrReplaceTableColType)) val colNames: Seq[String] = CarbonSparkSqlParserUtil .validateCreateTableReqAndGetColumns(tableHeader, skewSpecContext, diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParserUtil.scala b/integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParserUtil.scala index d4b313c2dd8..94fd47638c1 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParserUtil.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParserUtil.scala @@ -339,7 +339,7 @@ object CarbonSparkSqlParserUtil { * @param props Map of table property list * @return Map of transformed table property. 
*/ - def visitPropertyKeyValues(ctx: TablePropertyListContext, + def visitPropertyKeyValues(ctx: PropertyListContext, props: Map[String, String]): Map[String, String] = { val badKeys = props.filter { case (_, v) => v == null }.keys if (badKeys.nonEmpty) { @@ -383,7 +383,7 @@ object CarbonSparkSqlParserUtil { def validateCreateTableReqAndGetColumns(tableHeader: CreateTableHeaderContext, skewSpecContext: SkewSpecContext, bucketSpecContext: BucketSpecContext, - columns: ColTypeListContext, + columns: CreateOrReplaceTableColTypeListContext, cols: Seq[StructField], tableIdentifier: TableIdentifier, isTempTable: Boolean): Seq[String] = { @@ -406,7 +406,7 @@ object CarbonSparkSqlParserUtil { colNames } - def checkIfDuplicateColumnExists(columns: ColTypeListContext, + def checkIfDuplicateColumnExists(columns: CreateOrReplaceTableColTypeListContext, tableIdentifier: TableIdentifier, colNames: Seq[String]): Unit = { if (colNames.length != colNames.distinct.length) { @@ -436,7 +436,7 @@ object CarbonSparkSqlParserUtil { case Some(value) => val result = value.children.get(1).getText if (result.equalsIgnoreCase("by")) { - value.storageHandler().STRING().getSymbol.getText + value.storageHandler().stringLit().STRING_LITERAL().getSymbol.getText } else if (result.equalsIgnoreCase("as") && value.children.size() > 1) { value.children.get(2).getText } else { diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/command/SILoadCommand.scala b/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/command/SILoadCommand.scala index 9bef2ae22aa..0b56ffcbcd5 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/command/SILoadCommand.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/command/SILoadCommand.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.secondaryindex.command import scala.collection.JavaConverters._ import org.apache.spark.sql.{CarbonEnv, Row, SparkSession, SQLContext} 
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.hive.CarbonRelation import org.apache.spark.sql.index.CarbonIndexUtil @@ -169,4 +170,9 @@ private[sql] case class LoadDataForSecondaryIndex(indexModel: IndexModel) extend loadMetadataDetails } } + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]) + : LogicalPlan = { + this + } } diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/hive/CarbonInternalMetastore.scala b/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/hive/CarbonInternalMetastore.scala index 4144963ff08..58cafc2824e 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/hive/CarbonInternalMetastore.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/hive/CarbonInternalMetastore.scala @@ -249,9 +249,9 @@ object CarbonInternalMetastore { } } } catch { - case e: Exception => + case _: Exception => // In case of creating a table, hive table will not be available. 
- LOGGER.error(e.getMessage) + // LOGGER.error(e.getMessage) } } // add cg and fg index info to table properties diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/joins/BroadCastSIFilterPushJoin.scala b/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/joins/BroadCastSIFilterPushJoin.scala index 527720aecc3..8a776721dc5 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/joins/BroadCastSIFilterPushJoin.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/joins/BroadCastSIFilterPushJoin.scala @@ -139,6 +139,11 @@ case class BroadCastSIFilterPushJoin( } def inputRDDs(): Seq[RDD[InternalRow]] = secondaryIndexRDD + + override protected def withNewChildrenInternal(newLeft: SparkPlan, + newRight: SparkPlan): SparkPlan = { + copy(left = newLeft, right = newRight) + } } object BroadCastSIFilterPushJoin { diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala b/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala index 11e1edfba1a..b23796e85b6 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala @@ -239,7 +239,7 @@ class CarbonSecondaryIndexOptimizer(sparkSession: SparkSession) { } private def createDF(sparkSession: SparkSession, logicalPlan: LogicalPlan): DataFrame = { - new Dataset[Row](sparkSession, logicalPlan, RowEncoder(logicalPlan.schema)) + new Dataset[Row](sparkSession, logicalPlan, RowEncoder.encoderFor(logicalPlan.schema)) } /** diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/test/ResourceRegisterAndCopier.scala b/integration/spark/src/main/scala/org/apache/spark/sql/test/ResourceRegisterAndCopier.scala deleted 
file mode 100644 index fee1bbe50b2..00000000000 --- a/integration/spark/src/main/scala/org/apache/spark/sql/test/ResourceRegisterAndCopier.scala +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.test - -import java.io._ -import java.net.URL -import java.util.zip.ZipFile - -import scala.collection.mutable.ArrayBuffer - -import org.apache.hadoop.io.IOUtils - -import org.apache.carbondata.common.logging.LogServiceFactory -import org.apache.carbondata.core.datastore.impl.FileFactory -import org.apache.carbondata.core.locks.HdfsFileLock -import org.apache.carbondata.core.util.CarbonUtil - -/** - * All the registered resources will be checked into hdfs and copies to it if not exists. 
- */ -object ResourceRegisterAndCopier { - - private val LOGGER = LogServiceFactory.getLogService(this.getClass.getCanonicalName) - - val link = "https://raw.githubusercontent" + - ".com/ravipesala/incubator-carbondata/sdv-test_data/integration/spark-common-test" + - "/src/test/resources" - - def copyResourcesifNotExists(hdfsPath: String, - resourcePath: String, - dataFilesPath: String): Unit = { - val file = FileFactory.getCarbonFile(hdfsPath) - if (!file.exists()) { - sys.error(s"""Provided path $hdfsPath does not exist""") - } - LOGGER.info("Try downloading resource data") - val lock = new HdfsFileLock(hdfsPath, "/resource.lock") - var bool = false - try { - bool = lockWithRetries(lock) - if (bool) { - val resources = readDataFiles(dataFilesPath) - resources.foreach { file => - val hdfsDataPath = hdfsPath + "/" + file - val rsFile = FileFactory.getCarbonFile(hdfsDataPath) - val target = resourcePath + "/" + file - if (!rsFile.exists()) { - if (file.lastIndexOf("/") > -1) { - new File(resourcePath + "/" + file.substring(0, file.lastIndexOf("/"))).mkdirs() - } - downloadFile(link, file, target) - // copy it - copyLocalFile(hdfsDataPath, target) - // Unzip the zip file to local directory - if (target.endsWith("zip")) { - unzip(target, new File(resourcePath + "/" + file.substring(0, file.lastIndexOf("/"))) - .getAbsolutePath) - } - new File(target).delete() - } else if (target.endsWith("zip")) { - if (new File(target).exists()) { - FileFactory.deleteAllFilesOfDir(new File(target)) - } - if (file.lastIndexOf("/") > -1) { - new File(resourcePath + "/" + file.substring(0, file.lastIndexOf("/"))).mkdirs() - } - downloadFile(link, file, target) - unzip(target, new File(resourcePath + "/" + file.substring(0, file.lastIndexOf("/"))) - .getAbsolutePath) - } - } - } - } finally { - if (bool) { - lock.unlock() - } - } - } - - def lockWithRetries(lock: HdfsFileLock): Boolean = { - try { - var i = 0 - while (i < 10) { - if (lock.lock()) { - return true - } else { - 
Thread.sleep(30 * 1000L) - } - i += 1 - } - } catch { - case _: InterruptedException => - return false - } - false - } - - def readDataFiles(dataFilesPath: String): Seq[String] = { - val buffer = new ArrayBuffer[String]() - val reader = new BufferedReader(new FileReader(dataFilesPath)) - var line = reader.readLine() - while (line != null) { - buffer += line - line = reader.readLine() - } - reader.close() - buffer - } - - def copyLocalFile(dst: String, - src: String): Unit = { - LOGGER.info(s"Copying file : $src to $dst") - if (FileFactory.isFileExist(src)) { - val dataOutputStream = FileFactory.getDataOutputStream(dst) - val dataInputStream = FileFactory.getDataInputStream(src) - IOUtils.copyBytes(dataInputStream, dataOutputStream, 8 * 1024) - CarbonUtil.closeStream(dataInputStream) - CarbonUtil.closeStream(dataOutputStream) - } - } - - def downloadFile(relativeLink: String, fileToDownLoad: String, targetFile: String): Unit = { - import java.io.FileOutputStream - val link = relativeLink + "/" + fileToDownLoad - LOGGER.info(s"Downloading file $link") - val url = new URL(link) - val c = url.openConnection - c.setRequestProperty("User-Agent", - "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.0.3705; .NET CLR 1.1.4322;" + - " .NET CLR 1.2.30703)") - - val input = c.getInputStream - val buffer = new Array[Byte](4096) - var n = input.read(buffer) - - val output = new FileOutputStream(new File(targetFile)) - while (n != -1) { - output.write(buffer, 0, n) - n = input.read(buffer) - } - output.close() - input.close() - } - - private def unzip(zipFilePath: String, destDir: String) = { - LOGGER.info(s"Uncompressing $zipFilePath to the directory $destDir") - try { - val zipFile = new ZipFile(zipFilePath) - val enu = zipFile.entries - while ( { enu.hasMoreElements }) { - val zipEntry = enu.nextElement - val name = destDir + "/" + zipEntry.getName - val file = new File(name) - if (name.endsWith("/")) { - file.mkdirs - } else { - val parent = file.getParentFile - 
if (parent != null) { - parent.mkdirs - } - val is = zipFile.getInputStream(zipEntry) - val fos = new FileOutputStream(file) - val bytes = new Array[Byte](1024) - var length = is.read(bytes) - while (length >= 0) { - fos.write(bytes, 0, length) - length = is.read(bytes) - } - is.close - fos.close() - } - } - zipFile.close - } catch { - case e: IOException => - e.printStackTrace() - } - } - -} diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/test/SparkTestQueryExecutor.scala b/integration/spark/src/main/scala/org/apache/spark/sql/test/SparkTestQueryExecutor.scala index 00285ca31c5..e24e78aa90a 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/test/SparkTestQueryExecutor.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/test/SparkTestQueryExecutor.scala @@ -82,14 +82,6 @@ object SparkTestQueryExecutor { .getOrCreate() spark.experimental.extraOptimizations = Seq(new CarbonFileIndexReplaceRule) CarbonEnv.getInstance(spark) - if (warehouse.startsWith("hdfs://")) { - System.setProperty(CarbonCommonConstants.HDFS_TEMP_LOCATION, warehouse) - CarbonProperties.getInstance().addProperty(CarbonCommonConstants.LOCK_TYPE, - CarbonCommonConstants.CARBON_LOCK_TYPE_HDFS) - ResourceRegisterAndCopier. 
- copyResourcesifNotExists(hdfsUrl, s"$integrationPath/spark/src/test/resources", - s"$integrationPath//spark-common-cluster-test/src/test/resources/testdatafileslist.txt") - } if (System.getProperty("useIndexServer") != null) { CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_INDEX_SERVER_IP, "localhost") diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/test/TestQueryExecutor.scala b/integration/spark/src/main/scala/org/apache/spark/sql/test/TestQueryExecutor.scala index e50cc65c1f1..9c8054bde59 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/test/TestQueryExecutor.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/test/TestQueryExecutor.scala @@ -162,7 +162,7 @@ object TestQueryExecutor { jarsLocal } - lazy val INSTANCE = lookupQueryExecutor.newInstance().asInstanceOf[TestQueryExecutorRegister] + lazy val INSTANCE = new SparkTestQueryExecutor() CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "FORCE") .addProperty(CarbonCommonConstants.CARBON_ENABLE_AUDIT, "false") @@ -171,11 +171,6 @@ object TestQueryExecutor { .addProperty(CarbonCommonConstants.CARBON_MAX_EXECUTOR_LRU_CACHE_SIZE, "1024") .addProperty(CarbonCommonConstants.CARBON_MINMAX_ALLOWED_BYTE_COUNT, "40") - private def lookupQueryExecutor: Class[_] = { - ServiceLoader.load(classOf[TestQueryExecutorRegister], Utils.getContextOrSparkClassLoader) - .iterator().next().getClass - } - private def createDirectory(badStoreLocation: String) = { FileFactory.mkdirs(badStoreLocation) } diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/util/SparkSQLUtil.scala b/integration/spark/src/main/scala/org/apache/spark/sql/util/SparkSQLUtil.scala index 272bea1dc8b..5dbe863449a 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/util/SparkSQLUtil.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/util/SparkSQLUtil.scala @@ -29,6 +29,7 @@ import 
org.apache.spark.sql.catalyst.expressions.{AttributeSeq, NamedExpression} import org.apache.spark.sql.catalyst.optimizer.{CheckCartesianProducts, EliminateOuterJoin, NullPropagation, PullupCorrelatedPredicates, RemoveRedundantAliases, ReorderJoin} import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan, Statistics, Union} import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.execution.LogicalRDD import org.apache.spark.sql.internal.{SessionState, SQLConf} import org.apache.spark.sql.types.StructType @@ -45,7 +46,7 @@ object SparkSQLUtil { } def execute(rdd: RDD[InternalRow], schema: StructType, sparkSession: SparkSession): DataFrame = { - execute(LogicalRDD(schema.toAttributes, rdd)(sparkSession), sparkSession) + execute(LogicalRDD(DataTypeUtils.toAttributes(schema), rdd)(sparkSession), sparkSession) } def getSparkSession: SparkSession = { diff --git a/integration/spark/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala b/integration/spark/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala index 107a86a91fd..7c56d3eac66 100644 --- a/integration/spark/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala +++ b/integration/spark/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala @@ -167,7 +167,7 @@ object CarbonReflectionUtils { mode: SaveMode, query: LogicalPlan, physicalPlan: SparkPlan): BaseRelation = { - dataSourceObj.writeAndRead(mode, query, query.output.map(_.name), physicalPlan) + dataSourceObj.writeAndRead(mode, query, query.output.map(_.name)) } /** diff --git a/integration/spark/src/main/spark2.3/com/databricks/spark/avro/AvroWriter.scala b/integration/spark/src/main/spark2.3/com/databricks/spark/avro/AvroWriter.scala deleted file mode 100644 index 8de31bec27f..00000000000 --- a/integration/spark/src/main/spark2.3/com/databricks/spark/avro/AvroWriter.scala +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to 
the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.databricks.spark.avro - -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.Row -import org.apache.spark.sql.execution.datasources.OutputWriterFactory - -/** - * This class is to get the avro writer from databricks avro module, as its not present in spark2.3 - * and spark-avro module is included in spark project from spark-2.4. So for spark-2.4, we use Avro - * writer from spark project. 
- */ -object AvroWriter { - - def getWriter(spark: org.apache.spark.sql.SparkSession, - job: org.apache.hadoop.mapreduce.Job, - dataSchema: org.apache.spark.sql.types.StructType, - options: scala.Predef.Map[scala.Predef.String, scala.Predef.String] = Map.empty) - : OutputWriterFactory = { - new DefaultSource().prepareWrite(spark, job, - options, dataSchema) - } -} - -/** - * This reads the avro files from the given path and return the RDD[Row] - */ -object AvroReader { - - def readAvro(spark: org.apache.spark.sql.SparkSession, deltaPath: String): RDD[Row] = { - spark.sparkContext - .hadoopConfiguration - .set("avro.mapred.ignore.inputs.without.extension", "false") - spark.read.avro(deltaPath).rdd - } -} diff --git a/integration/spark/src/main/spark2.3/org/apache/spark/sql/CarbonBoundReference.scala b/integration/spark/src/main/spark2.3/org/apache/spark/sql/CarbonBoundReference.scala deleted file mode 100644 index 3b185c3f783..00000000000 --- a/integration/spark/src/main/spark2.3/org/apache/spark/sql/CarbonBoundReference.scala +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql - -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{Attribute, ExprId, LeafExpression, NamedExpression} -import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback -import org.apache.spark.sql.types.DataType - -import org.apache.carbondata.core.scan.expression.ColumnExpression - -case class CarbonBoundReference(colExp: ColumnExpression, dataType: DataType, nullable: Boolean) - extends LeafExpression with NamedExpression with CodegenFallback { - - type EvaluatedType = Any - - override def toString: String = s"input[" + colExp.getColIndex + "]" - - override def eval(input: InternalRow): Any = input.get(colExp.getColIndex, dataType) - - override def name: String = colExp.getColumnName - - override def toAttribute: Attribute = throw new UnsupportedOperationException - - override def exprId: ExprId = throw new UnsupportedOperationException - - override def qualifier: Option[String] = null - - override def newInstance(): NamedExpression = throw new UnsupportedOperationException -} diff --git a/integration/spark/src/main/spark2.3/org/apache/spark/sql/CarbonToSparkAdapter.scala b/integration/spark/src/main/spark2.3/org/apache/spark/sql/CarbonToSparkAdapter.scala deleted file mode 100644 index 2c3483bdf53..00000000000 --- a/integration/spark/src/main/spark2.3/org/apache/spark/sql/CarbonToSparkAdapter.scala +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql - -import java.net.URI - -import scala.collection.mutable.ArrayBuffer - -import org.apache.spark.SparkContext -import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd} -import org.apache.spark.sql.carbondata.execution.datasources.CarbonFileIndexReplaceRule -import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, SessionCatalog} -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, Expression, ExprId, NamedExpression, ScalaUDF, SubqueryExpression} -import org.apache.spark.sql.catalyst.expressions.codegen.ExprCode -import org.apache.spark.sql.catalyst.optimizer.Optimizer -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} -import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile} -import org.apache.spark.sql.hive.HiveExternalCatalog -import org.apache.spark.sql.optimizer.{CarbonIUDRule, CarbonUDFTransformRule, MVRewriteRule} -import org.apache.spark.sql.secondaryindex.optimizer.CarbonSITransformationRule -import org.apache.spark.sql.types.{DataType, Metadata, StringType} - -import org.apache.carbondata.core.util.ThreadLocalSessionInfo -import org.apache.carbondata.geo.{InPolygonJoinUDF, ToRangeListAsStringUDF} - -object CarbonToSparkAdapter extends SparkVersionAdapter { - - def createFilePartition(index: Int, files: ArrayBuffer[PartitionedFile]): FilePartition = { - FilePartition(index, files.toArray.toSeq) - } - - def 
addSparkSessionListener(sparkSession: SparkSession): Unit = { - sparkSession.sparkContext.addSparkListener(new SparkListener { - override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = { - CarbonEnv.carbonEnvMap.remove(sparkSession) - ThreadLocalSessionInfo.unsetAll() - } - }) - } - - def addSparkListener(sparkContext: SparkContext): Unit = { - sparkContext.addSparkListener(new SparkListener { - override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = { - SparkSession.setDefaultSession(null) - } - }) - } - - def lowerCaseAttribute(expression: Expression): Expression = expression.transform { - case attr: AttributeReference => - CarbonToSparkAdapter.createAttributeReference( - attr.name.toLowerCase, - attr.dataType, - attr.nullable, - attr.metadata, - attr.exprId, - attr.qualifier) - } - - def createAttributeReference(name: String, dataType: DataType, nullable: Boolean, - metadata: Metadata, exprId: ExprId, qualifier: Option[String], - attrRef : NamedExpression = null): AttributeReference = { - AttributeReference( - name, - dataType, - nullable, - metadata)(exprId, qualifier) - } - - def createAttributeReference(attr: AttributeReference, - attrName: String, - newSubsume: String): AttributeReference = { - AttributeReference(attrName, attr.dataType)( - exprId = attr.exprId, - qualifier = Some(newSubsume)) - } - - def getTheLastQualifier(attribute: Attribute): String = { - attribute.qualifier.head - } - - def getOutput(subQueryAlias: SubqueryAlias): Seq[Attribute] = { - subQueryAlias.output - } - - def createScalaUDF(s: ScalaUDF, reference: AttributeReference): ScalaUDF = { - ScalaUDF(s.function, s.dataType, Seq(reference), s.inputTypes) - } - - def createRangeListScalaUDF(toRangeListUDF: ToRangeListAsStringUDF, - dataType: StringType.type, - children: Seq[Expression], - inputTypes: Seq[DataType]): ScalaUDF = { - ScalaUDF(toRangeListUDF, - dataType, - children, - inputTypes, - Some("ToRangeListAsString")) - } - - def 
getTransformedPolygonJoinUdf(scalaUdf: ScalaUDF, - udfChildren: Seq[Expression], - polygonJoinUdf: InPolygonJoinUDF): ScalaUDF = { - ScalaUDF(polygonJoinUdf, - scalaUdf.dataType, - udfChildren, - scalaUdf.inputTypes :+ scalaUdf.inputTypes.head, - scalaUdf.udfName, - scalaUdf.nullable, - scalaUdf.udfDeterministic) - } - - def createExprCode(code: String, isNull: String, value: String, dataType: DataType = null - ): ExprCode = { - ExprCode(code, isNull, value) - } - - def createAliasRef(child: Expression, - name: String, - exprId: ExprId = NamedExpression.newExprId, - qualifier: Option[String] = None, - explicitMetadata: Option[Metadata] = None) : Alias = { - - Alias(child, name)(exprId, qualifier, explicitMetadata) - } - - // Create the aliases using two plan outputs mappings. - def createAliases(mappings: Seq[(NamedExpression, NamedExpression)]): Seq[NamedExpression] = { - mappings.map{ case (o1, o2) => - o2 match { - case al: Alias if o1.name == o2.name && o1.exprId != o2.exprId => - Alias(al.child, o1.name)(exprId = o1.exprId) - case other => - if (o1.name != o2.name || o1.exprId != o2.exprId) { - Alias(o2, o1.name)(exprId = o1.exprId) - } else { - o2 - } - } - } - } - - /** - * As a part of SPARK-24085 Hive tables supports scala subquery for - * the partitioned tables,so Carbon also needs to supports - * @param partitionSet - * @param filterPredicates - * @return - */ - def getPartitionFilter( - partitionSet: AttributeSet, - filterPredicates: Seq[Expression]): Seq[Expression] = { - filterPredicates - .filterNot(SubqueryExpression.hasSubquery) - .filter { filter => - filter.references.nonEmpty && filter.references.subsetOf(partitionSet) - } - } - - def getDataFilter(partitionSet: AttributeSet, - filter: Seq[Expression], - partitionFilter: Seq[Expression]): Seq[Expression] = { - filter - } - - // As per SPARK-22520 OptimizeCodegen is removed in 2.3.1 - def getOptimizeCodegenRule(): Seq[Rule[LogicalPlan]] = { - Seq.empty - } - - def 
getUpdatedStorageFormat(storageFormat: CatalogStorageFormat, - map: Map[String, String], - tablePath: String): CatalogStorageFormat = { - storageFormat.copy(properties = map, locationUri = Some(new URI(tablePath))) - } - - def getHiveExternalCatalog(sparkSession: SparkSession): HiveExternalCatalog = { - sparkSession.sessionState.catalog.externalCatalog.asInstanceOf[HiveExternalCatalog] - } -} - -class CarbonOptimizer( - session: SparkSession, - catalog: SessionCatalog, - optimizer: Optimizer) extends Optimizer(catalog) { - - private lazy val mvRules = Seq(Batch("Materialized View Optimizers", Once, - Seq(new MVRewriteRule(session)): _*)) - - private lazy val iudRule = Batch("IUD Optimizers", fixedPoint, - Seq(new CarbonIUDRule(), new CarbonUDFTransformRule(), new CarbonFileIndexReplaceRule()): _*) - - private lazy val secondaryIndexRule = Batch("SI Optimizers", Once, - Seq(new CarbonSITransformationRule(session)): _*) - - override def batches: Seq[Batch] = { - mvRules ++ convertedBatch() :+ iudRule :+ secondaryIndexRule - } - - def convertedBatch(): Seq[Batch] = { - optimizer.batches.map { batch => - Batch( - batch.name, - batch.strategy match { - case optimizer.Once => - Once - case _: optimizer.FixedPoint => - fixedPoint - }, - batch.rules: _* - ) - } - } -} diff --git a/integration/spark/src/main/spark2.3/org/apache/spark/sql/SparkSqlAdapter.scala b/integration/spark/src/main/spark2.3/org/apache/spark/sql/SparkSqlAdapter.scala deleted file mode 100644 index eb3c5fdc834..00000000000 --- a/integration/spark/src/main/spark2.3/org/apache/spark/sql/SparkSqlAdapter.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql - -import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd} -import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} -import org.apache.spark.sql.execution.FileSourceScanExec -import org.apache.spark.sql.execution.datasources.HadoopFsRelation -import org.apache.spark.sql.types.StructType - -import org.apache.carbondata.core.util.ThreadLocalSessionInfo - -object SparkSqlAdapter { - - def initSparkSQL(): Unit = { - } - - def getScanForSegments( - @transient relation: HadoopFsRelation, - output: Seq[Attribute], - outputSchema: StructType, - partitionFilters: Seq[Expression], - dataFilters: Seq[Expression], - tableIdentifier: Option[TableIdentifier] - ): FileSourceScanExec = { - FileSourceScanExec( - relation, - output, - outputSchema, - partitionFilters, - dataFilters, - tableIdentifier) - } -} diff --git a/integration/spark/src/main/spark2.3/org/apache/spark/sql/avro/AvroFileFormatFactory.scala b/integration/spark/src/main/spark2.3/org/apache/spark/sql/avro/AvroFileFormatFactory.scala deleted file mode 100644 index 616f052bca8..00000000000 --- a/integration/spark/src/main/spark2.3/org/apache/spark/sql/avro/AvroFileFormatFactory.scala +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license 
agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.avro - -import com.databricks.spark.avro.{AvroReader, AvroWriter} -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.Row -import org.apache.spark.sql.execution.datasources.OutputWriterFactory - -object AvroFileFormatFactory { - - /** - * return the avro writer to write the avro files - * @return avro writer - */ - def getAvroWriter(spark: org.apache.spark.sql.SparkSession, - job: org.apache.hadoop.mapreduce.Job, - dataSchema: org.apache.spark.sql.types.StructType, - options: scala.Predef.Map[scala.Predef.String, scala.Predef.String] = Map.empty) - : OutputWriterFactory = { - AvroWriter.getWriter(spark, job, dataSchema, options) - } - - /** - * Reads the avro files present at the given path - * @param deltaPath path to read the avro files from. 
- * @return RDD[Row] - */ - def readAvro(spark: org.apache.spark.sql.SparkSession, deltaPath: String): RDD[Row] = { - spark.sparkContext - .hadoopConfiguration - .set("avro.mapred.ignore.inputs.without.extension", "false") - AvroReader.readAvro(spark, deltaPath) - } -} diff --git a/integration/spark/src/main/spark2.3/org/apache/spark/sql/execution/CreateDataSourceTableCommand.scala b/integration/spark/src/main/spark2.3/org/apache/spark/sql/execution/CreateDataSourceTableCommand.scala deleted file mode 100644 index 9a54f8a5886..00000000000 --- a/integration/spark/src/main/spark2.3/org/apache/spark/sql/execution/CreateDataSourceTableCommand.scala +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution - -import org.apache.spark.sql.{Row, SparkSession} -import org.apache.spark.sql.catalyst.catalog.CatalogTable - -object CreateDataSourceTableCommand { - def createDataSource(catalogTable: CatalogTable, - ignoreIfExists: Boolean, - sparkSession: SparkSession): Seq[Row] = { - org.apache.spark.sql.execution.command. 
- CreateDataSourceTableCommand(catalogTable, ignoreIfExists).run(sparkSession) - } -} diff --git a/integration/spark/src/main/spark2.3/org/apache/spark/sql/hive/CarbonSessionStateBuilder.scala b/integration/spark/src/main/spark2.3/org/apache/spark/sql/hive/CarbonSessionStateBuilder.scala deleted file mode 100644 index b08f1060814..00000000000 --- a/integration/spark/src/main/spark2.3/org/apache/spark/sql/hive/CarbonSessionStateBuilder.scala +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.hive - -import java.util.concurrent.Callable - -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path -import org.apache.spark.sql.{CarbonEnv, SparkSession} -import org.apache.spark.sql.catalyst.{QualifiedTableName, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry} -import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTablePartition, FunctionResourceLoader, GlobalTempViewManager} -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.parser.ParserInterface -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.execution.strategy.{CarbonSourceStrategy, DDLStrategy, DMLStrategy, StreamingTableStrategy} -import org.apache.spark.sql.hive.client.HiveClient -import org.apache.spark.sql.internal.{SessionState, SQLConf} -import org.apache.spark.sql.optimizer.{CarbonIUDRule, CarbonUDFTransformRule} -import org.apache.spark.sql.parser.CarbonSparkSqlParser - -import org.apache.carbondata.core.metadata.schema.table.CarbonTable - -/** - * This class will have carbon catalog and refresh the relation from cache if the carbon table in - * carbon catalog is not same as cached carbon relation's carbon table - * - * @param externalCatalog - * @param globalTempViewManager - * @param sparkSession - * @param functionResourceLoader - * @param functionRegistry - * @param conf - * @param hadoopConf - */ -class CarbonHiveSessionCatalog( - externalCatalog: HiveExternalCatalog, - globalTempViewManager: GlobalTempViewManager, - functionRegistry: FunctionRegistry, - sparkSession: SparkSession, - conf: SQLConf, - hadoopConf: Configuration, - parser: ParserInterface, - functionResourceLoader: FunctionResourceLoader) - extends HiveSessionCatalog ( - externalCatalog, - globalTempViewManager, - new HiveMetastoreCatalog(sparkSession), - functionRegistry, - conf, - hadoopConf, - parser, - 
functionResourceLoader - ) with CarbonSessionCatalog { - - private lazy val carbonEnv = { - val env = new CarbonEnv - env.init(sparkSession) - env - } - - /** - * return's the carbonEnv instance - * @return - */ - override def getCarbonEnv() : CarbonEnv = { - carbonEnv - } - - // Initialize all listeners to the Operation bus. - CarbonEnv.init - - override def lookupRelation(name: TableIdentifier): LogicalPlan = { - var rtnRelation = super.lookupRelation(name) - val isRelationRefreshed = - CarbonSessionUtil.refreshRelationAndSetStats(rtnRelation, name)(sparkSession) - if (isRelationRefreshed) { - rtnRelation = super.lookupRelation(name) - // Reset the stats after lookup. - CarbonSessionUtil.refreshRelationAndSetStats(rtnRelation, name)(sparkSession) - } - rtnRelation - } - - override def getCachedPlan(t: QualifiedTableName, - c: Callable[LogicalPlan]): LogicalPlan = { - val plan = super.getCachedPlan(t, c) - CarbonSessionUtil.updateCachedPlan(plan) - } - - /** - * returns hive client from HiveExternalCatalog - * - * @return - */ - override def getClient(): org.apache.spark.sql.hive.client.HiveClient = { - CarbonSessionCatalogUtil.getClient(sparkSession) - } - - /** - * This is alternate way of getting partition information. It first fetches all partitions from - * hive and then apply filter instead of querying hive along with filters. 
- * @param partitionFilters - * @param sparkSession - * @param carbonTable - * @return - */ - override def getPartitionsAlternate(partitionFilters: Seq[Expression], - sparkSession: SparkSession, carbonTable: CarbonTable): Seq[CatalogTablePartition] = { - CarbonSessionCatalogUtil.getPartitionsAlternate(partitionFilters, sparkSession, carbonTable) - } - - /** - * Update the storage format with new location information - */ - override def updateStorageLocation( - path: Path, - storage: CatalogStorageFormat, - newTableName: String, - dbName: String): CatalogStorageFormat = { - CarbonSessionCatalogUtil.updateStorageLocation(path, storage, newTableName, dbName) - } -} - -/** - * Session state implementation to override sql parser and adding strategies - * - * @param sparkSession - */ -class CarbonSessionStateBuilder(sparkSession: SparkSession, - parentState: Option[SessionState] = None) - extends HiveSessionStateBuilder(sparkSession, parentState) { - - override lazy val sqlParser: ParserInterface = new CarbonSparkSqlParser(conf, sparkSession) - - experimentalMethods.extraStrategies = - Seq(StreamingTableStrategy, DMLStrategy, DDLStrategy, CarbonSourceStrategy) - experimentalMethods.extraOptimizations = Seq(new CarbonIUDRule, new CarbonUDFTransformRule) - - /** - * Internal catalog for managing table and database states. - */ - /** - * Create a [[CarbonSessionStateBuilder]]. - */ - override protected lazy val catalog: CarbonHiveSessionCatalog = { - val catalog = new CarbonHiveSessionCatalog( - externalCatalog, - session.sharedState.globalTempViewManager, - functionRegistry, - sparkSession, - conf, - SessionState.newHadoopConf(session.sparkContext.hadoopConfiguration, conf), - sqlParser, - resourceLoader) - parentState.foreach(_.catalog.copyStateTo(catalog)) - catalog - } - - private def externalCatalog: HiveExternalCatalog = - session.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog] - - /** - * Create a Hive aware resource loader. 
- */ - override protected lazy val resourceLoader: HiveSessionResourceLoader = { - val client: HiveClient = externalCatalog.client.newSession() - new HiveSessionResourceLoader(session, client) - } - - override protected def analyzer: Analyzer = - new CarbonAnalyzer(catalog, conf, sparkSession, super.analyzer) -} - diff --git a/integration/spark/src/main/spark2.4/org/apache/spark/sql/CarbonToSparkAdapter.scala b/integration/spark/src/main/spark2.4/org/apache/spark/sql/CarbonToSparkAdapter.scala deleted file mode 100644 index 89212a6d985..00000000000 --- a/integration/spark/src/main/spark2.4/org/apache/spark/sql/CarbonToSparkAdapter.scala +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql - -import java.net.URI - -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer - -import org.apache.spark.{SparkContext, TaskContext} -import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd} -import org.apache.spark.sql.carbondata.execution.datasources.CarbonFileIndexReplaceRule -import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, ExternalCatalogWithListener, SessionCatalog} -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen._ -import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.optimizer.Optimizer -import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} -import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.execution.command._ -import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile} -import org.apache.spark.sql.hive.HiveExternalCatalog -import org.apache.spark.sql.optimizer.{CarbonIUDRule, CarbonUDFTransformRule, MVRewriteRule} -import org.apache.spark.sql.secondaryindex.optimizer.CarbonSITransformationRule -import org.apache.spark.sql.types.{DataType, Metadata, StringType} - -import org.apache.carbondata.core.util.ThreadLocalSessionInfo -import org.apache.carbondata.geo.{InPolygonJoinUDF, ToRangeListAsStringUDF} - -object CarbonToSparkAdapter extends SparkVersionAdapter { - - def createFilePartition(index: Int, files: ArrayBuffer[PartitionedFile]): FilePartition = { - FilePartition(index, files.toArray) - } - - def addSparkSessionListener(sparkSession: SparkSession): Unit = { - sparkSession.sparkContext.addSparkListener(new SparkListener { - override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = { - CarbonEnv.carbonEnvMap.remove(sparkSession) - ThreadLocalSessionInfo.unsetAll() - } - }) - } - - 
def addSparkListener(sparkContext: SparkContext): Unit = { - sparkContext.addSparkListener(new SparkListener { - override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = { - SparkSession.setDefaultSession(null) - } - }) - } - - def createAttributeReference( - name: String, - dataType: DataType, - nullable: Boolean, - metadata: Metadata, - exprId: ExprId, - qualifier: Option[String], - attrRef : NamedExpression = null): AttributeReference = { - val qf = if (qualifier.nonEmpty) Seq(qualifier.get) else Seq.empty - AttributeReference( - name, - dataType, - nullable, - metadata)(exprId, qf) - } - - def createAttributeReference( - name: String, - dataType: DataType, - nullable: Boolean, - metadata: Metadata, - exprId: ExprId, - qualifier: Seq[String]): AttributeReference = { - AttributeReference( - name, - dataType, - nullable, - metadata)(exprId, qualifier) - } - - def lowerCaseAttribute(expression: Expression): Expression = expression.transform { - case attr: AttributeReference => - CarbonToSparkAdapter.createAttributeReference( - attr.name.toLowerCase, - attr.dataType, - attr.nullable, - attr.metadata, - attr.exprId, - attr.qualifier) - } - - def createAttributeReference(attr: AttributeReference, - attrName: String, - newSubsume: String): AttributeReference = { - AttributeReference(attrName, attr.dataType)( - exprId = attr.exprId, - qualifier = newSubsume.split("\n").map(_.trim)) - } - - def createScalaUDF(s: ScalaUDF, reference: AttributeReference): ScalaUDF = { - ScalaUDF(s.function, s.dataType, Seq(reference), s.inputsNullSafe, s.inputTypes) - } - - def createRangeListScalaUDF(toRangeListUDF: ToRangeListAsStringUDF, - dataType: StringType.type, - children: Seq[Expression], - inputTypes: Seq[DataType]): ScalaUDF = { - val inputsNullSafe: Seq[Boolean] = Seq(true, true, true) - ScalaUDF(toRangeListUDF, - dataType, - children, - inputsNullSafe, - inputTypes, - Some("ToRangeListAsString")) - } - - def getTransformedPolygonJoinUdf(scalaUdf: 
ScalaUDF, - udfChildren: Seq[Expression], - polygonJoinUdf: InPolygonJoinUDF): ScalaUDF = { - ScalaUDF(polygonJoinUdf, - scalaUdf.dataType, - udfChildren, - scalaUdf.inputsNullSafe, - scalaUdf.inputTypes :+ scalaUdf.inputTypes.head, - scalaUdf.udfName) - } - - def createExprCode(code: String, isNull: String, value: String, dataType: DataType): ExprCode = { - ExprCode( - code"$code", - JavaCode.isNullVariable(isNull), - JavaCode.variable(value, dataType)) - } - - def createAliasRef( - child: Expression, - name: String, - exprId: ExprId = NamedExpression.newExprId, - qualifier: Seq[String] = Seq.empty, - explicitMetadata: Option[Metadata] = None) : Alias = { - Alias(child, name)(exprId, qualifier, explicitMetadata) - } - - def createAliasRef( - child: Expression, - name: String, - exprId: ExprId, - qualifier: Option[String]) : Alias = { - Alias(child, name)(exprId, - if (qualifier.isEmpty) Seq.empty else Seq(qualifier.get), - None) - } - - // Create the aliases using two plan outputs mappings. 
- def createAliases(mappings: Seq[(NamedExpression, NamedExpression)]): Seq[NamedExpression] = { - mappings.map{ case (o1, o2) => - o2 match { - case al: Alias if o1.name == o2.name && o1.exprId != o2.exprId => - Alias(al.child, o1.name)(exprId = o1.exprId) - case other => - if (o1.name != o2.name || o1.exprId != o2.exprId) { - Alias(o2, o1.name)(exprId = o1.exprId) - } else { - o2 - } - } - } - } - - def getTheLastQualifier(attribute: Attribute): String = { - attribute.qualifier.reverse.head - } - - - /** - * As a part of SPARK-24085 Hive tables supports scala subquery for - * the partitioned tables,so Carbon also needs to supports - * @param partitionSet - * @param filterPredicates - * @return - */ - def getPartitionFilter( - partitionSet: AttributeSet, - filterPredicates: Seq[Expression]): Seq[Expression] = { - filterPredicates - .filterNot(SubqueryExpression.hasSubquery) - .filter { filter => - filter.references.nonEmpty && filter.references.subsetOf(partitionSet) - } - } - - def getDataFilter(partitionSet: AttributeSet, - filter: Seq[Expression], - partitionFilter: Seq[Expression]): Seq[Expression] = { - filter - } - - // As per SPARK-22520 OptimizeCodegen is removed in 2.3.1 - def getOptimizeCodegenRule(): Seq[Rule[LogicalPlan]] = { - Seq.empty - } - - def getUpdatedStorageFormat(storageFormat: CatalogStorageFormat, - map: Map[String, String], - tablePath: String): CatalogStorageFormat = { - storageFormat.copy(properties = map, locationUri = Some(new URI(tablePath))) - } - - def getOutput(subQueryAlias: SubqueryAlias): Seq[Attribute] = { - val newAlias = Seq(subQueryAlias.name.identifier) - subQueryAlias.child.output.map(_.withQualifier(newAlias)) - } - - def getHiveExternalCatalog(sparkSession: SparkSession): HiveExternalCatalog = { - sparkSession.sessionState.catalog.externalCatalog - .asInstanceOf[ExternalCatalogWithListener] - .unwrapped - .asInstanceOf[HiveExternalCatalog] - } -} - -class CarbonOptimizer( - session: SparkSession, - catalog: 
SessionCatalog, - optimizer: Optimizer) extends Optimizer(catalog) { - - private lazy val mvRules = Seq(Batch("Materialized View Optimizers", Once, - Seq(new MVRewriteRule(session)): _*)) - - private lazy val iudRule = Batch("IUD Optimizers", fixedPoint, - Seq(new CarbonIUDRule(), new CarbonUDFTransformRule(), new CarbonFileIndexReplaceRule()): _*) - - private lazy val secondaryIndexRule = Batch("SI Optimizers", Once, - Seq(new CarbonSITransformationRule(session)): _*) - - override def defaultBatches: Seq[Batch] = { - mvRules ++ convertedBatch() :+ iudRule :+ secondaryIndexRule - } - - def convertedBatch(): Seq[Batch] = { - optimizer.batches.map { batch => - Batch( - batch.name, - batch.strategy match { - case optimizer.Once => - Once - case _: optimizer.FixedPoint => - fixedPoint - }, - batch.rules: _* - ) - } - } -} diff --git a/integration/spark/src/main/spark2.4/org/apache/spark/sql/hive/CarbonSessionStateBuilder.scala b/integration/spark/src/main/spark2.4/org/apache/spark/sql/hive/CarbonSessionStateBuilder.scala deleted file mode 100644 index bace849cc37..00000000000 --- a/integration/spark/src/main/spark2.4/org/apache/spark/sql/hive/CarbonSessionStateBuilder.scala +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive - -import java.util.concurrent.Callable - -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path -import org.apache.spark.sql.{CarbonEnv, SparkSession} -import org.apache.spark.sql.catalyst.{QualifiedTableName, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry} -import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTablePartition, ExternalCatalogWithListener, FunctionResourceLoader, GlobalTempViewManager} -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.parser.ParserInterface -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.execution.strategy.{CarbonSourceStrategy, DDLStrategy, DMLStrategy, StreamingTableStrategy} -import org.apache.spark.sql.hive.client.HiveClient -import org.apache.spark.sql.internal.{SessionState, SQLConf} -import org.apache.spark.sql.optimizer.{CarbonIUDRule, CarbonUDFTransformRule} -import org.apache.spark.sql.parser.CarbonSparkSqlParser - -import org.apache.carbondata.core.metadata.schema.table.CarbonTable - -/** - * This class will have carbon catalog and refresh the relation from cache if the carbontable in - * carbon catalog is not same as cached carbon relation's carbon table - * - * @param externalCatalog - * @param globalTempViewManager - * @param sparkSession - * @param functionResourceLoader - * @param functionRegistry - * @param conf - * @param hadoopConf - */ -class CarbonHiveSessionCatalog( - externalCatalog: HiveExternalCatalog, - globalTempViewManager: GlobalTempViewManager, - functionRegistry: FunctionRegistry, - sparkSession: SparkSession, - conf: SQLConf, - hadoopConf: Configuration, - parser: ParserInterface, - functionResourceLoader: FunctionResourceLoader) - extends HiveSessionCatalog ( - () => 
externalCatalog, - () => globalTempViewManager, - new HiveMetastoreCatalog(sparkSession), - functionRegistry, - conf, - hadoopConf, - parser, - functionResourceLoader - ) with CarbonSessionCatalog { - - private lazy val carbonEnv = { - val env = new CarbonEnv - env.init(sparkSession) - env - } - - /** - * return's the carbonEnv instance - * @return - */ - override def getCarbonEnv() : CarbonEnv = { - carbonEnv - } - - // Initialize all listeners to the Operation bus. - CarbonEnv.init - - override def lookupRelation(name: TableIdentifier): LogicalPlan = { - var rtnRelation = super.lookupRelation(name) - val isRelationRefreshed = - CarbonSessionUtil.refreshRelationAndSetStats(rtnRelation, name)(sparkSession) - if (isRelationRefreshed) { - rtnRelation = super.lookupRelation(name) - // Reset the stats after lookup. - CarbonSessionUtil.refreshRelationAndSetStats(rtnRelation, name)(sparkSession) - } - rtnRelation - } - - override def getCachedPlan(t: QualifiedTableName, - c: Callable[LogicalPlan]): LogicalPlan = { - val plan = super.getCachedPlan(t, c) - CarbonSessionUtil.updateCachedPlan(plan) - } - - /** - * returns hive client from HiveExternalCatalog - * - * @return - */ - override def getClient(): org.apache.spark.sql.hive.client.HiveClient = { - CarbonSessionCatalogUtil.getClient(sparkSession) - } - - /** - * This is alternate way of getting partition information. It first fetches all partitions from - * hive and then apply filter instead of querying hive along with filters. 
- * @param partitionFilters - * @param sparkSession - * @param carbonTable - * @return - */ - override def getPartitionsAlternate(partitionFilters: Seq[Expression], - sparkSession: SparkSession, carbonTable: CarbonTable): Seq[CatalogTablePartition] = { - CarbonSessionCatalogUtil.getPartitionsAlternate(partitionFilters, sparkSession, carbonTable) - } - - /** - * Update the storageformat with new location information - */ - override def updateStorageLocation( - path: Path, - storage: CatalogStorageFormat, - newTableName: String, - dbName: String): CatalogStorageFormat = { - CarbonSessionCatalogUtil.updateStorageLocation(path, storage, newTableName, dbName) - } -} - -/** - * Session state implementation to override sql parser and adding strategies - * - * @param sparkSession - */ -class CarbonSessionStateBuilder(sparkSession: SparkSession, - parentState: Option[SessionState] = None) - extends HiveSessionStateBuilder(sparkSession, parentState) { - - override lazy val sqlParser: ParserInterface = new CarbonSparkSqlParser(conf, sparkSession) - - experimentalMethods.extraStrategies = - Seq(StreamingTableStrategy, DMLStrategy, DDLStrategy, CarbonSourceStrategy) - experimentalMethods.extraOptimizations = Seq(new CarbonIUDRule, - new CarbonUDFTransformRule) - - /** - * Internal catalog for managing table and database states. - */ - /** - * Create a [[CarbonSessionStateBuilder]]. 
- */ - override protected lazy val catalog: CarbonHiveSessionCatalog = { - val catalog = new CarbonHiveSessionCatalog( - externalCatalog, - session.sharedState.globalTempViewManager, - functionRegistry, - sparkSession, - conf, - SessionState.newHadoopConf(session.sparkContext.hadoopConfiguration, conf), - sqlParser, - resourceLoader) - parentState.foreach(_.catalog.copyStateTo(catalog)) - catalog - } - - private def externalCatalog: HiveExternalCatalog = - session - .sharedState - .externalCatalog - .asInstanceOf[ExternalCatalogWithListener] - .unwrapped - .asInstanceOf[HiveExternalCatalog] - - /** - * Create a Hive aware resource loader. - */ - override protected lazy val resourceLoader: HiveSessionResourceLoader = { - val client: HiveClient = externalCatalog.client.newSession() - new HiveSessionResourceLoader(session, () => client) - } - - override protected def analyzer: Analyzer = { - new CarbonAnalyzer(catalog, - conf, - sparkSession, - super.analyzer) - } -} diff --git a/integration/spark/src/main/spark3.1/org/apache/spark/sql/SparkSqlAdapter.scala b/integration/spark/src/main/spark3.1/org/apache/spark/sql/SparkSqlAdapter.scala deleted file mode 100644 index 8be562322f6..00000000000 --- a/integration/spark/src/main/spark3.1/org/apache/spark/sql/SparkSqlAdapter.scala +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql - -import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.expressions.{Attribute, EmptyRow, Expression} -import org.apache.spark.sql.execution.FileSourceScanExec -import org.apache.spark.sql.execution.datasources.HadoopFsRelation -import org.apache.spark.sql.types.StructType - -object SparkSqlAdapter { - - def initSparkSQL(): Unit = { - } - - def getScanForSegments( - @transient relation: HadoopFsRelation, - output: Seq[Attribute], - outputSchema: StructType, - partitionFilters: Seq[Expression], - dataFilters: Seq[Expression], - tableIdentifier: Option[TableIdentifier] - ): FileSourceScanExec = { - FileSourceScanExec( - relation, - output, - outputSchema, - partitionFilters, - None, - None, - dataFilters, - tableIdentifier) - } -} diff --git a/integration/spark/src/main/spark3.1/org/apache/spark/sql/parser/SparkSqlAstBuilderWrapper.scala b/integration/spark/src/main/spark3.1/org/apache/spark/sql/parser/SparkSqlAstBuilderWrapper.scala deleted file mode 100644 index 7a576b16d8a..00000000000 --- a/integration/spark/src/main/spark3.1/org/apache/spark/sql/parser/SparkSqlAstBuilderWrapper.scala +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.parser - -import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ -import org.apache.spark.sql.execution.SparkSqlAstBuilder -import org.apache.spark.sql.internal.SQLConf - -/** - * use this wrapper to adapter multiple spark versions - */ -abstract class SparkSqlAstBuilderWrapper(conf: SQLConf) extends SparkSqlAstBuilder { - - def visitPropertyKeyValues(ctx: TablePropertyListContext): Map[String, String] -} diff --git a/integration/spark/src/resources/META-INF/services/org.apache.spark.sql.test.TestQueryExecutorRegister b/integration/spark/src/resources/META-INF/services/org.apache.spark.sql.test.TestQueryExecutorRegister deleted file mode 100644 index fc96db41098..00000000000 --- a/integration/spark/src/resources/META-INF/services/org.apache.spark.sql.test.TestQueryExecutorRegister +++ /dev/null @@ -1,17 +0,0 @@ -## ------------------------------------------------------------------------ -## Licensed to the Apache Software Foundation (ASF) under one or more -## contributor license agreements. See the NOTICE file distributed with -## this work for additional information regarding copyright ownership. -## The ASF licenses this file to You under the Apache License, Version 2.0 -## (the "License"); you may not use this file except in compliance with -## the License. 
You may obtain a copy of the License at -## -## http://www.apache.org/licenses/LICENSE-2.0 -## -## Unless required by applicable law or agreed to in writing, software -## distributed under the License is distributed on an "AS IS" BASIS, -## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -## See the License for the specific language governing permissions and -## limitations under the License. -## ------------------------------------------------------------------------ -org.apache.spark.sql.test.SparkTestQueryExecutor \ No newline at end of file diff --git a/integration/spark/src/test/resources/datawithoutheader.csv b/integration/spark/src/test/resources/datawithoutheader.csv deleted file mode 100644 index 762ec849168..00000000000 --- a/integration/spark/src/test/resources/datawithoutheader.csv +++ /dev/null @@ -1,10 +0,0 @@ -11,arvind,SE,17-01-2007,1,developer,10,network,928478,17-02-2007,29-11-2016,96,96,5040 -12,krithin,SSE,29-05-2008,1,developer,11,protocol,928378,29-06-2008,30-12-2016,85,95,7124 -13,madhan,TPL,07-07-2009,2,tester,10,network,928478,07-08-2009,30-12-2016,88,99,9054 -14,anandh,SA,29-12-2010,3,manager,11,protocol,928278,29-01-2011,29-06-2016,77,92,11248 -15,ayushi,SSA,09-11-2011,1,developer,12,security,928375,09-12-2011,29-05-2016,99,91,13245 -16,pramod,SE,14-10-2012,1,developer,13,configManagement,928478,14-11-2012,29-12-2016,86,93,5040 -17,gawrav,PL,22-09-2013,2,tester,12,security,928778,22-10-2013,15-11-2016,78,97,9574 -18,sibi,TL,15-08-2014,2,tester,14,Learning,928176,15-09-2014,29-05-2016,84,98,7245 -19,shivani,PL,12-05-2015,1,developer,10,network,928977,12-06-2015,12-11-2016,88,91,11254 -20,bill,PM,01-12-2015,3,manager,14,Learning,928479,01-01-2016,30-11-2016,75,94,13547 diff --git a/integration/spark/src/test/scala/org/apache/carbondata/index/lucene/LuceneFineGrainIndexSuite.scala b/integration/spark/src/test/scala/org/apache/carbondata/index/lucene/LuceneFineGrainIndexSuite.scala index 8200563f26a..86c580c50a3 
100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/index/lucene/LuceneFineGrainIndexSuite.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/index/lucene/LuceneFineGrainIndexSuite.scala @@ -356,7 +356,7 @@ class LuceneFineGrainIndexSuite extends QueryTest with BeforeAndAfterAll { sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE index_test_table OPTIONS('header'='false')") sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE index_test_table OPTIONS('header'='false')") sql("alter table index_test_table compact 'major'") - if (!sqlContext.sparkContext.version.startsWith("3.1")) { + if (!sqlContext.sparkContext.version.startsWith("3.5")) { checkAnswer(sql("SELECT COUNT(*) FROM index_test_table WHERE TEXT_MATCH('name:n10')"), sql("select COUNT(*) from index_test_table where name='n10'")) } @@ -384,7 +384,7 @@ class LuceneFineGrainIndexSuite extends QueryTest with BeforeAndAfterAll { sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE index_test_table OPTIONS('header'='false')") sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE index_test_table OPTIONS('header'='false')") sql("alter table index_test_table compact 'minor'") - if (!sqlContext.sparkContext.version.startsWith("3.1")) { + if (!sqlContext.sparkContext.version.startsWith("3.5")) { checkAnswer(sql("SELECT COUNT(*) FROM index_test_table WHERE TEXT_MATCH('name:n10')"), sql("select count(*) from index_test_table where name='n10'")) } @@ -433,7 +433,7 @@ class LuceneFineGrainIndexSuite extends QueryTest with BeforeAndAfterAll { """.stripMargin) sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE index_test_table OPTIONS('header'='false')") sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE index_test_table OPTIONS('header'='false')") - if (!sqlContext.sparkContext.version.startsWith("3.1")) { + if (!sqlContext.sparkContext.version.startsWith("3.5")) { checkAnswer(sql("SELECT count(*) FROM index_test_table WHERE TEXT_MATCH('name:n99*')"), sql("select count(*) from index_test_table where name like 
'n99%'")) sql("delete from table index_test_table where SEGMENT.ID in (0) ") @@ -496,7 +496,7 @@ class LuceneFineGrainIndexSuite extends QueryTest with BeforeAndAfterAll { | AS | Select * from source_table where TEXT_MATCH('name:n1*') """.stripMargin) - if (!sqlContext.sparkContext.version.startsWith("3.1")) { + if (!sqlContext.sparkContext.version.startsWith("3.5")) { checkAnswer(sql("SELECT count(*) FROM target_table"), sql("select count(*) from source_table where name like 'n1%'")) } @@ -520,7 +520,7 @@ class LuceneFineGrainIndexSuite extends QueryTest with BeforeAndAfterAll { """.stripMargin) sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE index_test_limit OPTIONS('header'='false')") - if (!sqlContext.sparkContext.version.startsWith("3.1")) { + if (!sqlContext.sparkContext.version.startsWith("3.5")) { checkAnswer(sql( "select count(*) from index_test_limit where TEXT_MATCH_WITH_LIMIT('name:n10*',10)"), Seq(Row(10))) diff --git a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/binary/TestBinaryDataType.scala b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/binary/TestBinaryDataType.scala index 52b1b0b2063..7c1a4d4eef4 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/binary/TestBinaryDataType.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/binary/TestBinaryDataType.scala @@ -702,7 +702,7 @@ class TestBinaryDataType extends QueryTest with BeforeAndAfterAll { val base64CarbonResult = sql("SELECT base64(binaryField) FROM carbontable") checkAnswer(base64HiveResult, base64CarbonResult) base64CarbonResult.collect().foreach { each => - val result = new String(Base64.decodeBase64((each.getAs[Array[Char]](0)).toString)) + val result = new String(Base64.decodeBase64(each.getAs[Array[Byte]](0))) assert("\u0001history\u0002".equals(result) || "\u0001biology\u0002".equals(result) || 
"\u0001education\u0002".equals(result)) diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/cleanfiles/TestCleanFileCommand.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/cleanfiles/TestCleanFileCommand.scala index 1694bb2c7b8..9283c5ccb7d 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/cleanfiles/TestCleanFileCommand.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/cleanfiles/TestCleanFileCommand.scala @@ -21,7 +21,7 @@ import java.io.{File, PrintWriter} import scala.io.Source -import org.apache.commons.lang.StringUtils +import org.apache.commons.lang3.StringUtils import org.apache.spark.sql.{CarbonEnv, Row} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.test.util.QueryTest diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala index 1adb13d6cb6..907d38993ea 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala @@ -33,7 +33,7 @@ import org.apache.avro.file.DataFileWriter import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord} import org.apache.avro.io.{DecoderFactory, Encoder} import org.apache.commons.io.FileUtils -import org.apache.commons.lang.RandomStringUtils +import org.apache.commons.lang3.RandomStringUtils import org.apache.spark.sql.{AnalysisException, CarbonEnv, Row} import org.apache.spark.sql.test.util.QueryTest import org.junit.Assert diff --git 
a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/MajorCompactionWithMeasureSortColumns.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/MajorCompactionWithMeasureSortColumns.scala index 80952a14945..14a7d609e13 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/MajorCompactionWithMeasureSortColumns.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/MajorCompactionWithMeasureSortColumns.scala @@ -93,7 +93,7 @@ class MajorCompactionWithMeasureSortColumns extends QueryTest with BeforeAndAfte val answer = sql("select * from store ").orderBy("code1") assert(csvRows.count() == answer.distinct().count()) - if (!sqlContext.sparkContext.version.startsWith("3.1")) { + if (!sqlContext.sparkContext.version.startsWith("3.5")) { checkAnswer(answer.distinct(), Seq(Row("51job, Inc.", "21695-534", "FR", 610, 60, Date.valueOf("2017-11-27"), 4483, 0, 510), Row("Intercontinental Exchange Inc.", "22100-020", "TH", 87, 4, diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala index 216bc447789..b9179d31efb 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala @@ -533,25 +533,21 @@ test("Creation of partition table should fail if the colname in table schema and sql("drop table if exists onlyPart") } - if (CarbonProperties.getInstance() - .getProperty(CarbonCommonConstants.CARBON_SPARK_VERSION_SPARK3, - CarbonCommonConstants.CARBON_SPARK_VERSION_SPARK3_DEFAULT).toBoolean) 
{ - test("test create partition on existing table columns") { - sql("drop table if exists partitionTable") - sql("create table partitionTable(c1 int, c2 int, v1 string, v2 string) " + - "stored as carbondata partitioned by (v2,c2)") - val descTable = sql(s"describe formatted partitionTable").collect - descTable.find(_.get(0).toString.contains("Partition Columns")) match { - case Some(row) => assert(row.get(1).toString.contains("v2:STRING, c2:INT")) - case None => assert(false) - } - sql("insert into partitionTable select 1,'sd','sd',2") - sql("alter table partitionTable add partition (v2='ke', c2=3) location 'loc1'") - checkAnswer(sql("show partitions partitionTable"), - Seq(Row("v2=sd/c2=2"), Row("v2=ke/c2=3"))) - checkAnswer(sql("select *from partitionTable"), Seq(Row(1, "sd", "sd", 2))) - sql("drop table if exists partitionTable") + test("test create partition on existing table columns") { + sql("drop table if exists partitionTable") + sql("create table partitionTable(c1 int, c2 int, v1 string, v2 string) " + + "stored as carbondata partitioned by (v2,c2)") + val descTable = sql(s"describe formatted partitionTable").collect + descTable.find(_.get(0).toString.contains("Partition Columns")) match { + case Some(row) => assert(row.get(1).toString.contains("v2:STRING, c2:INT")) + case None => assert(false) } + sql("insert into partitionTable select 1,'sd','sd',2") + sql("alter table partitionTable add partition (v2='ke', c2=3) location 'loc1'") + checkAnswer(sql("show partitions partitionTable"), + Seq(Row("v2=sd/c2=2"), Row("v2=ke/c2=3"))) + checkAnswer(sql("select *from partitionTable"), Seq(Row(1, "sd", "sd", 2))) + sql("drop table if exists partitionTable") } private def verifyPartitionInfo(frame: DataFrame, partitionNames: Seq[String]) = { diff --git a/integration/spark/src/test/scala/org/apache/carbondata/view/rewrite/TestAllOperationsOnMV.scala b/integration/spark/src/test/scala/org/apache/carbondata/view/rewrite/TestAllOperationsOnMV.scala index 
20586dab88b..9907f6eee06 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/view/rewrite/TestAllOperationsOnMV.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/view/rewrite/TestAllOperationsOnMV.scala @@ -47,6 +47,8 @@ class TestAllOperationsOnMV extends QueryTest with BeforeAndAfterEach { sql("drop table IF EXISTS testtable") sql("create table testtable(name string, c_code int, price int) STORED AS carbondata") sql("insert into table testtable select 'abc',21,2000") + sql("drop table if exists dm1") + sql("drop view if exists dm1") sql("drop materialized view if exists dm1") sql("create materialized view dm1 with deferred refresh as select name,sum(price) " + "from maintable group by name") diff --git a/integration/spark/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceBinaryTest.scala b/integration/spark/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceBinaryTest.scala index 070c00623eb..6091b4c5dff 100644 --- a/integration/spark/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceBinaryTest.scala +++ b/integration/spark/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceBinaryTest.scala @@ -343,7 +343,7 @@ class SparkCarbonDataSourceBinaryTest extends QueryTest with BeforeAndAfterAll { val base64CarbonResult = sql("SELECT base64(image) FROM carbon_table") checkAnswer(base64HiveResult, base64CarbonResult) base64CarbonResult.collect().foreach { each => - val result = new String(Base64.decodeBase64((each.getAs[Array[Char]](0)).toString)) + val result = new String(Base64.decodeBase64(each.getAs[Array[Byte]](0))) assert("binary".equals(result) || "test".equals(result)) } diff --git a/integration/spark/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala b/integration/spark/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala index 
909fc36ee3f..266a0c1c310 100644 --- a/integration/spark/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala +++ b/integration/spark/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala @@ -1297,7 +1297,7 @@ class SparkCarbonDataSourceTest extends QueryTest with BeforeAndAfterAll { sql("create table par (c1 string, c2 double, n int) using parquet") sql("create table car (c1 string, c2 double, n int) using carbon") - if (!sqlContext.sparkContext.version.startsWith("3.1")) { + if (!sqlContext.sparkContext.version.startsWith("3.5")) { sql("insert into par select 'a', 1.7986931348623157E308, 215565665556") sql("insert into car select 'a', 1.7986931348623157E308, 215565665556") } else { diff --git a/integration/spark/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestCreateTableUsingSparkCarbonFileFormat.scala b/integration/spark/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestCreateTableUsingSparkCarbonFileFormat.scala index 773798a1302..d3d2c380042 100644 --- a/integration/spark/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestCreateTableUsingSparkCarbonFileFormat.scala +++ b/integration/spark/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestCreateTableUsingSparkCarbonFileFormat.scala @@ -24,7 +24,7 @@ import java.util.{Date, Random} import scala.collection.JavaConverters._ import org.apache.commons.io.FileUtils -import org.apache.commons.lang.RandomStringUtils +import org.apache.commons.lang3.RandomStringUtils import org.apache.spark.sql.Row import org.apache.spark.sql.test.util.QueryTest import org.apache.spark.util.SparkUtil diff --git a/mv/plan/pom.xml b/mv/plan/pom.xml index 42353658d4d..7cd0bbf609b 100644 --- a/mv/plan/pom.xml +++ b/mv/plan/pom.xml @@ -26,7 +26,7 @@ ../../pom.xml - carbondata-mv-plan_${spark.binary.version} + carbondata-mv-plan Apache CarbonData :: Materialized View Plan @@ -108,7 +108,8 @@ com.ning.maven.plugins 
maven-duplicate-finder-plugin - + 1.0.9 + true @@ -142,130 +143,5 @@ - - - spark-2.3 - - 2.3 - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - src/main/spark3.1 - src/main/spark2.4 - - - - - org.codehaus.mojo - build-helper-maven-plugin - 3.0.0 - - - add-source - generate-sources - - add-source - - - - src/main/spark2.3 - src/main/common2.3and2.4 - - - - - - - - - - spark-2.4 - - true - - - 2.4 - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - src/main/spark3.1 - src/main/spark2.3 - - - - - org.codehaus.mojo - build-helper-maven-plugin - 3.0.0 - - - add-source - generate-sources - - add-source - - - - src/main/spark2.4 - src/main/common2.3and2.4 - - - - - - - - - - spark-3.1 - - 3.1 - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - src/main/spark2.3 - src/main/spark2.4 - src/main/common2.3and2.4 - - - - - org.codehaus.mojo - build-helper-maven-plugin - 3.0.0 - - - add-source - generate-sources - - add-source - - - - src/main/spark3.1 - - - - - - - - - diff --git a/mv/plan/src/main/common2.3and2.4/org/apache/carbondata/mv/plans/modular/SparkVersionHelper.scala b/mv/plan/src/main/common2.3and2.4/org/apache/carbondata/mv/plans/modular/SparkVersionHelper.scala deleted file mode 100644 index 78002f1198a..00000000000 --- a/mv/plan/src/main/common2.3and2.4/org/apache/carbondata/mv/plans/modular/SparkVersionHelper.scala +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.carbondata.mv.plans.modular - -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, AttributeSeq, Expression, ExprId, NamedExpression, SubqueryExpression} -import org.apache.spark.sql.catalyst.expressions.aggregate._ -import org.apache.spark.sql.catalyst.optimizer.{BooleanSimplification, CollapseProject, CollapseRepartition, CollapseWindow, ColumnPruning, CombineFilters, CombineLimits, CombineUnions, ConstantFolding, EliminateOuterJoin, EliminateSerialization, EliminateSorts, FoldablePropagation, NullPropagation, PushDownPredicate, PushPredicateThroughJoin, PushProjectionThroughUnion, RemoveDispensableExpressions, RemoveRedundantAliases, RemoveRedundantProject, ReorderAssociativeOperator, ReorderJoin, RewriteCorrelatedScalarSubquery, SimplifyBinaryComparison, SimplifyCaseConversionExpressions, SimplifyCasts, SimplifyConditionals} -import org.apache.spark.sql.catalyst.plans.{logical, JoinType, QueryPlan} -import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Join, LogicalPlan, Statistics, Subquery} -import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.types.{DataType, Metadata} - -import org.apache.carbondata.mv.plans.util.BirdcageOptimizer - - -object SparkVersionHelper { - - def getStatisticsObj(outputList: Seq[NamedExpression], - plan: LogicalPlan, stats: Statistics, - aliasMap: Option[AttributeMap[Attribute]] = None): Statistics = { - val output = outputList.map(_.toAttribute) - val mapSeq = plan.collect { case n: logical.LeafNode => n }.map 
{ - table => AttributeMap(table.output.zip(output)) - } - val rewrites = mapSeq.head - val attributes: AttributeMap[ColumnStat] = stats.attributeStats - var attributeStats = AttributeMap(attributes.iterator - .map { pair => (rewrites(pair._1), pair._2) }.toSeq) - if (aliasMap.isDefined) { - attributeStats = AttributeMap( - attributeStats.map(pair => (aliasMap.get(pair._1), pair._2)).toSeq) - } - Statistics(stats.sizeInBytes, stats.rowCount, attributeStats, stats.hints) - } - - def getOptimizedPlan(s: SubqueryExpression): LogicalPlan = { - val Subquery(newPlan) = BirdcageOptimizer.execute(Subquery(s.plan)) - newPlan - } - - def normalizeExpressions(r: NamedExpression, attrs: AttributeSeq): NamedExpression = { - QueryPlan.normalizeExprId(r, attrs) - } - - def attributeMap(rAliasMap: AttributeMap[Attribute]) : AttributeMap[Expression] = { - rAliasMap.asInstanceOf[AttributeMap[Expression]] - } - - def seqOfRules : Seq[Rule[LogicalPlan]] = { - Seq( - // Operator push down - PushProjectionThroughUnion, - ReorderJoin, - EliminateOuterJoin, - PushPredicateThroughJoin, - PushDownPredicate, - ColumnPruning, - // Operator combine - CollapseRepartition, - CollapseProject, - CollapseWindow, - CombineFilters, - CombineLimits, - CombineUnions, - // Constant folding and strength reduction - NullPropagation, - FoldablePropagation, - ConstantFolding, - ReorderAssociativeOperator, - // No need to apply LikeSimplification rule while creating MV - // as modular plan asCompactSql will be set in schema - // LikeSimplification, - BooleanSimplification, - SimplifyConditionals, - RemoveDispensableExpressions, - SimplifyBinaryComparison, - EliminateSorts, - SimplifyCasts, - SimplifyCaseConversionExpressions, - RewriteCorrelatedScalarSubquery, - EliminateSerialization, - RemoveRedundantAliases, - RemoveRedundantProject) - } -} - -trait GetVerboseString extends LeafNode { -} - -trait GroupByUnaryNode extends UnaryNode { -} - -trait SelectModularPlan extends ModularPlan { -} - -trait 
UnionModularPlan extends ModularPlan { -} - -trait OneRowTableLeafNode extends LeafNode { -} - -object MatchJoin { - def unapply(plan : LogicalPlan): Option[(LogicalPlan, LogicalPlan, JoinType, Option[Expression], - Option[Any])] = { - plan match { - case j@Join(left, right, joinType, condition) => - Some(left, right, joinType, condition, None) - case _ => None - } - } -} - -object MatchAggregateExpression { - def unapply(expr : AggregateExpression): Option[(AggregateFunction, AggregateMode, Boolean, - Option[Expression], ExprId)] = { - expr match { - case j@AggregateExpression(aggregateFunction, mode, isDistinct, resultId) => - Some(aggregateFunction, mode, isDistinct, None, resultId) - case _ => None - } - } -} diff --git a/mv/plan/src/main/scala/org/apache/carbondata/mv/expressions/modular/subquery.scala b/mv/plan/src/main/scala/org/apache/carbondata/mv/expressions/modular/subquery.scala index 43551bba3c2..cfd1eab145c 100644 --- a/mv/plan/src/main/scala/org/apache/carbondata/mv/expressions/modular/subquery.scala +++ b/mv/plan/src/main/scala/org/apache/carbondata/mv/expressions/modular/subquery.scala @@ -39,16 +39,10 @@ abstract class ModularSubquery( override def withNewPlan(plan: ModularPlan): ModularSubquery - override def semanticEquals(o: Expression): Boolean = { - o match { - case p: ModularSubquery => - this.getClass.getName.equals(p.getClass.getName) && plan.sameResult(p.plan) && - children.length == p.children.length && - children.zip(p.children).forall(p => p._1.semanticEquals(p._2)) - case _ => false - } + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): Expression = { + this } - def canonicalize(attrs: AttributeSeq): ModularSubquery = { // Normalize the outer references in the subquery plan. 
val normalizedPlan = plan.transformAllExpressions { diff --git a/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/modular/AggregatePushDown.scala b/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/modular/AggregatePushDown.scala index cbcd4248466..d03c7c0064e 100644 --- a/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/modular/AggregatePushDown.scala +++ b/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/modular/AggregatePushDown.scala @@ -128,7 +128,7 @@ trait AggregatePushDown { // self: ModularPlan => } else { Map.empty[Int, (NamedExpression, Seq[NamedExpression])] } - case sum@MatchAggregateExpression(Sum(cast@MatchCast(expr, dataType)), _, false, _, _) => + case sum@MatchAggregateExpression(Sum(cast@MatchCast(expr), _), _, false, _, _) => val tAttr = selAliasMap.get(expr.asInstanceOf[Attribute]).getOrElse(expr) .asInstanceOf[Attribute] if (fact.outputSet.contains(tAttr)) { @@ -190,7 +190,7 @@ trait AggregatePushDown { // self: ModularPlan => } else { Map.empty[Int, (NamedExpression, Seq[NamedExpression])] } - case avg@MatchAggregateExpression(Average(cast@MatchCast(expr, dataType)), _, false, _, _) => + case avg@MatchAggregateExpression(Average(cast@MatchCast(expr, _), _), _, false, _, _) => val tAttr = selAliasMap.get(expr.asInstanceOf[Attribute]).getOrElse(expr) .asInstanceOf[Attribute] if (fact.outputSet.contains(tAttr)) { diff --git a/mv/plan/src/main/spark2.4/org/apache/carbondata/mv/plans/modular/ExpressionHelper.scala b/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/modular/ExpressionHelper.scala similarity index 85% rename from mv/plan/src/main/spark2.4/org/apache/carbondata/mv/plans/modular/ExpressionHelper.scala rename to mv/plan/src/main/scala/org/apache/carbondata/mv/plans/modular/ExpressionHelper.scala index d1c9d8ab0d0..2bd1f6b639d 100644 --- a/mv/plan/src/main/spark2.4/org/apache/carbondata/mv/plans/modular/ExpressionHelper.scala +++ 
b/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/modular/ExpressionHelper.scala @@ -23,13 +23,13 @@ import org.apache.spark.sql.types.{DataType, Metadata} object ExpressionHelper { def createReference( - name: String, - dataType: DataType, - nullable: Boolean, - metadata: Metadata, - exprId: ExprId, - qualifier: Option[String], - attrRef : NamedExpression = null): AttributeReference = { + name: String, + dataType: DataType, + nullable: Boolean, + metadata: Metadata, + exprId: ExprId, + qualifier: Option[String], + attrRef: NamedExpression = null): AttributeReference = { val qf = if (qualifier.nonEmpty) Seq(qualifier.get) else Seq.empty AttributeReference(name, dataType, nullable, metadata)(exprId, qf) } @@ -38,7 +38,7 @@ object ExpressionHelper { child: Expression, name: String, exprId: ExprId, - qualifier: Option[String]) : Alias = { + qualifier: Option[String]): Alias = { val qf = if (qualifier.nonEmpty) Seq(qualifier.get) else Seq.empty Alias(child, name)(exprId, qf, None) } diff --git a/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/modular/ModularPlan.scala b/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/modular/ModularPlan.scala index e39ef7c96e3..1103114c1a7 100644 --- a/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/modular/ModularPlan.scala +++ b/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/modular/ModularPlan.scala @@ -206,6 +206,11 @@ abstract class ModularPlan protected def preHarmonized: ModularPlan = { this } + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[ModularPlan]): ModularPlan = { + this + } } object ModularPlan extends PredicateHelper { diff --git a/mv/plan/src/main/spark3.1/org/apache/carbondata/mv/plans/modular/SparkVersionHelper.scala b/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/modular/SparkVersionHelper.scala similarity index 90% rename from mv/plan/src/main/spark3.1/org/apache/carbondata/mv/plans/modular/SparkVersionHelper.scala rename to 
mv/plan/src/main/scala/org/apache/carbondata/mv/plans/modular/SparkVersionHelper.scala index e52efc9d688..fc34c6f1630 100644 --- a/mv/plan/src/main/spark3.1/org/apache/carbondata/mv/plans/modular/SparkVersionHelper.scala +++ b/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/modular/SparkVersionHelper.scala @@ -21,7 +21,7 @@ import scala.reflect.ClassTag import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeSeq, Expression, ExprId, NamedExpression, SubqueryExpression} import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, AggregateMode} -import org.apache.spark.sql.catalyst.optimizer.{BooleanSimplification, CollapseProject, CollapseRepartition, CollapseWindow, ColumnPruning, CombineFilters, CombineUnions, ConstantFolding, EliminateLimits, EliminateOuterJoin, EliminateSerialization, EliminateSorts, FoldablePropagation, NullPropagation, PushDownPredicates, PushPredicateThroughJoin, PushProjectionThroughUnion, RemoveDispensableExpressions, RemoveRedundantAliases, ReorderAssociativeOperator, ReorderJoin, RewriteCorrelatedScalarSubquery, SimplifyBinaryComparison, SimplifyCaseConversionExpressions, SimplifyCasts, SimplifyConditionals} +import org.apache.spark.sql.catalyst.optimizer._ import org.apache.spark.sql.catalyst.plans.{logical, JoinType, QueryPlan} import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Join, LogicalPlan, Statistics, Subquery} import org.apache.spark.sql.catalyst.rules.Rule diff --git a/mv/plan/src/main/spark2.3/org/apache/carbondata/mv/plans/modular/ExpressionHelper.scala b/mv/plan/src/main/spark2.3/org/apache/carbondata/mv/plans/modular/ExpressionHelper.scala deleted file mode 100644 index 8fa9be4a593..00000000000 --- a/mv/plan/src/main/spark2.3/org/apache/carbondata/mv/plans/modular/ExpressionHelper.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.carbondata.mv.plans.modular - -import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression, ExprId, NamedExpression} -import org.apache.spark.sql.types.{DataType, Metadata} - -object ExpressionHelper { - - def createReference( - name: String, - dataType: DataType, - nullable: Boolean, - metadata: Metadata, - exprId: ExprId, - qualifier: Option[String], - attrRef : NamedExpression = null): AttributeReference = { - AttributeReference(name, dataType, nullable, metadata)(exprId, qualifier) - } - - def createAlias( - child: Expression, - name: String, - exprId: ExprId = NamedExpression.newExprId, - qualifier: Option[String] = None, - explicitMetadata: Option[Metadata] = None, - namedExpr : Option[NamedExpression] = None ) : Alias = { - Alias(child, name)(exprId, qualifier, explicitMetadata) - } - - def getTheLastQualifier(reference: AttributeReference): String = { - reference.qualifier.head - } - -} diff --git a/mv/plan/src/main/spark3.1/org/apache/carbondata/mv/plans/modular/ExpressionHelper.scala b/mv/plan/src/main/spark3.1/org/apache/carbondata/mv/plans/modular/ExpressionHelper.scala deleted file mode 100644 index 3814ccaaebb..00000000000 --- a/mv/plan/src/main/spark3.1/org/apache/carbondata/mv/plans/modular/ExpressionHelper.scala 
+++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.carbondata.mv.plans.modular - -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, AttributeSeq, Expression, ExprId, NamedExpression, SubqueryExpression} -import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, AggregateMode} -import org.apache.spark.sql.catalyst.optimizer.{BooleanSimplification, CollapseProject, CollapseRepartition, CollapseWindow, ColumnPruning, CombineFilters, CombineUnions, ConstantFolding, EliminateLimits, EliminateOuterJoin, EliminateSerialization, EliminateSorts, FoldablePropagation, NullPropagation, PushDownPredicates, PushPredicateThroughJoin, PushProjectionThroughUnion, RemoveDispensableExpressions, RemoveRedundantAliases, ReorderAssociativeOperator, ReorderJoin, RewriteCorrelatedScalarSubquery, SimplifyBinaryComparison, SimplifyCaseConversionExpressions, SimplifyCasts, SimplifyConditionals} -import org.apache.spark.sql.catalyst.plans.{logical, JoinType, QueryPlan} -import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Join, LogicalPlan, Statistics, Subquery} -import 
org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.types.{DataType, Metadata} -import scala.reflect.ClassTag - -import org.apache.carbondata.mv.plans.util.BirdcageOptimizer - -object ExpressionHelper { - - def createReference( - name: String, - dataType: DataType, - nullable: Boolean, - metadata: Metadata, - exprId: ExprId, - qualifier: Option[String], - attrRef: NamedExpression = null): AttributeReference = { - val qf = if (qualifier.nonEmpty) Seq(qualifier.get) else Seq.empty - AttributeReference(name, dataType, nullable, metadata)(exprId, qf) - } - - def createAlias( - child: Expression, - name: String, - exprId: ExprId, - qualifier: Option[String]): Alias = { - val qf = if (qualifier.nonEmpty) Seq(qualifier.get) else Seq.empty - Alias(child, name)(exprId, qf, None) - } - - def getTheLastQualifier(reference: AttributeReference): String = { - reference.qualifier.reverse.head - } - -} diff --git a/pom.xml b/pom.xml index 3ae76809270..fdd6750b3db 100644 --- a/pom.xml +++ b/pom.xml @@ -105,17 +105,12 @@ index/lucene index/examples streaming - + geo integration/spark integration/hive - integration/flink - integration/flink-build - integration/flink-proxy - integration/presto sdk/sdk tools/cli examples/spark - examples/flink assembly @@ -124,21 +119,21 @@ UTF-8 8 8 - 1.1.2.6 - 2.7.2 - 4.3.4 - 4.3-alpha1 - 2.11 - 2.11.12 + 1.1.10.3 + 3.3.4 + 4.5.14 + 4.4.16 + 2.12 + 2.12.18 compile - 2.4.5 - 2.4 - 4.8 + 3.5.1 + 3.5 + 4.9.3 compile compile ${basedir}/dev local[2] - 2.6.5 + 2.15.2 local localhost:8086 @@ -454,9 +449,6 @@ false ${basedir}/src/main/scala - ${basedir}/src/main/spark${spark.binary.version} - ${basedir}/src/main/common2.3and2.4 - ${basedir}/src/main/common2.4and3.1 ${basedir}/src/test/scala scalastyle-config.xml @@ -575,230 +567,6 @@ 4.5.2 - - spark-2.3 - - 2.3 - 2.3.4 - 2.11 - 2.11.8 - - - - - org.codehaus.mojo - flatten-maven-plugin - 1.2.2 - - ${project.build.directory} - - - - - flatten - process-resources - - flatten - - - - - 
flatten.clean - clean - - clean - - - - - - org.eluder.coveralls - coveralls-maven-plugin - 4.3.0 - - opPwqWW41vYppv6KISea3u1TJvE1ugJ5Y - UTF-8 - - ${basedir}/target/carbondata-coverage-report/carbondata-coverage-report.xml - - - - ${basedir}/common/src/main/java - ${basedir}/core/src/main/java - - ${basedir}/processing/src/main/java - ${basedir}/hadoop/src/main/java - ${basedir}/integration/spark/src/main/scala - ${basedir}/integration/spark/src/main/spark2.3 - ${basedir}/integration/spark/src/main/common2.3and2.4 - ${basedir}/integration/spark/src/main/java - ${basedir}/integration/hive/src/main/scala - ${basedir}/integration/hive/src/main/java - ${basedir}/streaming/src/main/java - ${basedir}/streaming/src/main/scala - ${basedir}/sdk/sdk/src/main/java - ${basedir}/index/bloom/src/main/java - ${basedir}/index/lucene/src/main/java - ${basedir}/index/secondary-index/src/main/scala - ${basedir}/index/secondary-index/src/main/java - - - - - - - - spark-2.4 - - true - - - 2.4 - 2.4.5 - 2.11 - 2.11.12 - - - - - org.codehaus.mojo - flatten-maven-plugin - 1.2.2 - - ${project.build.directory} - - - - - flatten - process-resources - - flatten - - - - - flatten.clean - clean - - clean - - - - - - org.eluder.coveralls - coveralls-maven-plugin - 4.3.0 - - opPwqWW41vYppv6KISea3u1TJvE1ugJ5Y - UTF-8 - - ${basedir}/target/carbondata-coverage-report/carbondata-coverage-report.xml - - - - ${basedir}/common/src/main/java - ${basedir}/core/src/main/java - - ${basedir}/processing/src/main/java - ${basedir}/hadoop/src/main/java - ${basedir}/integration/spark/src/main/scala - ${basedir}/integration/spark/src/main/spark2.4 - ${basedir}/integration/spark/src/main/common2.3and2.4 - ${basedir}/integration/spark/src/main/common2.4and3.1 - ${basedir}/integration/spark/src/main/java - ${basedir}/integration/hive/src/main/scala - ${basedir}/integration/hive/src/main/java - ${basedir}/streaming/src/main/java - ${basedir}/streaming/src/main/scala - ${basedir}/sdk/sdk/src/main/java - 
${basedir}/index/bloom/src/main/java - ${basedir}/index/lucene/src/main/java - ${basedir}/index/secondary-index/src/main/scala - ${basedir}/index/secondary-index/src/main/java - - - - - - - - spark-3.1 - - 3.1 - 3.1.1 - 2.12 - 2.12.8 - 2.10.0 - - - - - org.codehaus.mojo - flatten-maven-plugin - 1.2.2 - - ${project.build.directory} - - - - - flatten - process-resources - - flatten - - - - - flatten.clean - clean - - clean - - - - - - org.eluder.coveralls - coveralls-maven-plugin - 4.3.0 - - opPwqWW41vYppv6KISea3u1TJvE1ugJ5Y - UTF-8 - - ${basedir}/target/carbondata-coverage-report/carbondata-coverage-report.xml - - - - ${basedir}/common/src/main/java - ${basedir}/core/src/main/java - - ${basedir}/processing/src/main/java - ${basedir}/hadoop/src/main/java - ${basedir}/integration/spark/src/main/scala - ${basedir}/integration/spark/src/main/spark3.1 - ${basedir}/integration/spark/src/main/common2.4and3.1 - ${basedir}/integration/spark/src/main/java - ${basedir}/integration/hive/src/main/scala - ${basedir}/integration/hive/src/main/java - ${basedir}/streaming/src/main/java - ${basedir}/streaming/src/main/scala - ${basedir}/sdk/sdk/src/main/java - ${basedir}/index/bloom/src/main/java - ${basedir}/index/lucene/src/main/java - ${basedir}/index/secondary-index/src/main/scala - ${basedir}/index/secondary-index/src/main/java - - - - - - include-all @@ -815,7 +583,7 @@ org.apache.rat apache-rat-plugin - 0.12 + 0.16.1 verify diff --git a/processing/pom.xml b/processing/pom.xml index 9c67ec8295a..237560fb824 100644 --- a/processing/pom.xml +++ b/processing/pom.xml @@ -40,6 +40,28 @@ carbondata-core ${project.version} + + org.codehaus.jackson + jackson-core-asl + 1.9.13 + ${hadoop.deps.scope} + + + org.codehaus.jackson + jackson-mapper-asl + 1.9.13 + ${hadoop.deps.scope} + + + com.fasterxml.jackson.core + jackson-core + 2.15.2 + + + com.fasterxml.jackson.core + jackson-databind + 2.15.2 + org.apache.spark spark-unsafe_${scala.binary.version} @@ -59,12 +81,12 @@ com.univocity 
univocity-parsers - 2.2.1 + 2.9.1 org.apache.commons commons-lang3 - 3.5 + 3.12.0 org.jmockit diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/csvinput/CSVInputFormat.java b/processing/src/main/java/org/apache/carbondata/processing/loading/csvinput/CSVInputFormat.java index 21987d518dd..74effab8a8a 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/loading/csvinput/CSVInputFormat.java +++ b/processing/src/main/java/org/apache/carbondata/processing/loading/csvinput/CSVInputFormat.java @@ -30,8 +30,8 @@ import com.univocity.parsers.csv.CsvParser; import com.univocity.parsers.csv.CsvParserSettings; import org.apache.commons.io.input.BOMInputStream; -import org.apache.commons.lang.BooleanUtils; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.BooleanUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java b/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java index 36ef3ed7fb4..d44aa444f8f 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java +++ b/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java @@ -45,7 +45,7 @@ import org.apache.carbondata.processing.util.CarbonLoaderUtil; import org.apache.carbondata.processing.util.TableOptionConstant; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.log4j.Logger; diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java b/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java index 
98c2d3d7660..a37bcf414b5 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java +++ b/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java @@ -37,7 +37,7 @@ import org.apache.carbondata.processing.util.CarbonDataProcessorUtil; import org.apache.carbondata.processing.util.CarbonLoaderUtil; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.log4j.Logger; diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/ArrayParserImpl.java b/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/ArrayParserImpl.java index 0af99358556..e453b2a02cb 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/ArrayParserImpl.java +++ b/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/ArrayParserImpl.java @@ -25,7 +25,7 @@ import org.apache.carbondata.processing.loading.parser.ComplexParser; import org.apache.carbondata.processing.loading.parser.GenericParser; -import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang3.ArrayUtils; /** * It parses the string to @{@link ArrayObject} using delimiter. 
diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/JsonRowParser.java b/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/JsonRowParser.java index cdc8e011976..903be088fcd 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/JsonRowParser.java +++ b/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/JsonRowParser.java @@ -32,8 +32,8 @@ import org.apache.carbondata.processing.loading.complexobjects.StructObject; import org.apache.carbondata.processing.loading.parser.RowParser; -import org.apache.htrace.fasterxml.jackson.core.type.TypeReference; -import org.apache.htrace.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; public class JsonRowParser implements RowParser { diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/MapParserImpl.java b/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/MapParserImpl.java index 9cc35240d01..c3bc94f86e9 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/MapParserImpl.java +++ b/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/MapParserImpl.java @@ -24,7 +24,7 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.processing.loading.complexobjects.ArrayObject; -import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang3.ArrayUtils; public class MapParserImpl extends ArrayParserImpl { diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/StructParserImpl.java b/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/StructParserImpl.java index cf8db97bc7a..a22fa45431a 100644 --- 
a/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/StructParserImpl.java +++ b/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/StructParserImpl.java @@ -26,7 +26,7 @@ import org.apache.carbondata.processing.loading.parser.ComplexParser; import org.apache.carbondata.processing.loading.parser.GenericParser; -import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang3.ArrayUtils; /** * It parses the string to @{@link StructObject} using delimiter. diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/row/CarbonRowBatch.java b/processing/src/main/java/org/apache/carbondata/processing/loading/row/CarbonRowBatch.java index d41ca144dd6..e9886746aba 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/loading/row/CarbonRowBatch.java +++ b/processing/src/main/java/org/apache/carbondata/processing/loading/row/CarbonRowBatch.java @@ -22,7 +22,7 @@ import org.apache.carbondata.common.CarbonIterator; import org.apache.carbondata.core.datastore.row.CarbonRow; -import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang3.ArrayUtils; /** * Batch of rows. 
diff --git a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java index 42445e7b750..01b731d8dd6 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java +++ b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java @@ -62,7 +62,7 @@ import org.apache.carbondata.processing.loading.model.CarbonLoadModel; import org.apache.carbondata.processing.util.CarbonLoaderUtil; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; /** diff --git a/processing/src/main/java/org/apache/carbondata/processing/util/CarbonBadRecordUtil.java b/processing/src/main/java/org/apache/carbondata/processing/util/CarbonBadRecordUtil.java index 26e838c60c1..3bfed5cdbdc 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/util/CarbonBadRecordUtil.java +++ b/processing/src/main/java/org/apache/carbondata/processing/util/CarbonBadRecordUtil.java @@ -37,7 +37,7 @@ import org.apache.carbondata.processing.loading.converter.BadRecordLogHolder; import org.apache.carbondata.processing.loading.model.CarbonLoadModel; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; /** diff --git a/processing/src/test/java/org/apache/carbondata/lcm/locks/ZooKeeperLockingTest.java b/processing/src/test/java/org/apache/carbondata/lcm/locks/ZooKeeperLockingTest.java index 02c529d7df4..c0ecf8631d1 100644 --- a/processing/src/test/java/org/apache/carbondata/lcm/locks/ZooKeeperLockingTest.java +++ b/processing/src/test/java/org/apache/carbondata/lcm/locks/ZooKeeperLockingTest.java @@ -25,6 +25,7 @@ import org.apache.carbondata.core.locks.ZookeeperInit; import org.apache.zookeeper.server.ServerConfig; import org.apache.zookeeper.server.ZooKeeperServerMain; 
+import org.apache.zookeeper.server.admin.AdminServer; import org.apache.zookeeper.server.quorum.QuorumPeerConfig; import org.junit.After; import org.junit.Assert; @@ -67,7 +68,7 @@ public class ZooKeeperLockingTest { public void run() { try { zooKeeperServer.runFromConfig(configuration); - } catch (IOException e) { + } catch (IOException | AdminServer.AdminServerException e) { System.out.println("ZooKeeper failure"); } } diff --git a/sdk/sdk/pom.xml b/sdk/sdk/pom.xml index 4280c6c40cf..c01a4d9a1cf 100644 --- a/sdk/sdk/pom.xml +++ b/sdk/sdk/pom.xml @@ -14,6 +14,7 @@ ${basedir}/../../dev + 12.0.1 @@ -72,7 +73,7 @@ org.apache.arrow arrow-format - 0.12.0 + ${arrow.version} ch.qos.logback @@ -83,7 +84,8 @@ org.apache.arrow arrow-memory - 0.12.0 + ${arrow.version} + pom ch.qos.logback @@ -102,7 +104,7 @@ org.apache.arrow arrow-vector - 0.12.0 + ${arrow.version} ch.qos.logback @@ -118,36 +120,10 @@ - - org.apache.arrow - arrow-plasma - 0.12.0 - - - ch.qos.logback - logback-classic - - - - - org.apache.arrow - arrow-flight - 0.12.0 - - - ch.qos.logback - logback-classic - - - io.netty - netty-buffer - - - org.apache.arrow arrow-tools - 0.12.0 + ${arrow.version} ch.qos.logback diff --git a/sdk/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java b/sdk/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java index 3059336c725..a28ac518560 100644 --- a/sdk/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java +++ b/sdk/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java @@ -58,7 +58,7 @@ import org.apache.carbondata.sdk.file.utils.SDKUtil; import org.apache.commons.collections.CollectionUtils; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.log4j.Logger; diff --git a/sdk/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java 
b/sdk/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java index 883e6fc0eb6..05dce11ff9c 100644 --- a/sdk/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java +++ b/sdk/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java @@ -758,7 +758,7 @@ public void testWritingAndReadingArrayString() throws IOException { @Test public void testWritingAndReadingArrayStruct() throws IOException { - String path = "./testWriteFilesArrayStruct"; + String path = "./target/testWriteFilesArrayStruct"; FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[4]; diff --git a/sdk/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java b/sdk/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java index 1433bb5c93f..fa3bfaf6e58 100644 --- a/sdk/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java +++ b/sdk/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java @@ -66,7 +66,7 @@ public class CarbonReaderTest extends TestCase { @Test public void testWriteAndReadFiles() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); IndexStoreManager.getInstance() .clearIndexCache(AbsoluteTableIdentifier.from(path), false); @@ -111,7 +111,7 @@ public void testWriteAndReadFiles() throws IOException, InterruptedException { @Test public void testWriteAndReadJson() throws IOException, InterruptedException { int numRows = 100; - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); String json = "{\"name\":\"bob\", \"age\":10}"; @@ -163,7 +163,7 @@ public boolean accept(File pathname) { } @Test public void testReadWithZeroBatchSize() throws Exception { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); 
IndexStoreManager.getInstance().clearIndexCache(AbsoluteTableIdentifier.from(path), false); Field[] fields = new Field[2]; @@ -188,7 +188,7 @@ public boolean accept(File pathname) { @Test public void testReadBatchWithZeroBatchSize() throws Exception { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); IndexStoreManager.getInstance().clearIndexCache(AbsoluteTableIdentifier.from(path), false); Field[] fields = new Field[2]; @@ -211,7 +211,7 @@ public void testReadBatchWithZeroBatchSize() throws Exception { @Test public void testReadWithFilterOfNonTransactionalSimple() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); IndexStoreManager.getInstance() .clearIndexCache(AbsoluteTableIdentifier.from(path), false); @@ -258,7 +258,7 @@ public void testReadWithFilterOfNonTransactionalSimple() throws IOException, Int @Test public void testReadWithFilterOfNonTransactional2() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); IndexStoreManager.getInstance() .clearIndexCache(AbsoluteTableIdentifier.from(path), false); @@ -295,7 +295,7 @@ public void testReadWithFilterOfNonTransactional2() throws IOException, Interrup @Test public void testReadWithFilterOfNonTransactionalAnd() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); IndexStoreManager.getInstance() .clearIndexCache(AbsoluteTableIdentifier.from(path), false); @@ -338,7 +338,7 @@ public void testReadWithFilterOfNonTransactionalAnd() throws IOException, Interr @Test public void testReadWithFilterOfNonTransactionalOr() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = 
"./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); IndexStoreManager.getInstance() .clearIndexCache(AbsoluteTableIdentifier.from(path), false); @@ -381,7 +381,7 @@ public void testReadWithFilterOfNonTransactionalOr() throws IOException, Interru @Test public void testReadWithFilterOfNonTransactionalGreaterThan() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); IndexStoreManager.getInstance() .clearIndexCache(AbsoluteTableIdentifier.from(path), false); @@ -424,7 +424,7 @@ public void testReadWithFilterOfNonTransactionalGreaterThan() throws IOException @Test public void testReadWithFilterEqualSet() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[3]; fields[0] = new Field("name", DataTypes.STRING); @@ -573,7 +573,7 @@ public void testReadWithFilterEqualSet() throws IOException, InterruptedExceptio @Test public void testReadWithFilterOfNonTransactionalLessThan() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); IndexStoreManager.getInstance() .clearIndexCache(AbsoluteTableIdentifier.from(path), false); @@ -616,7 +616,7 @@ public void testReadWithFilterOfNonTransactionalLessThan() throws IOException, I @Test public void testReadWithFilterOfNonTransactionalNotEqual() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); IndexStoreManager.getInstance() .clearIndexCache(AbsoluteTableIdentifier.from(path), false); @@ -659,7 +659,7 @@ public void testReadWithFilterOfNonTransactionalNotEqual() throws IOException, I @Test public void testReadWithFilterOfNonTransactionalIn() throws 
IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[3]; @@ -702,7 +702,7 @@ public void testReadWithFilterOfNonTransactionalIn() throws IOException, Interru @Test public void testReadWithFilterOfNonTransactionalNotIn() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); IndexStoreManager.getInstance() .clearIndexCache(AbsoluteTableIdentifier.from(path), false); @@ -745,8 +745,8 @@ public void testReadWithFilterOfNonTransactionalNotIn() throws IOException, Inte @Test public void testWriteAndReadFilesWithReaderBuildFail() throws IOException, InterruptedException { - String path1 = "./testWriteFiles"; - String path2 = "./testWriteFiles2"; + String path1 = "./target/testWriteFiles"; + String path2 = "./target/testWriteFiles2"; FileUtils.deleteDirectory(new File(path1)); FileUtils.deleteDirectory(new File(path2)); IndexStoreManager.getInstance() @@ -811,7 +811,7 @@ public void testWriteAndReadFilesWithReaderBuildFail() throws IOException, Inter @Test public void testReadColumnTwice() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); IndexStoreManager.getInstance() .clearIndexCache(AbsoluteTableIdentifier.from(path), false); @@ -848,7 +848,7 @@ public void testReadColumnTwice() throws IOException, InterruptedException { // and currently flat folder will never check for schema files. 
@Ignore public void readFilesParallel() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); IndexStoreManager.getInstance() .clearIndexCache(AbsoluteTableIdentifier.from(path), false); @@ -884,7 +884,7 @@ public void readFilesParallel() throws IOException, InterruptedException { @Test public void testReadAfterClose() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); IndexStoreManager.getInstance() .clearIndexCache(AbsoluteTableIdentifier.from(path), false); @@ -925,7 +925,7 @@ public void testReadAfterClose() throws IOException, InterruptedException { @Test public void testWriteAndReadFilesWithoutTableName() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); IndexStoreManager.getInstance() .clearIndexCache(AbsoluteTableIdentifier.from(path), false); @@ -955,7 +955,7 @@ public void testWriteAndReadFilesWithoutTableName() throws IOException, Interrup @Test public void testWriteAndReadFilesWithoutTableName2() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); IndexStoreManager.getInstance() .clearIndexCache(AbsoluteTableIdentifier.from(path), false); @@ -982,7 +982,7 @@ public void testWriteAndReadFilesWithoutTableName2() throws IOException, Interru @Test public void testReadSchemaFromDataFile() throws IOException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[2]; @@ -1011,7 +1011,7 @@ public boolean accept(File dir, String name) { @Test public void testWriteAndReadFilesNonTransactional() throws IOException, 
InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[2]; @@ -1062,7 +1062,7 @@ public void testTimeStampAndBadRecord() throws IOException, InterruptedException .addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC, storeLocation) .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy-MM-dd hh:mm:ss") .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "REDIRECT"); - String path = "./testWriteFiles"; + String path = storeLocation + "testWriteFiles1"; FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[9]; @@ -1082,18 +1082,6 @@ public void testTimeStampAndBadRecord() throws IOException, InterruptedException CarbonWriter writer = builder.withCsvInput(new Schema(fields)).writtenBy("CarbonReaderTest").build(); for (int i = 0; i < 100; i++) { - String[] row = new String[]{ - "robot" + (i % 10), - String.valueOf(i), - String.valueOf(i), - String.valueOf(Long.MAX_VALUE - i), - String.valueOf((double) i / 2), - String.valueOf(true), - "2018-05-12", - "2018-05-12", - "12.345" - }; - writer.write(row); String[] row2 = new String[]{ "robot" + (i % 10), String.valueOf(i), @@ -1115,12 +1103,8 @@ public void testTimeStampAndBadRecord() throws IOException, InterruptedException File folder = new File(path); Assert.assertTrue(folder.exists()); - File[] dataFiles = folder.listFiles(new FileFilter() { - @Override - public boolean accept(File pathname) { - return pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT); - } - }); + File[] dataFiles = folder.listFiles( + pathname -> pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT)); Assert.assertNotNull(dataFiles); Assert.assertTrue(dataFiles.length > 0); @@ -1178,7 +1162,7 @@ public void testReadSchemaInDataFileAndSort() throws IOException, InterruptedExc .addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC, storeLocation) 
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy-MM-dd hh:mm:ss") .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "REDIRECT"); - String path = "./testWriteFiles"; + String path = storeLocation + "testWriteFiles"; FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[9]; @@ -1288,7 +1272,7 @@ public void testReadUserSchema() throws IOException, InterruptedException { .addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC, storeLocation) .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy-MM-dd hh:mm:ss") .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "REDIRECT"); - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[9]; @@ -1386,7 +1370,7 @@ public boolean accept(File dir, String name) { @Test public void testReadFilesWithProjectAllColumns() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[2]; @@ -1412,7 +1396,7 @@ public void testReadFilesWithProjectAllColumns() throws IOException, Interrupted @Test public void testReadFilesWithDefaultProjection() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[2]; @@ -1436,7 +1420,7 @@ public void testReadFilesWithDefaultProjection() throws IOException, Interrupted @Test public void testReadFilesWithNullProjection() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[2]; @@ -1481,7 +1465,7 @@ private void WriteAvroComplexData(String mySchema, String json, String path) // TODO: support get schema of complex data type @Ignore public void 
testReadUserSchemaOfComplex() throws IOException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); String mySchema = @@ -1552,7 +1536,7 @@ public boolean accept(File dir, String name) { @Test public void testReadMapType() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); String mySchema = @@ -1615,10 +1599,10 @@ public void testReadMapType() throws IOException, InterruptedException { @Test public void testReadWithFilterOfnonTransactionalwithsubfolders() throws IOException, InterruptedException { - String path1 = "./testWriteFiles/1/" + System.nanoTime(); - String path2 = "./testWriteFiles/2/" + System.nanoTime(); - String path3 = "./testWriteFiles/3/" + System.nanoTime(); - FileUtils.deleteDirectory(new File("./testWriteFiles")); + String path1 = "./target/testWriteFiles/1/" + System.nanoTime(); + String path2 = "./target/testWriteFiles/2/" + System.nanoTime(); + String path3 = "./target/testWriteFiles/3/" + System.nanoTime(); + FileUtils.deleteDirectory(new File("./target/testWriteFiles")); Field[] fields = new Field[2]; fields[0] = new Field("name", DataTypes.STRING); @@ -1632,7 +1616,7 @@ public void testReadWithFilterOfnonTransactionalwithsubfolders() throws IOExcept new ColumnExpression("name", DataTypes.STRING), new LiteralExpression("robot1", DataTypes.STRING)); CarbonReader reader = CarbonReader - .builder("./testWriteFiles", "_temp") + .builder("./target/testWriteFiles", "_temp") .projection(new String[]{"name", "age"}) .filter(equalToExpression) .build(); @@ -1648,12 +1632,12 @@ public void testReadWithFilterOfnonTransactionalwithsubfolders() throws IOExcept reader.close(); - FileUtils.deleteDirectory(new File("./testWriteFiles")); + FileUtils.deleteDirectory(new File("./target/testWriteFiles")); } @Test public void testReadSchemaFromDataFileArrayString() { - String 
path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; try { FileUtils.deleteDirectory(new File(path)); @@ -1744,7 +1728,7 @@ public boolean accept(File dir, String name) { @Test public void testReadDateAndTimestampColumnInMap() { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles2"; try { FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[6]; @@ -1810,7 +1794,7 @@ public boolean accept(File dir, String name) { @Test public void testReadDateAndTimestampColumnInArray() { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; try { FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[7]; @@ -1878,7 +1862,7 @@ public boolean accept(File dir, String name) { @Test public void testReadDateAndTimestampColumnInStruct() { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; try { FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[3]; @@ -1939,7 +1923,7 @@ public boolean accept(File dir, String name) { @Test public void testReadingDateAndTimestampColumnInArrayOfStruct() throws IOException { - String path = "./testWriteFilesArrayStruct"; + String path = "./target/testWriteFilesArrayStruct"; FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[4]; fields[0] = new Field("id", DataTypes.STRING); @@ -2219,7 +2203,7 @@ public void testReadNextRowWithProjectionAndRowUtil() { @Test public void testVectorReader() { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; try { FileUtils.deleteDirectory(new File(path)); @@ -2485,7 +2469,7 @@ public void testReadNextBatchRowWithVectorReader() { @Test public void testReadingNullValues() { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; try { FileUtils.deleteDirectory(new File(path)); @@ -2706,7 +2690,7 @@ public void testValidateQuoteCharWithImproperValue() throws IOException { @Test public void 
testValidateQuoteCharWithProperValue() throws IOException { - String path = "./testValidateQuoteCharWithProperValue"; + String path = "./target/testValidateQuoteCharWithProperValue"; Field[] fields = new Field[2]; fields[0] = new Field("stringField", DataTypes.STRING); fields[1] = new Field("varcharField", DataTypes.VARCHAR); @@ -2761,7 +2745,7 @@ public void testValidateEscapeCharWithImproperValue() throws IOException { @Test public void testValidateEscapeCharWithProperValue() throws IOException { - String path = "./testValidateEscapeCharWithProperValue"; + String path = "./target/testValidateEscapeCharWithProperValue"; Field[] fields = new Field[2]; fields[0] = new Field("stringField", DataTypes.STRING); fields[1] = new Field("varcharField", DataTypes.VARCHAR); @@ -2881,7 +2865,7 @@ public void testWriteWithDifferentDataType() { @Test public void testReadBlocklet() throws IOException, InterruptedException { - String path = "./testWriteFiles/" + System.nanoTime(); + String path = "./target/testWriteFiles/" + System.nanoTime(); FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[2]; @@ -2913,7 +2897,7 @@ public void testReadBlocklet() throws IOException, InterruptedException { @Test public void testGetSplits() throws IOException, InterruptedException { - String path = "./testWriteFiles/" + System.nanoTime(); + String path = "./target/testWriteFiles/" + System.nanoTime(); FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[2]; @@ -2934,7 +2918,7 @@ public void testGetSplits() throws IOException, InterruptedException { @Test public void testReadWithFilterNonResult() throws IOException, InterruptedException { - String path = "./testWriteFiles"; + String path = "./target/testWriteFiles"; FileUtils.deleteDirectory(new File(path)); Field[] fields = new Field[2]; fields[0] = new Field("name", DataTypes.STRING); diff --git a/sdk/sdk/src/test/java/org/apache/carbondata/sdk/file/PaginationCarbonReaderTest.java 
b/sdk/sdk/src/test/java/org/apache/carbondata/sdk/file/PaginationCarbonReaderTest.java index d0d60bd170f..0f9dfc96253 100644 --- a/sdk/sdk/src/test/java/org/apache/carbondata/sdk/file/PaginationCarbonReaderTest.java +++ b/sdk/sdk/src/test/java/org/apache/carbondata/sdk/file/PaginationCarbonReaderTest.java @@ -34,7 +34,7 @@ import org.apache.carbondata.core.util.CarbonProperties; import org.apache.commons.io.FileUtils; -import org.apache.commons.lang.RandomStringUtils; +import org.apache.commons.lang3.RandomStringUtils; import org.junit.Assert; import org.junit.Test; diff --git a/streaming/pom.xml b/streaming/pom.xml index e815d86ec98..93bdd7df717 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -24,7 +24,7 @@ 4.0.0 - carbondata-streaming_${spark.binary.version} + carbondata-streaming Apache CarbonData :: Streaming http://maven.apache.org @@ -132,123 +132,5 @@ true - - spark-2.3 - - 2.3 - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - src/main/spark3.1 - - - - - org.codehaus.mojo - build-helper-maven-plugin - 3.0.0 - - - add-source - generate-sources - - add-source - - - - src/main/spark2.x - - - - - - - - - - spark-2.4 - - true - - - 2.4 - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - src/main/spark3.1 - - - - - org.codehaus.mojo - build-helper-maven-plugin - 3.0.0 - - - add-source - generate-sources - - add-source - - - - src/main/spark2.x - - - - - - - - - - spark-3.1 - - 3.1.1 - 3.1 - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - src/main/spark2.x - - - - - org.codehaus.mojo - build-helper-maven-plugin - 3.0.0 - - - add-source - generate-sources - - add-source - - - - src/main/spark3.1 - - - - - - - - diff --git a/streaming/src/main/scala/org/apache/carbondata/streaming/parser/RowStreamParserImp.scala b/streaming/src/main/scala/org/apache/carbondata/streaming/parser/RowStreamParserImp.scala index 145520edd70..6adb29cbe93 100644 --- 
a/streaming/src/main/scala/org/apache/carbondata/streaming/parser/RowStreamParserImp.scala +++ b/streaming/src/main/scala/org/apache/carbondata/streaming/parser/RowStreamParserImp.scala @@ -18,12 +18,11 @@ package org.apache.carbondata.streaming.parser import java.text.SimpleDateFormat -import java.util import org.apache.hadoop.conf.Configuration -import org.apache.spark.sql.Row +import org.apache.spark.sql.{Encoders, Row} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.types.StructType import org.apache.carbondata.core.constants.CarbonCommonConstants @@ -43,14 +42,14 @@ class RowStreamParserImp extends CarbonStreamParser { var timeStampFormat: SimpleDateFormat = _ var dateFormat: SimpleDateFormat = _ - val complexDelimiters: util.ArrayList[String] = new util.ArrayList[String]() + val complexDelimiters: java.util.ArrayList[String] = new java.util.ArrayList[String]() var serializationNullFormat: String = _ override def initialize(configuration: Configuration, structType: StructType, isVarcharTypeMapping: Array[Boolean]): Unit = { this.configuration = configuration this.structType = structType - this.encoder = RowEncoder.apply(this.structType).resolveAndBind() + this.encoder = Encoders.row(this.structType).asInstanceOf[ExpressionEncoder[Row]] this.isVarcharTypeMapping = isVarcharTypeMapping this.timeStampFormat = new SimpleDateFormat( diff --git a/streaming/src/main/spark3.1/org/apache/carbondata/util/SparkStreamingUtil.scala b/streaming/src/main/scala/org/apache/carbondata/util/SparkStreamingUtil.scala similarity index 100% rename from streaming/src/main/spark3.1/org/apache/carbondata/util/SparkStreamingUtil.scala rename to streaming/src/main/scala/org/apache/carbondata/util/SparkStreamingUtil.scala diff --git a/streaming/src/main/spark2.x/org.apache.carbondata.util/SparkStreamingUtil.scala 
b/streaming/src/main/spark2.x/org.apache.carbondata.util/SparkStreamingUtil.scala deleted file mode 100644 index 399d2f42514..00000000000 --- a/streaming/src/main/spark2.x/org.apache.carbondata.util/SparkStreamingUtil.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.carbondata.util - -import java.text.SimpleDateFormat - -import org.apache.spark.sql.Row -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder - -object SparkStreamingUtil { - - def convertInternalRowToRow(expressionEncoder: ExpressionEncoder[Row]): InternalRow => Row = { - expressionEncoder.fromRow - } - - def checkInstant(value: Any, timeStampFormat: SimpleDateFormat): String = { - value.toString - } -}