Java Source Code Examples: org.apache.kylin.common.util.StringUtil
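
The snippets below are taken from the Apache Kylin codebase and exercise the most common StringUtil helpers: join (concatenate a collection into a comma-separated argument), splitByComma (parse such an argument back into an array), appendWithSeparator (accumulate values such as additional jar paths into a StringBuilder), noBlank (fall back to a default for a blank value), and toUpperCaseArray (upper-case an array of names). The following is a minimal, self-contained sketch of those calls; it assumes the Kylin common module is on the classpath, and the signatures and the expected results noted in the comments are inferred from the usages in the examples below rather than taken from the class's documentation.

import java.util.Arrays;
import java.util.List;

import org.apache.kylin.common.util.StringUtil;

public class StringUtilDemo {
    public static void main(String[] args) {
        // join: concatenate a collection with a separator, e.g. merging segment IDs or cuboid paths
        List<String> segmentIds = Arrays.asList("seg-1", "seg-2", "seg-3");
        String joined = StringUtil.join(segmentIds, ","); // expected: "seg-1,seg-2,seg-3"

        // splitByComma: parse a comma-separated parameter back into an array
        String[] parts = StringUtil.splitByComma(joined); // expected: 3 elements

        // appendWithSeparator: accumulate entries into a StringBuilder,
        // as done for the "additional jars" lists in the build steps below
        StringBuilder jars = new StringBuilder();
        StringUtil.appendWithSeparator(jars, "/path/to/extra-a.jar"); // hypothetical jar paths
        StringUtil.appendWithSeparator(jars, "/path/to/extra-b.jar");

        // noBlank: fall back to a default when the value is null or blank
        String submitter = StringUtil.noBlank(null, "missing submitter"); // expected: "missing submitter"

        // toUpperCaseArray: upper-case the source array into the target array (here, in place)
        String[] columns = {"col_a", "col_b"};
        StringUtil.toUpperCaseArray(columns, columns);

        System.out.println(joined);
        System.out.println(Arrays.toString(parts));
        System.out.println(jars);
        System.out.println(submitter);
        System.out.println(Arrays.toString(columns));
    }
}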

Example 1
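Builds the Hive cleanup step of a cube build job; StringUtil.join concatenates the Hive-view intermediate table identities into a single comma-separated parameter.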
@Override
public void addStepPhase4_Cleanup(DefaultChainedExecutable jobFlow) {
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    org.apache.kylin.source.hive.GarbageCollectionStep step = new org.apache.kylin.source.hive.GarbageCollectionStep();
    step.setName(ExecutableConstants.STEP_NAME_HIVE_CLEANUP);

    List<String> deleteTables = new ArrayList<>();
    deleteTables.add(getIntermediateTableIdentity());

    // The MR-Hive dict table and inner table do not need their HDFS data deleted
    String[] mrHiveDicts = flatDesc.getSegment().getConfig().getMrHiveDictColumns();
    if (Objects.nonNull(mrHiveDicts) && mrHiveDicts.length > 0) {
        String dictDb = flatDesc.getSegment().getConfig().getMrHiveDictDB();
        String tableName = dictDb + "." + flatDesc.getTableName() + "_"
                + MRHiveDictUtil.DictHiveType.GroupBy.getName();
        deleteTables.add(tableName);
    }
    step.setIntermediateTables(deleteTables);

    step.setExternalDataPaths(Collections.singletonList(JoinedFlatTable.getTableDir(flatDesc, jobWorkingDir)));
    step.setHiveViewIntermediateTableIdentities(StringUtil.join(hiveViewIntermediateTables, ","));
    jobFlow.addTask(step);
}
 
Example 2
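Creates the MapReduce merge-dictionary step; StringUtil.join turns the merging segment IDs into one comma-separated command-line argument.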
public MapReduceExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    MapReduceExecutable mergeDictionaryStep = new MapReduceExecutable();
    mergeDictionaryStep.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY);
    StringBuilder cmd = new StringBuilder();
    appendMapReduceParameters(cmd, JobEngineConfig.CUBE_MERGE_JOB_CONF_SUFFIX);

    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getCubeInstance().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_META_URL, getSegmentMetadataUrl(seg.getConfig(), jobID));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME, "Kylin_Merge_Dictionary_" + seg.getCubeInstance().getName() + "_Step");

    mergeDictionaryStep.setMapReduceParams(cmd.toString());
    mergeDictionaryStep.setMapReduceJobClass(MergeDictionaryJob.class);

    return mergeDictionaryStep;
}
 
Example 3
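Configures the Flink cubing-by-layer executable; StringUtil.appendWithSeparator appends the configured additional Flink jars to the jar list.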
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
 
Example 4
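Creates the Spark fact-distinct-columns step; StringUtil.appendWithSeparator collects the additional Spark jars passed to the executable.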
public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));

    sparkExecutable.setClassName(SparkFactDistinct.class.getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS + ":" + seg.toString());
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
 
Example 5
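Creates the Flink merge-dictionary step; StringUtil.join passes the merging segment IDs as one argument and StringUtil.appendWithSeparator assembles the jar list.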
public FlinkExecutable createMergeDictionaryFlinkStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkMergingDictionary.class.getName());

    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));

    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY);
    flinkExecutable.setFlinkConfigName(ExecutableConstants.FLINK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
 
Example 6
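Checks HTables for consistency; StringUtil.splitByComma splits each segment full name so the first token can be used as the HBase table name.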
public void check(List<String> segFullNameList) {
    issueExistHTables = Lists.newArrayList();
    inconsistentHTables = Lists.newArrayList();

    for (String segFullName : segFullNameList) {
        String[] sepNameList = StringUtil.splitByComma(segFullName);
        try {
            HTableDescriptor hTableDescriptor = hbaseAdmin.getTableDescriptor(TableName.valueOf(sepNameList[0]));
            String host = hTableDescriptor.getValue(IRealizationConstants.HTableTag);
            if (!dstCfg.getMetadataUrlPrefix().equalsIgnoreCase(host)) {
                inconsistentHTables.add(segFullName);
            }
        } catch (IOException e) {
            issueExistHTables.add(segFullName);
            continue;
        }
    }
}
 
Example 7
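REST endpoint that recalculates table cardinality; StringUtil.splitByComma splits the comma-separated table names taken from the path variable.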
/**
 * Regenerate the cardinality of the given tables.
 *
 * @return the CardinalityRequest that was submitted
 * @throws Exception if the cardinality calculation fails
 */
@RequestMapping(value = "/{project}/{tableNames}/cardinality", method = { RequestMethod.PUT }, produces = {
        "application/json" })
@ResponseBody
public CardinalityRequest generateCardinality(@PathVariable String tableNames,
        @RequestBody CardinalityRequest request, @PathVariable String project) throws Exception {
    String submitter = SecurityContextHolder.getContext().getAuthentication().getName();
    String[] tables = StringUtil.splitByComma(tableNames);
    try {
        for (String table : tables) {
            tableService.calculateCardinality(table.trim().toUpperCase(Locale.ROOT), submitter, project);
        }
    } catch (IOException e) {
        logger.error("Failed to calculate cardinality", e);
        throw new InternalErrorException(e.getLocalizedMessage(), e);
    }
    return request;
}
 
Example 8
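Initializes a descriptor against a data model; StringUtil.toUpperCaseArray upper-cases the column names in place before they are resolved to model columns.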
void init(DataModelDesc model) {
    table = table.toUpperCase(Locale.ROOT);
    if (columns != null) {
        StringUtil.toUpperCaseArray(columns, columns);
    }

    if (model != null) {
        table = model.findTable(table).getAlias();
        if (columns != null) {
            for (int i = 0; i < columns.length; i++) {
                TblColRef column = model.findColumn(table, columns[i]);

                if (column.getColumnDesc().isComputedColumn() && !model.isFactTable(column.getTableRef())) {
                    throw new RuntimeException("Computed Column on lookup table is not allowed");
                }

                columns[i] = column.getName();
            }
        }
    }
}
 
Example 9
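Mapper setup for the merge-dictionary MapReduce job; StringUtil.splitByComma parses the merging segment IDs out of the job configuration.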
@Override
protected void doSetup(Context context) throws IOException, InterruptedException {
    super.doSetup(context);

    final SerializableConfiguration sConf = new SerializableConfiguration(context.getConfiguration());
    final String metaUrl = context.getConfiguration().get(BatchConstants.ARG_META_URL);
    final String cubeName = context.getConfiguration().get(BatchConstants.ARG_CUBE_NAME);
    final String segmentIds = context.getConfiguration().get(MergeDictionaryJob.OPTION_MERGE_SEGMENT_IDS.getOpt());

    final KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
    final CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);
    final CubeDesc cubeDesc = CubeDescManager.getInstance(kylinConfig).getCubeDesc(cubeInstance.getDescName());

    mergingSegments = getMergingSegments(cubeInstance, StringUtil.splitByComma(segmentIds));
    tblColRefs = cubeDesc.getAllColumnsNeedDictionaryBuilt().toArray(new TblColRef[0]);
    dictMgr = DictionaryManager.getInstance(kylinConfig);
}
 
Example 10
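Sends a notification mail when metadata persistence fails; StringUtil.noBlank supplies fallback text for a missing submitter or error message.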
protected void handleMetadataPersistException(ExecutableContext context, Throwable exception) {
    final String[] adminDls = context.getConfig().getAdminDls();
    if (adminDls == null || adminDls.length < 1) {
        logger.warn(NO_NEED_TO_SEND_EMAIL_USER_LIST_IS_EMPTY);
        return;
    }
    List<String> users = Lists.newArrayList(adminDls);

    Map<String, Object> dataMap = Maps.newHashMap();
    dataMap.put("job_name", getName());
    dataMap.put("env_name", context.getConfig().getDeployEnv());
    dataMap.put(SUBMITTER, StringUtil.noBlank(getSubmitter(), "missing submitter"));
    dataMap.put("job_engine", MailNotificationUtil.getLocalHostName());
    dataMap.put("error_log",
            Matcher.quoteReplacement(StringUtil.noBlank(exception.getMessage(), "no error message")));

    String content = MailNotificationUtil.getMailContent(MailNotificationUtil.METADATA_PERSIST_FAIL, dataMap);
    String title = MailNotificationUtil.getMailTitle("METADATA PERSIST", "FAIL",
            context.getConfig().getDeployEnv());

    new MailService(context.getConfig()).sendMail(users, title, content);
}
 
Example 11
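Creates the Spark dictionary-building step; StringUtil.appendWithSeparator assembles the additional Spark jars.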
public SparkExecutable createBuildDictionarySparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());

    sparkExecutable.setClassName(SparkBuildDictionary.class.getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_DICT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());

    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example 12
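Creates the MapReduce merge-cuboid step; StringUtil.join combines the cuboid root paths of all merging segments into the job's input path.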
public MapReduceExecutable createMergeCuboidDataStep(CubeSegment seg, List<CubeSegment> mergingSegments,
        String jobID, Class<? extends AbstractHadoopJob> clazz) {
    final List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingCuboidPaths.add(getCuboidRootPath(merging) + "*");
    }
    String formattedPath = StringUtil.join(mergingCuboidPaths, ",");
    String outputPath = getCuboidRootPath(jobID);

    MapReduceExecutable mergeCuboidDataStep = new MapReduceExecutable();
    mergeCuboidDataStep.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID);
    StringBuilder cmd = new StringBuilder();

    appendMapReduceParameters(cmd);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getCubeInstance().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_INPUT, formattedPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT, outputPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME,
            "Kylin_Merge_Cuboid_" + seg.getCubeInstance().getName() + "_Step");

    mergeCuboidDataStep.setMapReduceParams(cmd.toString());
    mergeCuboidDataStep.setMapReduceJobClass(clazz);
    return mergeCuboidDataStep;
}
 
Example 13
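Configures the Spark cubing-by-layer executable; StringUtil.appendWithSeparator appends the additional Spark jars.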
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
                              final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
 
Example 14
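Reloads cube metadata from the store; StringUtil.splitByComma parses an optional comma-separated list of cube names that restricts the update.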
public void update() {
    logger.info("Reloading Cube Metadata from store: " + store.getReadableResourcePath(ResourceStore.CUBE_DESC_RESOURCE_ROOT));
    CubeDescManager cubeDescManager = CubeDescManager.getInstance(config);
    List<CubeDesc> cubeDescs;
    if (ArrayUtils.isEmpty(cubeNames)) {
        cubeDescs = cubeDescManager.listAllDesc();
    } else {
        String[] names = StringUtil.splitByComma(cubeNames[0]);
        if (ArrayUtils.isEmpty(names))
            return;
        cubeDescs = Lists.newArrayListWithCapacity(names.length);
        for (String name : names) {
            cubeDescs.add(cubeDescManager.getCubeDesc(name));
        }
    }
    for (CubeDesc cubeDesc : cubeDescs) {
        updateCubeDesc(cubeDesc);
    }

    verify();
}
 
Example 15
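Creates the Spark merge-dictionary step; StringUtil.join passes the merging segment IDs as one comma-separated argument and StringUtil.appendWithSeparator builds the jar list.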
public SparkExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkMergingDictionary.class.getName());

    sparkExecutable.setParam(SparkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));

    sparkExecutable.setJobId(jobID);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY + ":" + seg.toString());
    sparkExecutable.setSparkConfigName(ExecutableConstants.SPARK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example 16
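Reads sequence files under a path that may contain sub-folders; StringUtil.join combines the discovered folders into a single comma-separated input path.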
/**
 * Reads the given path as a Java pair RDD; the path may contain second-level sub-folders.
 *
 * @param inputPath  root input path on HDFS
 * @param fs         file system used to list the input path
 * @param sc         Spark context used to create the RDD
 * @param keyClass   key class of the sequence files
 * @param valueClass value class of the sequence files
 * @return a JavaPairRDD over all matching sequence files
 * @throws IOException if listing the input path fails
 */
public static JavaPairRDD parseInputPath(String inputPath, FileSystem fs, JavaSparkContext sc, Class keyClass,
        Class valueClass) throws IOException {
    List<String> inputFolders = Lists.newArrayList();
    Path inputHDFSPath = new Path(inputPath);
    FileStatus[] fileStatuses = fs.listStatus(inputHDFSPath);
    boolean hasDir = false;
    for (FileStatus stat : fileStatuses) {
        if (stat.isDirectory() && !stat.getPath().getName().startsWith("_")) {
            hasDir = true;
            inputFolders.add(stat.getPath().toString());
        }
    }

    if (!hasDir) {
        return sc.sequenceFile(inputHDFSPath.toString(), keyClass, valueClass);
    }

    return sc.sequenceFile(StringUtil.join(inputFolders, ","), keyClass, valueClass);
}
 
Example 17
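Creates the Spark UHC dictionary step; StringUtil.appendWithSeparator collects the additional Spark jars.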
public SparkExecutable createBuildUHCDictSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());

    sparkExecutable.setClassName(SparkUHCDictionary.class.getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_OUTPUT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_UHC_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
 
Example 18
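REST endpoint that unloads Hive tables from a project; StringUtil.splitByComma splits the comma-separated table list taken from the path variable.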
@RequestMapping(value = "/{tables}/{project}", method = { RequestMethod.DELETE }, produces = { "application/json" })
@ResponseBody
public Map<String, String[]> unLoadHiveTables(@PathVariable String tables, @PathVariable String project) {
    Set<String> unLoadSuccess = Sets.newHashSet();
    Set<String> unLoadFail = Sets.newHashSet();
    Map<String, String[]> result = new HashMap<String, String[]>();
    try {
        for (String tableName : StringUtil.splitByComma(tables)) {
            tableACLService.deleteFromTableACLByTbl(project, tableName);
            if (tableService.unloadHiveTable(tableName, project)) {
                unLoadSuccess.add(tableName);
            } else {
                unLoadFail.add(tableName);
            }
        }
    } catch (Throwable e) {
        logger.error("Failed to unload Hive Table", e);
        throw new InternalErrorException(e.getLocalizedMessage(), e);
    }
    result.put("result.unload.success", (String[]) unLoadSuccess.toArray(new String[unLoadSuccess.size()]));
    result.put("result.unload.fail", (String[]) unLoadFail.toArray(new String[unLoadFail.size()]));
    return result;
}
 
示例23
/**
 * Regenerate table cardinality
 *
 * @return Table metadata array
 * @throws IOException
 */
@RequestMapping(value = "/{project}/{tableNames}/cardinality", method = { RequestMethod.PUT }, produces = {
        "application/json" })
@ResponseBody
public CardinalityRequest generateCardinality(@PathVariable String tableNames,
        @RequestBody CardinalityRequest request, @PathVariable String project) throws Exception {
    String submitter = SecurityContextHolder.getContext().getAuthentication().getName();
    String[] tables = StringUtil.splitByComma(tableNames);
    try {
        for (String table : tables) {
            tableService.calculateCardinality(table.trim().toUpperCase(Locale.ROOT), submitter, project);
        }
    } catch (IOException e) {
        logger.error("Failed to calculate cardinality", e);
        throw new InternalErrorException(e.getLocalizedMessage(), e);
    }
    return request;
}
 
示例24
protected void handleMetadataPersistException(ExecutableContext context, Throwable exception) {
    final String[] adminDls = context.getConfig().getAdminDls();
    if (adminDls == null || adminDls.length < 1) {
        logger.warn(NO_NEED_TO_SEND_EMAIL_USER_LIST_IS_EMPTY);
        return;
    }
    List<String> users = Lists.newArrayList(adminDls);

    Map<String, Object> dataMap = Maps.newHashMap();
    dataMap.put("job_name", getName());
    dataMap.put("env_name", context.getConfig().getDeployEnv());
    dataMap.put(SUBMITTER, StringUtil.noBlank(getSubmitter(), "missing submitter"));
    dataMap.put("job_engine", MailNotificationUtil.getLocalHostName());
    dataMap.put("error_log",
            Matcher.quoteReplacement(StringUtil.noBlank(exception.getMessage(), "no error message")));

    String content = MailNotificationUtil.getMailContent(MailNotificationUtil.METADATA_PERSIST_FAIL, dataMap);
    String title = MailNotificationUtil.getMailTitle("METADATA PERSIST", "FAIL",
            context.getConfig().getDeployEnv());

    new MailService(context.getConfig()).sendMail(users, title, content);
}
 
Example 19
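Flink counterpart of the input-path parser; StringUtil.join combines the sub-folders into one comma-separated input path for the Hadoop input format.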
public static DataSet parseInputPath(String inputPath, FileSystem fs, ExecutionEnvironment env, Class keyClass,
        Class valueClass) throws IOException {
    List<String> inputFolders = Lists.newArrayList();
    Path inputHDFSPath = new Path(inputPath);
    FileStatus[] fileStatuses = fs.listStatus(inputHDFSPath);
    boolean hasDir = false;
    for (FileStatus stat : fileStatuses) {
        if (stat.isDirectory() && !stat.getPath().getName().startsWith("_")) {
            hasDir = true;
            inputFolders.add(stat.getPath().toString());
        }
    }

    if (!hasDir) {
        return env.createInput(HadoopInputs.readSequenceFile(keyClass, valueClass, inputHDFSPath.toString()));
    }

    Job job = Job.getInstance();
    FileInputFormat.setInputPaths(job, StringUtil.join(inputFolders, ","));
    return env.createInput(HadoopInputs.createHadoopInput(new SequenceFileInputFormat(), keyClass, valueClass, job));
}
 
Example 20
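Creates the Flink fact-distinct-columns step; StringUtil.appendWithSeparator collects the additional Flink jars.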
public FlinkExecutable createFactDistinctColumnsFlinkStep(String jobId) {
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));

    flinkExecutable.setClassName(FlinkFactDistinctColumns.class.getName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_INPUT_PATH.getOpt(), tablePath);
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    flinkExecutable.setJobId(jobId);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS);
    flinkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());

    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
 
Example 21
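Creates the Flink merge-cuboid step; StringUtil.join combines the merging cuboid paths and StringUtil.appendWithSeparator assembles the jar list.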
public FlinkExecutable createMergeCuboidDataFlinkStep(CubeSegment seg, List<CubeSegment> mergingSegments, String jobID) {
    final List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingCuboidPaths.add(getCuboidRootPath(merging));
    }
    String formattedPath = StringUtil.join(mergingCuboidPaths, ",");
    String outputPath = getCuboidRootPath(jobID);

    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkCubingMerge.class.getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_INPUT_PATH.getOpt(), formattedPath);
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_OUTPUT_PATH.getOpt(), outputPath);

    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
 