Java source code examples: org.apache.hadoop.hive.ql.hooks.ReadEntity
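
The examples below show how org.apache.hadoop.hive.ql.hooks.ReadEntity is used in open-source projects. A ReadEntity describes an input read by a Hive query (a table, a partition, or a DFS/local directory) and is handed to Hive hooks together with the corresponding WriteEntity outputs; the snippets here come from the Apache Atlas Hive hook/bridge and the Apache Sentry Hive bindings.

As a quick orientation, here is a minimal sketch of how the test helpers below build a ReadEntity by hand. The helper name and the table name are illustrative assumptions; setName(), setTyp() and the Set<ReadEntity> wrapper mirror what Example 4 uses.

// Minimal sketch (illustrative; the helper name and table name are hypothetical):
// build a single table input the way the getInputs() helpers below do.
private Set<ReadEntity> singleTableInput(String qualifiedTableName) {
    ReadEntity entity = new ReadEntity();
    entity.setName(qualifiedTableName);   // e.g. "default@sample_table"
    entity.setTyp(Entity.Type.TABLE);     // mark the entity as a table read

    Set<ReadEntity> inputs = new LinkedHashSet<>();
    inputs.add(entity);
    return inputs;
}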

Example 1
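A CTAS integration test from the Atlas Hive hook: it runs CREATE TABLE ... AS SELECT, builds the ReadEntity inputs for the source table and the WriteEntity outputs for the new table, and validates the resulting process and process-execution entities in Atlas.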
@Test
public void testCTAS() throws Exception {
    String tableName     = createTable();
    String ctasTableName = "table" + random();
    String query         = "create table " + ctasTableName + " as select * from " + tableName;

    runCommand(query);

    final Set<ReadEntity> readEntities = getInputs(tableName, Entity.Type.TABLE);
    final Set<WriteEntity> writeEntities = getOutputs(ctasTableName, Entity.Type.TABLE);

    HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, readEntities,
            writeEntities);
    AtlasEntity processEntity1 = validateProcess(hiveEventContext);
    AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext);
    AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
            BaseHiveEvent.ATTRIBUTE_PROCESS));
    Assert.assertEquals(process.getGuid(), processEntity1.getGuid());

    Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
    assertTableIsRegistered(DEFAULT_DB, ctasTableName);
}
 
Example 2
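Asserts that no hive_process entity was registered for an event: the ReadEntity inputs and WriteEntity outputs are copied into sorted sets, the process qualified name is computed from them, and the lookup by qualified name is expected to fail.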
private void assertProcessIsNotRegistered(HiveEventContext event) throws Exception {
    try {
        SortedSet<ReadEntity>  sortedHiveInputs  = event.getInputs() == null ? null : new TreeSet<ReadEntity>(entityComparator);
        SortedSet<WriteEntity> sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet<WriteEntity>(entityComparator);

        if (event.getInputs() != null) {
            sortedHiveInputs.addAll(event.getInputs());
        }

        if (event.getOutputs() != null) {
            sortedHiveOutputs.addAll(event.getOutputs());
        }

        String processQFName = getProcessQualifiedName(hiveMetaStoreBridge, event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(event.getInputs()), getSortedProcessDataSets(event.getOutputs()));

        LOG.debug("Searching for process with query {}", processQFName);

        assertEntityIsNotRegistered(HiveDataTypes.HIVE_PROCESS.getName(), ATTRIBUTE_QUALIFIED_NAME, processQFName);
    } catch(Exception e) {
        LOG.error("Exception : ", e);
    }
}
 
Example 3
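Appends the sorted ReadEntity inputs to a process qualified name, skipping DFS/local directory inputs when requested, including the table creation time for TABLE and PARTITION inputs, and de-duplicating by lower-cased entity name.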
private static void addInputs(HiveMetaStoreBridge hiveBridge, HiveOperation op, SortedSet<ReadEntity> sortedInputs, StringBuilder buffer, final Map<ReadEntity, Referenceable> refs, final boolean ignoreHDFSPathsInQFName) throws HiveException {
    if (refs != null) {
        if (sortedInputs != null) {
            Set<String> dataSetsProcessed = new LinkedHashSet<>();
            for (Entity input : sortedInputs) {

                if (!dataSetsProcessed.contains(input.getName().toLowerCase())) {
                    //HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
                    if (ignoreHDFSPathsInQFName &&
                        (Type.DFS_DIR.equals(input.getType()) || Type.LOCAL_DIR.equals(input.getType()))) {
                        LOG.debug("Skipping dfs dir input addition to process qualified name {} ", input.getName());
                    } else if (refs.containsKey(input)) {
                        if ( input.getType() == Type.PARTITION || input.getType() == Type.TABLE) {
                            final Date createTime = HiveMetaStoreBridge.getTableCreatedTime(hiveBridge.hiveClient.getTable(input.getTable().getDbName(), input.getTable().getTableName()));
                            addDataset(buffer, refs.get(input), createTime.getTime());
                        } else {
                            addDataset(buffer, refs.get(input));
                        }
                    }
                    dataSetsProcessed.add(input.getName().toLowerCase());
                }
            }

        }
    }
}
 
Example 4
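Test helper that wraps a single ReadEntity in a set: DFS_DIR inputs get a lower-cased path, other inputs get the qualified table name, and TABLE inputs additionally carry the Hive Table object via setT().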
private Set<ReadEntity> getInputs(String inputName, Entity.Type entityType) throws HiveException {
    final ReadEntity entity = new ReadEntity();

    if ( Entity.Type.DFS_DIR.equals(entityType)) {
        entity.setName(lower(new Path(inputName).toString()));
        entity.setTyp(Entity.Type.DFS_DIR);
    } else {
        entity.setName(getQualifiedTblName(inputName));
        entity.setTyp(entityType);
    }

    if (entityType == Entity.Type.TABLE) {
        entity.setT(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, inputName));
    }

    return new LinkedHashSet<ReadEntity>() {{ add(entity); }};
}
 
Example 5
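Validates a registered process Referenceable against the expected ReadEntity inputs and WriteEntity outputs by comparing the sizes of its INPUTS/OUTPUTS lists and delegating to table-level checks.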
private Referenceable validateProcess(HiveHook.HiveEventContext event, Set<ReadEntity> inputTables, Set<WriteEntity> outputTables) throws Exception {
    String processId = assertProcessIsRegistered(event, inputTables, outputTables);
    Referenceable process = atlasClient.getEntity(processId);
    if (inputTables == null) {
        Assert.assertNull(process.get(INPUTS));
    } else {
        Assert.assertEquals(((List<Referenceable>) process.get(INPUTS)).size(), inputTables.size());
        validateInputTables(process, inputTables);
    }

    if (outputTables == null) {
        Assert.assertNull(process.get(OUTPUTS));
    } else {
        Assert.assertEquals(((List<Id>) process.get(OUTPUTS)).size(), outputTables.size());
        validateOutputTables(process, outputTables);
    }

    return process;
}
 
Example 6
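A disabled test for INSERT INTO a temporary table: the WriteEntity output is renamed with the temp-table prefix plus the session id, marked as an INSERT write, and the resulting process is validated.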
@Test(enabled = false)
public void testInsertIntoTempTable() throws Exception {
    String tableName = createTable();
    String insertTableName = createTable(false, false, true);
    assertTableIsRegistered(DEFAULT_DB, tableName);
    assertTableIsNotRegistered(DEFAULT_DB, insertTableName, true);

    String query =
        "insert into " + insertTableName + " select id, name from " + tableName;

    runCommand(query);

    Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
    Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
    outputs.iterator().next().setName(getQualifiedTblName(insertTableName + HiveMetaStoreBridge.TEMP_TABLE_PREFIX + SessionState.get().getSessionId()));
    outputs.iterator().next().setWriteType(WriteEntity.WriteType.INSERT);

    validateProcess(constructEvent(query,  HiveOperation.QUERY, inputs, outputs));

    assertTableIsRegistered(DEFAULT_DB, tableName);
    assertTableIsRegistered(DEFAULT_DB, insertTableName, null, true);
}
 
Example 7
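Asserts that a hive_process entity was registered for an event; the inputs and outputs are sorted before the qualified name is computed, and the first entry of the entity's recentQueries attribute must equal the lower-cased query string.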
private String assertProcessIsRegistered(final HiveHook.HiveEventContext event, final Set<ReadEntity> inputTbls, final Set<WriteEntity> outputTbls) throws Exception {
    try {
        SortedSet<ReadEntity> sortedHiveInputs = event.getInputs() == null ? null : new TreeSet<ReadEntity>(entityComparator);
        SortedSet<WriteEntity> sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet<WriteEntity>(entityComparator);
        if ( event.getInputs() != null) {
            sortedHiveInputs.addAll(event.getInputs());
        }
        if ( event.getOutputs() != null) {
            sortedHiveOutputs.addAll(event.getOutputs());
        }
        String processQFName = getProcessQualifiedName(hiveMetaStoreBridge, event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(inputTbls), getSortedProcessDataSets(outputTbls));
        LOG.debug("Searching for process with query {}", processQFName);
        return assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQFName, new AssertPredicate() {
            @Override
            public void assertOnEntity(final Referenceable entity) throws Exception {
                List<String> recentQueries = (List<String>) entity.get("recentQueries");
                Assert.assertEquals(recentQueries.get(0), lower(event.getQueryStr()));
            }
        });
    } catch(Exception e) {
        LOG.error("Exception : ", e);
        throw e;
    }
}
 
Example 8
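Referenceable-era variant of the "process is not registered" assertion, again built from sorted ReadEntity and WriteEntity sets.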
private void assertProcessIsNotRegistered(HiveHook.HiveEventContext event) throws Exception {
    try {
        SortedSet<ReadEntity> sortedHiveInputs = event.getInputs() == null ? null : new TreeSet<ReadEntity>(entityComparator);
        SortedSet<WriteEntity> sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet<WriteEntity>(entityComparator);
        if ( event.getInputs() != null) {
            sortedHiveInputs.addAll(event.getInputs());
        }
        if ( event.getOutputs() != null) {
            sortedHiveOutputs.addAll(event.getOutputs());
        }
        String processQFName = getProcessQualifiedName(hiveMetaStoreBridge, event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(event.getInputs()), getSortedProcessDataSets(event.getOutputs()));
        LOG.debug("Searching for process with query {}", processQFName);
        assertEntityIsNotRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQFName);
    } catch( Exception e) {
        LOG.error("Exception : ", e);
    }
}
 
Example 9
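Constructor of Sentry's failure-hook context, which simply stores the command, the ReadEntity inputs, the WriteEntity outputs, and the remaining operation and authorization metadata.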
public SentryOnFailureHookContextImpl(String command,
    Set<ReadEntity> inputs, Set<WriteEntity> outputs, HiveOperation hiveOp,
    Database db, Table tab, AccessURI udfURI, AccessURI partitionURI,
    String userName, String ipAddress, AuthorizationException e,
    Configuration conf) {
  this.command = command;
  this.inputs = inputs;
  this.outputs = outputs;
  this.hiveOp = hiveOp;
  this.userName = userName;
  this.ipAddress = ipAddress;
  this.database = db;
  this.table = tab;
  this.udfURI = udfURI;
  this.partitionURI = partitionURI;
  this.authException = e;
  this.conf = conf;
}
 
Example 10
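Sentry task factory for SHOW ROLE GRANT: only GROUP principals are accepted, and the ReadEntity/WriteEntity sets are passed straight into the DDLWork backing the task.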
@Override
public Task<? extends Serializable> createShowRoleGrantTask(ASTNode ast, Path resultFile,
    HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs) throws SemanticException {
  ASTNode child = (ASTNode) ast.getChild(0);
  PrincipalType principalType = PrincipalType.USER;
  switch (child.getType()) {
  case HiveParser.TOK_USER:
    principalType = PrincipalType.USER;
    break;
  case HiveParser.TOK_GROUP:
    principalType = PrincipalType.GROUP;
    break;
  case HiveParser.TOK_ROLE:
    principalType = PrincipalType.ROLE;
    break;
  }
  if (principalType != PrincipalType.GROUP) {
    String msg = SentryHiveConstants.GRANT_REVOKE_NOT_SUPPORTED_FOR_PRINCIPAL + principalType;
    throw new SemanticException(msg);
  }
  String principalName = BaseSemanticAnalyzer.unescapeIdentifier(child.getChild(0).getText());
  RoleDDLDesc roleDesc = new RoleDDLDesc(principalName, principalType,
      RoleDDLDesc.RoleOperation.SHOW_ROLE_GRANT, null);
  roleDesc.setResFile(resultFile.toString());
  return createTask(new DDLWork(inputs, outputs,  roleDesc));
}
 
Example 11
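Sentry task factory for REVOKE: privilege and principal lists are analyzed, partition-level privileges and non-ROLE principals are rejected, and the entity sets are wrapped into DDLWork together with the RevokeDesc.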
@Override
public Task<? extends Serializable> createRevokeTask(ASTNode ast, HashSet<ReadEntity> inputs,
    HashSet<WriteEntity> outputs) throws SemanticException {
  List<PrivilegeDesc> privilegeDesc = analyzePrivilegeListDef((ASTNode) ast.getChild(0));
  List<PrincipalDesc> principalDesc = analyzePrincipalListDef((ASTNode) ast.getChild(1));
  PrivilegeObjectDesc privilegeObj = null;
  if (ast.getChildCount() > 2) {
    ASTNode astChild = (ASTNode) ast.getChild(2);
    privilegeObj = analyzePrivilegeObject(astChild);
  }
  if (privilegeObj != null && privilegeObj.getPartSpec() != null) {
    throw new SemanticException(SentryHiveConstants.PARTITION_PRIVS_NOT_SUPPORTED);
  }
  for (PrincipalDesc princ : principalDesc) {
    if (princ.getType() != PrincipalType.ROLE) {
      String msg = SentryHiveConstants.GRANT_REVOKE_NOT_SUPPORTED_FOR_PRINCIPAL + princ.getType();
      throw new SemanticException(msg);
    }
  }
  RevokeDesc revokeDesc = new RevokeDesc(privilegeDesc, principalDesc, privilegeObj);
  return createTask(new DDLWork(inputs, outputs, revokeDesc));
}
 
Example 12
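Sentry helper for GRANT/REVOKE ROLE: principals must be groups, and the resulting GrantRevokeRoleDDL is wrapped in DDLWork together with the input/output entity sets.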
private Task<? extends Serializable> analyzeGrantRevokeRole(boolean isGrant, ASTNode ast,
    HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs) throws SemanticException {
  List<PrincipalDesc> principalDesc = analyzePrincipalListDef(
      (ASTNode) ast.getChild(0));

  List<String> roles = new ArrayList<String>();
  for (int i = 1; i < ast.getChildCount(); i++) {
    roles.add(BaseSemanticAnalyzer.unescapeIdentifier(ast.getChild(i).getText()));
  }
  String roleOwnerName = "";
  if (SessionState.get() != null
      && SessionState.get().getAuthenticator() != null) {
    roleOwnerName = SessionState.get().getAuthenticator().getUserName();
  }
  for (PrincipalDesc princ : principalDesc) {
    if (princ.getType() != PrincipalType.GROUP) {
      String msg = SentryHiveConstants.GRANT_REVOKE_NOT_SUPPORTED_ON_OBJECT + princ.getType();
      throw new SemanticException(msg);
    }
  }
  GrantRevokeRoleDDL grantRevokeRoleDDL = new GrantRevokeRoleDDL(isGrant,
      roles, principalDesc, roleOwnerName, PrincipalType.USER, false);
  return createTask(new DDLWork(inputs, outputs, grantRevokeRoleDDL));
}
 
Example 13
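Sentry task factory for SHOW ROLE PRINCIPALS; the ReadEntity/WriteEntity sets are only forwarded into the DDLWork.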
@Override
public Task<? extends Serializable> createShowRolePrincipalsTask(ASTNode ast, Path resFile,
    HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs) throws SemanticException {
  String roleName;

  if (ast.getChildCount() == 1) {
    roleName = ast.getChild(0).getText();
  } else {
    // the parser should not allow this
    throw new AssertionError("Unexpected Tokens in SHOW ROLE PRINCIPALS");
  }

  RoleDDLDesc roleDDLDesc = new RoleDDLDesc(roleName, PrincipalType.ROLE,
   RoleDDLDesc.RoleOperation.SHOW_ROLE_PRINCIPALS, null);
  roleDDLDesc.setResFile(resFile.toString());
  return createTask(new DDLWork(inputs, outputs, roleDDLDesc));
  //return TaskFactory.get(new DDLWork(inputs, outputs, roleDDLDesc), conf);
}
 
Example 14
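Adds the column-level authorization hierarchy for a ReadEntity: for tables and partitions each accessed column gets its own hierarchy entry, otherwise the entity hierarchy itself is added.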
/**
 * Add the column-level hierarchy for the given read entity to inputHierarchy.
 *
 * @param inputHierarchy
 * @param entity
 */
private void addColumnHierarchy(List<List<DBModelAuthorizable>> inputHierarchy,
    ReadEntity entity) {
  List<DBModelAuthorizable> entityHierarchy = new ArrayList<DBModelAuthorizable>();
  entityHierarchy.add(hiveAuthzBinding.getAuthServer());
  entityHierarchy.addAll(getAuthzHierarchyFromEntity(entity));

  switch (entity.getType()) {
  case TABLE:
  case PARTITION:
    List<String> cols = entity.getAccessedColumns();
    for (String col : cols) {
      List<DBModelAuthorizable> colHierarchy = new ArrayList<DBModelAuthorizable>(entityHierarchy);
      colHierarchy.add(new Column(col));
      inputHierarchy.add(colHierarchy);
    }
    break;
  default:
    inputHierarchy.add(entityHierarchy);
  }
}
 
Example 15
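Walks the ReadEntity inputs and builds the authorization input hierarchy, skipping dummy entities and tables that only appear because a view was expanded, and using column-level hierarchies when accessed columns are available.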
/**
 * Get the Authorizable hierarchy from the inputs and add it to inputHierarchy.
 *
 * @param inputHierarchy
 * @param inputs
 */
private void getInputHierarchyFromInputs(List<List<DBModelAuthorizable>> inputHierarchy,
    Set<ReadEntity> inputs) {
  for (ReadEntity readEntity: inputs) {
    // skip the tables/view that are part of expanded view definition
    // skip the Hive generated dummy entities created for queries like 'select <expr>'
    if (isChildTabForView(readEntity) || isDummyEntity(readEntity)) {
      continue;
    }
    if (readEntity.getAccessedColumns() != null && !readEntity.getAccessedColumns().isEmpty()) {
      addColumnHierarchy(inputHierarchy, readEntity);
    } else {
      List<DBModelAuthorizable> entityHierarchy = new ArrayList<DBModelAuthorizable>();
      entityHierarchy.add(hiveAuthzBinding.getAuthServer());
      entityHierarchy.addAll(getAuthzHierarchyFromEntity(readEntity));
      inputHierarchy.add(entityHierarchy);
    }
  }
}
 
Example 16
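Decides whether a ReadEntity is a table that only shows up because the compiler expanded a view: it must be a TABLE or PARTITION entity whose parents are all of type TABLE.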
/**
 * Check if the given read entity is a table that has parents of type Table.
 * The Hive compiler performs a query rewrite by replacing a view with its definition; in the process, it captures both
 * the original view and the tables/views that it selects from.
 * Access authorization is only interested in the top-level views and not the underlying tables.
 * @param readEntity
 * @return true if the entity is a table that only appears because a view was expanded
 */
private boolean isChildTabForView(ReadEntity readEntity) {
  // If this is a table added for view, then we need to skip that
  if (!readEntity.getType().equals(Type.TABLE) && !readEntity.getType().equals(Type.PARTITION)) {
    return false;
  }
  if (readEntity.getParents() != null && readEntity.getParents().size() > 0) {
    for (ReadEntity parentEntity : readEntity.getParents()) {
      if (!parentEntity.getType().equals(Type.TABLE)) {
        return false;
      }
    }
    return true;
  } else {
    return false;
  }
}
 
Example 17
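Atlas hook helper that decides whether an event should be skipped: empty input and output sets are skipped, and for QUERY operations a single output that is a temporary path write, a DELETE, or an UPDATE also causes the event to be ignored.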
private boolean skipProcess() {
    Set<ReadEntity>  inputs  = getInputs();
    Set<WriteEntity> outputs = getOutputs();

    boolean ret = CollectionUtils.isEmpty(inputs) && CollectionUtils.isEmpty(outputs);

    if (!ret) {
        if (getContext().getHiveOperation() == HiveOperation.QUERY) {
            // Select query has only one output
            if (outputs.size() == 1) {
                WriteEntity output = outputs.iterator().next();

                if (output.getType() == Entity.Type.DFS_DIR || output.getType() == Entity.Type.LOCAL_DIR) {
                    if (output.getWriteType() == WriteEntity.WriteType.PATH_WRITE && output.isTempURI()) {
                        ret = true;
                    }
                }
                // DELETE and UPDATE initially have one input and one output.
                // Since they do not support sub-queries, they won't create lineage that has one input and one output (one input only).
                // It's safe to filter them out here.
                if (output.getWriteType() == WriteEntity.WriteType.DELETE || output.getWriteType() == WriteEntity.WriteType.UPDATE) {
                    ret = true;
                }
            }
        }
    }

    return ret;
}
 
Example 18
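Newer variant of the getInputs() test helper that builds the ReadEntity through a TestReadEntity subclass instead of mutating a plain ReadEntity.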
private Set<ReadEntity> getInputs(String inputName, Entity.Type entityType) throws HiveException {
    final ReadEntity entity;

    if (Entity.Type.DFS_DIR.equals(entityType)) {
        entity = new TestReadEntity(lower(new Path(inputName).toString()), entityType);
    } else {
        entity = new TestReadEntity(getQualifiedTblName(inputName), entityType);
    }

    if (entityType == Entity.Type.TABLE) {
        entity.setT(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, inputName));
    }

    return new LinkedHashSet<ReadEntity>() {{ add(entity); }};
}
 
Example 19
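Builds a HiveEventContext from the query string, the HiveOperation, and the ReadEntity/WriteEntity sets.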
private HiveEventContext constructEvent(String query, HiveOperation op, Set<ReadEntity> inputs, Set<WriteEntity> outputs) {
    HiveEventContext event = new HiveEventContext();

    event.setQueryStr(query);
    event.setOperation(op);
    event.setInputs(inputs);
    event.setOutputs(outputs);

    return event;
}
 
Example 20
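AtlasEntity-based validateProcess(): looks up the registered process by GUID and validates its input and output tables.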
private AtlasEntity validateProcess(HiveEventContext event, Set<ReadEntity> inputTables, Set<WriteEntity> outputTables) throws Exception {
    String      processId     = assertProcessIsRegistered(event, inputTables, outputTables);
    AtlasEntity processEntity = atlasClientV2.getEntityByGuid(processId).getEntity();

    validateInputTables(processEntity, inputTables);
    validateOutputTables(processEntity, outputTables);

    return processEntity;
}
 
Example 21
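The newer version of the temp-table insert test: the output WriteEntity is marked as an INSERT write and both the process and its process-execution relationship are validated.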
@Test(enabled = false)
public void testInsertIntoTempTable() throws Exception {
    String tableName       = createTable();
    String insertTableName = createTable(false, false, true);

    assertTableIsRegistered(DEFAULT_DB, tableName);
    assertTableIsNotRegistered(DEFAULT_DB, insertTableName, true);

    String query = "insert into " + insertTableName + " select id, name from " + tableName;

    runCommand(query);

    Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
    Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);

    outputs.iterator().next().setWriteType(WriteEntity.WriteType.INSERT);

    HiveEventContext event = constructEvent(query,  HiveOperation.QUERY, inputs, outputs);
    AtlasEntity hiveProcess = validateProcess(event);
    AtlasEntity hiveProcessExecution = validateProcessExecution(hiveProcess, event);
    AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute(
            BaseHiveEvent.ATTRIBUTE_PROCESS));
    Assert.assertEquals(process.getGuid(), hiveProcess.getGuid());
    Assert.assertEquals(numberOfProcessExecutions(hiveProcess), 1);

    assertTableIsRegistered(DEFAULT_DB, tableName);
    assertTableIsRegistered(DEFAULT_DB, insertTableName, null, true);
}
 
Example 22
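Copies an event's inputs and outputs into sorted sets and returns the process qualified name computed from them.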
private String sortEventsAndGetProcessQualifiedName(final HiveEventContext event) throws HiveException{
    SortedSet<ReadEntity>  sortedHiveInputs  = event.getInputs() == null ? null : new TreeSet<ReadEntity>(entityComparator);
    SortedSet<WriteEntity> sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet<WriteEntity>(entityComparator);

    if (event.getInputs() != null) {
        sortedHiveInputs.addAll(event.getInputs());
    }

    if (event.getOutputs() != null) {
        sortedHiveOutputs.addAll(event.getOutputs());
    }

    return getProcessQualifiedName(hiveMetaStoreBridge, event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(event.getInputs()), getSortedProcessDataSets(event.getOutputs()));
}
 
Example 23
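AtlasEntity-based assertProcessIsRegistered(): sorts the entity sets, computes the process qualified name, and checks that the first entry of the registered entity's recent-queries attribute is the lower-cased query.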
private String assertProcessIsRegistered(final HiveEventContext event, final Set<ReadEntity> inputTbls, final Set<WriteEntity> outputTbls) throws Exception {
    try {
        SortedSet<ReadEntity>  sortedHiveInputs  = event.getInputs() == null ? null : new TreeSet<ReadEntity>(entityComparator);
        SortedSet<WriteEntity> sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet<WriteEntity>(entityComparator);

        if (event.getInputs() != null) {
            sortedHiveInputs.addAll(event.getInputs());
        }

        if (event.getOutputs() != null) {
            sortedHiveOutputs.addAll(event.getOutputs());
        }

        String processQFName = getProcessQualifiedName(hiveMetaStoreBridge, event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(inputTbls), getSortedProcessDataSets(outputTbls));

        LOG.debug("Searching for process with query {}", processQFName);

        return assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), ATTRIBUTE_QUALIFIED_NAME, processQFName, new AssertPredicate() {
            @Override
            public void assertOnEntity(final AtlasEntity entity) throws Exception {
                List<String> recentQueries = (List<String>) entity.getAttribute(BaseHiveEvent.ATTRIBUTE_RECENT_QUERIES);

                Assert.assertEquals(recentQueries.get(0), lower(event.getQueryStr()));
            }
        });
    } catch(Exception e) {
        LOG.error("Exception : ", e);
        throw e;
    }
}
 
Example 24
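Computes the process qualified name: for create operations it is derived from the refreshed output table, otherwise the operation name is followed by the sorted inputs and outputs separated by IO_SEP.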
@VisibleForTesting
protected static String getProcessQualifiedName(HiveMetaStoreBridge dgiBridge, HiveEventContext eventContext,
                                      final SortedSet<ReadEntity> sortedHiveInputs,
                                      final SortedSet<WriteEntity> sortedHiveOutputs,
                                      SortedMap<ReadEntity, AtlasEntity> hiveInputsMap,
                                      SortedMap<WriteEntity, AtlasEntity> hiveOutputsMap) throws HiveException {
    HiveOperation op = eventContext.getOperation();
    if (isCreateOp(eventContext)) {
        Entity entity = getEntityByType(sortedHiveOutputs, Entity.Type.TABLE);

        if (entity != null) {
            Table outTable = entity.getTable();
            //refresh table
            outTable = dgiBridge.getHiveClient().getTable(outTable.getDbName(), outTable.getTableName());
            return HiveMetaStoreBridge.getTableProcessQualifiedName(dgiBridge.getMetadataNamespace(), outTable);
        }
    }

    StringBuilder buffer = new StringBuilder(op.getOperationName());

    boolean ignoreHDFSPathsinQFName = ignoreHDFSPathsinQFName(op, sortedHiveInputs, sortedHiveOutputs);
    if ( ignoreHDFSPathsinQFName && LOG.isDebugEnabled()) {
        LOG.debug("Ignoring HDFS paths in qualifiedName for {} {} ", op, eventContext.getQueryStr());
    }

    addInputs(dgiBridge, op, sortedHiveInputs, buffer, hiveInputsMap, ignoreHDFSPathsinQFName);
    buffer.append(IO_SEP);
    addOutputs(dgiBridge, op, sortedHiveOutputs, buffer, hiveOutputsMap, ignoreHDFSPathsinQFName);
    LOG.info("Setting process qualified name to {}", buffer);
    return buffer.toString();
}
 
Example 25
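Decides per operation whether HDFS paths are ignored in the qualified name: LOAD and IMPORT look at the outputs, EXPORT at the inputs, and QUERY always ignores them.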
protected static boolean ignoreHDFSPathsinQFName(final HiveOperation op, final Set<ReadEntity> inputs, final Set<WriteEntity> outputs) {
    switch (op) {
        case LOAD:
        case IMPORT:
            return isPartitionBasedQuery(outputs);
        case EXPORT:
            return isPartitionBasedQuery(inputs);
        case QUERY:
            return true;
    }
    return false;
}
 
Example 26
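AtlasEntity-based addInputs() with the same de-duplication and HDFS-path handling as Example 3, but the input table is refreshed through refreshTable() before its creation time is appended.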
protected static void addInputs(HiveMetaStoreBridge hiveBridge, HiveOperation op, SortedSet<ReadEntity> sortedInputs, StringBuilder buffer, final Map<ReadEntity, AtlasEntity> refs, final boolean ignoreHDFSPathsInQFName) throws HiveException {
    if (refs != null) {
        if (sortedInputs != null) {
            Set<String> dataSetsProcessed = new LinkedHashSet<>();
            for (Entity input : sortedInputs) {

                if (!dataSetsProcessed.contains(input.getName().toLowerCase())) {
                    //HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
                    if (ignoreHDFSPathsInQFName &&
                            (Entity.Type.DFS_DIR.equals(input.getType()) || Entity.Type.LOCAL_DIR.equals(input.getType()))) {
                        LOG.debug("Skipping dfs dir input addition to process qualified name {} ", input.getName());
                    } else if (refs.containsKey(input)) {
                        if ( input.getType() == Entity.Type.PARTITION || input.getType() == Entity.Type.TABLE) {
                            Table inputTable = refreshTable(hiveBridge, input.getTable().getDbName(), input.getTable().getTableName());

                            if (inputTable != null) {
                                addDataset(buffer, refs.get(input), HiveMetaStoreBridge.getTableCreatedTime(inputTable));
                            }
                        } else {
                            addDataset(buffer, refs.get(input));
                        }
                    }

                    dataSetsProcessed.add(input.getName().toLowerCase());
                }
            }

        }
    }
}
 
Example 27
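Handles external tables: when the output table is external, a DFS_DIR ReadEntity is created for the table's data location and a process Referenceable linking that path to the table is registered.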
private void handleExternalTables(final HiveMetaStoreBridge dgiBridge, final HiveEventContext event, final LinkedHashMap<Type, Referenceable> tables) throws HiveException, MalformedURLException {
    List<Referenceable> entities = new ArrayList<>();
    final WriteEntity hiveEntity = (WriteEntity) getEntityByType(event.getOutputs(), Type.TABLE);

    Table hiveTable = hiveEntity == null ? null : hiveEntity.getTable();

    //Refresh to get the correct location
    if(hiveTable != null) {
        hiveTable = dgiBridge.hiveClient.getTable(hiveTable.getDbName(), hiveTable.getTableName());
    }

    if (hiveTable != null && TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) {
        LOG.info("Registering external table process {} ", event.getQueryStr());
        final String location = lower(hiveTable.getDataLocation().toString());
        final ReadEntity dfsEntity = new ReadEntity();
        dfsEntity.setTyp(Type.DFS_DIR);
        dfsEntity.setD(new Path(location));

        SortedMap<ReadEntity, Referenceable> hiveInputsMap = new TreeMap<ReadEntity, Referenceable>(entityComparator) {{
            put(dfsEntity, dgiBridge.fillHDFSDataSet(location));
        }};

        SortedMap<WriteEntity, Referenceable> hiveOutputsMap = new TreeMap<WriteEntity, Referenceable>(entityComparator) {{
            put(hiveEntity, tables.get(Type.TABLE));
        }};

        SortedSet<ReadEntity> sortedIps = new TreeSet<>(entityComparator);
        sortedIps.addAll(hiveInputsMap.keySet());
        SortedSet<WriteEntity> sortedOps = new TreeSet<>(entityComparator);
        sortedOps.addAll(hiveOutputsMap.keySet());

        Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event,
            sortedIps, sortedOps, hiveInputsMap, hiveOutputsMap);

        entities.addAll(tables.values());
        entities.add(processReferenceable);
        event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities));
    }
}
 
Example 28
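Builds the hive_process Referenceable: its qualified name is computed from the sorted ReadEntity/WriteEntity sets, and the inputs, outputs, query text, and timing metadata are set as attributes.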
private Referenceable getProcessReferenceable(HiveMetaStoreBridge dgiBridge, HiveEventContext hiveEvent,
    final SortedSet<ReadEntity> sortedHiveInputs, final SortedSet<WriteEntity> sortedHiveOutputs, SortedMap<ReadEntity, Referenceable> source, SortedMap<WriteEntity, Referenceable> target)
        throws HiveException {
    Referenceable processReferenceable = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName());

    String queryStr = lower(hiveEvent.getQueryStr());
    processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
            getProcessQualifiedName(dgiBridge, hiveEvent, sortedHiveInputs, sortedHiveOutputs, source, target));

    LOG.debug("Registering query: {}", queryStr);
    List<Referenceable> sourceList = new ArrayList<>(source.values());
    List<Referenceable> targetList = new ArrayList<>(target.values());

    //The serialization code expected a list
    if (sourceList != null && !sourceList.isEmpty()) {
        processReferenceable.set("inputs", sourceList);
    }
    if (targetList != null && !targetList.isEmpty()) {
        processReferenceable.set("outputs", targetList);
    }
    processReferenceable.set(AtlasClient.NAME, queryStr);

    processReferenceable.set("operationType", hiveEvent.getOperation().getOperationName());
    processReferenceable.set("startTime", new Date(hiveEvent.getQueryStartTime()));
    processReferenceable.set("userName", hiveEvent.getUser());
    processReferenceable.set("queryText", queryStr);
    processReferenceable.set("queryId", hiveEvent.getQueryId());
    processReferenceable.set("queryPlan", "Not Supported");
    processReferenceable.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, dgiBridge.getClusterName());

    List<String> recentQueries = new ArrayList<>(1);
    recentQueries.add(queryStr);
    processReferenceable.set("recentQueries", recentQueries);

    processReferenceable.set("endTime", new Date(System.currentTimeMillis()));
    //TODO set queryGraph
    return processReferenceable;
}
 
Example 29
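Referenceable-era getProcessQualifiedName(), structurally the same as Example 24 but keyed by cluster name.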
@VisibleForTesting
static String getProcessQualifiedName(HiveMetaStoreBridge dgiBridge, HiveEventContext eventContext,
                                      final SortedSet<ReadEntity> sortedHiveInputs,
                                      final SortedSet<WriteEntity> sortedHiveOutputs,
                                      SortedMap<ReadEntity, Referenceable> hiveInputsMap,
                                      SortedMap<WriteEntity, Referenceable> hiveOutputsMap) throws HiveException {
    HiveOperation op = eventContext.getOperation();
    if (isCreateOp(eventContext)) {
        Entity entity = getEntityByType(sortedHiveOutputs, Type.TABLE);

        if (entity != null) {
            Table outTable = entity.getTable();
            //refresh table
            outTable = dgiBridge.hiveClient.getTable(outTable.getDbName(), outTable.getTableName());
            return HiveMetaStoreBridge.getTableProcessQualifiedName(dgiBridge.getClusterName(), outTable);
        }
    }

    StringBuilder buffer = new StringBuilder(op.getOperationName());

    boolean ignoreHDFSPathsinQFName = ignoreHDFSPathsinQFName(op, sortedHiveInputs, sortedHiveOutputs);
    if ( ignoreHDFSPathsinQFName && LOG.isDebugEnabled()) {
        LOG.debug("Ignoring HDFS paths in qualifiedName for {} {} ", op, eventContext.getQueryStr());
    }

    addInputs(dgiBridge, op, sortedHiveInputs, buffer, hiveInputsMap, ignoreHDFSPathsinQFName);
    buffer.append(IO_SEP);
    addOutputs(dgiBridge, op, sortedHiveOutputs, buffer, hiveOutputsMap, ignoreHDFSPathsinQFName);
    LOG.info("Setting process qualified name to {}", buffer);
    return buffer.toString();
}
 
Example 30
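Referenceable-era counterpart of Example 25: HDFS paths are always ignored for QUERY, while LOAD/IMPORT/EXPORT depend on whether the query is partition-based.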
private static boolean ignoreHDFSPathsinQFName(final HiveOperation op, final Set<ReadEntity> inputs, final Set<WriteEntity> outputs) {
    switch (op) {
    case LOAD:
    case IMPORT:
        return isPartitionBasedQuery(outputs);
    case EXPORT:
        return isPartitionBasedQuery(inputs);
    case QUERY:
        return true;
    }
    return false;
}