Java source code examples: htsjdk.variant.variantcontext.VariantContextBuilder

Example 1
@Test
public void filterOutHetSinglton2() {
 VariantContextBuilder b = new VariantContextBuilder();
 Allele a1 = Allele.create("A", true);
 Allele a2 = Allele.create("T", false);

 final List<Allele> allelesHet = new ArrayList<>(Arrays.asList(a1,a2));
 final List<Allele> allelesRef = new ArrayList<>(Arrays.asList(a1,a1));
 final List<Allele> allelesAlt = new ArrayList<>(Arrays.asList(a2,a2));

 // this does not have a het singleton.
 Collection<Genotype> genotypes = new ArrayList<>();
 genotypes.add(GenotypeBuilder.create("donor1", allelesRef));
 genotypes.add(GenotypeBuilder.create("donor2", allelesHet));
 genotypes.add(GenotypeBuilder.create("donor3", allelesHet));

 VariantContext vc = b.alleles(allelesHet).start(1).stop(1).chr("1").genotypes(genotypes).make();
 Assert.assertNotNull(vc);

 Iterator<VariantContext> underlyingIterator = Collections.emptyIterator();

 VariantContextSingletonFilter f = new VariantContextSingletonFilter(underlyingIterator, true);
 boolean t1 = f.filterOut(vc);
 Assert.assertTrue(t1);

}
 
Example 2
/**
 * Filters out variants by testing them against the provided
 * filter key and threshold.
 *
 * Variants whose value is below the specified threshold (or null)
 * are filtered out, citing the given reason.
 *
 * @throws ClassCastException if the value corresponding to the provided key cannot be cast to a {@link Double}
 */
private static VariantContext applyFilters(final VariantContext variantContext,
                                           final List<StructuralVariantFilter> filters) {

    final Set<String> appliedFilters = new HashSet<>();
    for (final StructuralVariantFilter filter : filters) {
        if ( !filter.test(variantContext) )
            appliedFilters.add(filter.getName());
    }

    if (appliedFilters.isEmpty())
        return variantContext;
    else {
        return new VariantContextBuilder(variantContext).filters(appliedFilters).make();
    }
}
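
The filters(...) call above replaces the variant's FILTER field with the set of failed-filter names. A minimal standalone sketch of the same pattern, reusing the variantContext from the method above with a hypothetical filter name:

// Hypothetical failed-filter name; in applyFilters these names come from
// StructuralVariantFilter.getName() for every filter the variant fails.
final Set<String> failedFilters = new HashSet<>(Collections.singleton("LOW_QUAL"));
final VariantContext annotated = failedFilters.isEmpty()
        ? variantContext
        : new VariantContextBuilder(variantContext).filters(failedFilters).make();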
 
Example 3
/**
 * Loads an external cnv call list and returns the results in an SVIntervalTree. NB: the contig indices in the SVIntervalTree
 * are based on the sequence indices in the SAM header, _NOT_ the ReadMetadata (which we might not have access to at this
 * time).
 */
public static SVIntervalTree<VariantContext> loadCNVCalls(final String cnvCallsFile,
                                                           final SAMFileHeader headerForReads) {
    Utils.validate(cnvCallsFile != null, "Can't load null CNV calls file");
    try ( final FeatureDataSource<VariantContext> dataSource = new FeatureDataSource<>(cnvCallsFile, null, 0, null) ) {

        final String sampleId = SVUtils.getSampleId(headerForReads);
        validateCNVcallDataSource(headerForReads, sampleId, dataSource);

        final SVIntervalTree<VariantContext> cnvCallTree = new SVIntervalTree<>();
        Utils.stream(dataSource.iterator())
                .map(vc -> new VariantContextBuilder(vc).genotypes(vc.getGenotype(sampleId)).make()) // forces a decode of the genotype for serialization purposes
                .map(vc -> new Tuple2<>(new SVInterval(headerForReads.getSequenceIndex(vc.getContig()), vc.getStart(), vc.getEnd()),vc))
                .forEach(pv -> cnvCallTree.put(pv._1(), pv._2()));
        return cnvCallTree;
    }
}
 
Example 4
@Test
public void testLeftAlignWithVaryingMaxDistances() {

    final byte[] refSequence = new byte[200];
    Arrays.fill(refSequence, 0, 100, (byte) 'C');
    Arrays.fill(refSequence, 100, 200, (byte) 'A');

    final String contig = "1";
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, refSequence.length);
    final SimpleInterval interval = new SimpleInterval(contig, 1, refSequence.length);


    final ReferenceBases refBases = new ReferenceBases(refSequence, interval);
    final ReferenceDataSource referenceDataSource = new ReferenceMemorySource(refBases, header.getSequenceDictionary());
    final ReferenceContext referenceContext = new ReferenceContext(referenceDataSource, interval);

    final List<Allele> alleles = Arrays.asList(Allele.create("AA", true), Allele.create("A", false));
    for (int maxShift : new int[] {0, 1, 5, 10, 30}) {
        for (int start : new int[]{101, 105, 109, 110, 111, 115, 120, 130, 141}) {
            final VariantContext vc = new VariantContextBuilder("source", contig, start, start + 1, alleles).make();
            final VariantContext realignedV = LeftAlignAndTrimVariants.leftAlignAndTrim(vc, referenceContext, maxShift, true);

            Assert.assertEquals(realignedV.getStart(), Math.max(start - maxShift, 100));
        }
    }
}
 
Example 5
@DataProvider(name = "testUsingADDataProvider")
public Object[][] testUsingADDataProvider() {
    final Allele A = Allele.create("A", true);
    final Allele C = Allele.create("C");

    final List<Allele> AA = Arrays.asList(A, A);
    final List<Allele> AC = Arrays.asList(A, C);

    final Genotype gAC = new GenotypeBuilder("1", AC).AD(new int[]{5,5}).make();
    final Genotype gAA = new GenotypeBuilder("2", AA).AD(new int[]{10,0}).make();

    final VariantContext vc = new VariantContextBuilder("test", "20", 10, 10, AC).genotypes(Arrays.asList(gAA, gAC)).make();

    return new Object[][] {
            {vc, gAC, new double[]{0.5}},
            {vc, gAA, new double[]{0.0}}
        };
}
 
Example 6
@Test(dataProvider = "MaxMinIndelSize")
public void maxIndelSizeTest(final int size, final int otherSize, final int max, final int min, final String op) {

    final byte[] largerAllele = Utils.dupBytes((byte) 'A', size+1);
    final byte[] smallerAllele = Utils.dupBytes((byte) 'A', 1);

    final List<Allele> alleles = new ArrayList<>(2);
    final Allele ref = Allele.create(op.equals("I") ? smallerAllele : largerAllele, true);
    final Allele alt = Allele.create(op.equals("D") ? smallerAllele : largerAllele, false);
    alleles.add(ref);
    alleles.add(alt);

    final VariantContext vc = new VariantContextBuilder("test", "1", 10, 10 + ref.length() - 1, alleles).make();

    final boolean hasIndelTooLargeOrSmall = SelectVariants.containsIndelLargerOrSmallerThan(vc, max, min);
    Assert.assertEquals(hasIndelTooLargeOrSmall, size > max || size < min);
}
 
Example 7
@Test
public void testDoesReadHaveN() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    final String contig = "chr1";

    final int pos = 100;
    final VariantContext vc = new VariantContextBuilder().chr(contig).start(pos).stop(pos).alleles("A","C").make();

    final GATKRead nonOverlappingUpstreamRead = ArtificialReadUtils.createArtificialRead(header, "read", 0, 96, new byte[] {'A','N','G'}, new byte[] {20,20,20}, "3M");
    Assert.assertFalse(CountNs.doesReadHaveN(nonOverlappingUpstreamRead, vc));

    final GATKRead nonOverlappingDownstreamRead = ArtificialReadUtils.createArtificialRead(header, "read", 0, 101, new byte[] {'A','N','G'}, new byte[] {20,20,20}, "3M");
    Assert.assertFalse(CountNs.doesReadHaveN(nonOverlappingDownstreamRead, vc));

    final GATKRead spanningDeletionRead = ArtificialReadUtils.createArtificialRead(header, "read", 0, 95, new byte[] {'N','N','N','N','N','N'}, new byte[] {20,20,20,20,20,20}, "3M10D3M");
    Assert.assertFalse(CountNs.doesReadHaveN(spanningDeletionRead, vc));

    final GATKRead notN = ArtificialReadUtils.createArtificialRead(header, "read", 0, 99, new byte[] {'A','C','G'}, new byte[] {20,20,20}, "3M");
    Assert.assertFalse(CountNs.doesReadHaveN(notN, vc));

    final GATKRead yesN = ArtificialReadUtils.createArtificialRead(header, "read", 0, 99, new byte[] {'A','N','G'}, new byte[] {20,20,20}, "3M");
    Assert.assertTrue(CountNs.doesReadHaveN(yesN, vc));

}
 
Example 8
/**
 * Convert a segment (as annotated interval) to a {@link VariantContext}.  The variant context will have a ref allele
 *  of the starting base of the segment.  The end position will be in the attribute {@link VCFConstants#END_KEY}.
 *  Any remaining annotations in the annotated interval will be converted to string attributes.  The segment call
 *  will be the alternate allele.  Currently, the alternate allele will be one of neutral, amplification, or deletion.
 *
 * The variant context will use symbolic alleles for the calls (copy neutral, amplification, or deletion).
 * See {@link CalledCopyRatioSegment.Call} for insertions and deletions.
 *
 * @param segment Never {@code null}
 * @param referenceContext Never {@code null}
 * @return Never {@code null}
 */
public static VariantContext convert(final AnnotatedInterval segment, final ReferenceContext referenceContext) {
    Utils.nonNull(segment);
    Utils.nonNull(referenceContext);
    final SimpleInterval startPointInterval = new SimpleInterval(segment.getContig(), segment.getStart(), segment.getStart());
    final Allele refAlleleAtStart = Allele.create(referenceContext.getBases(startPointInterval), true);

    final CalledCopyRatioSegment.Call call = retrieveCall(segment);

    final VariantContextBuilder variantContextBuilder = new VariantContextBuilder().chr(segment.getContig())
            .start(segment.getStart())
            .stop(segment.getEnd())
            .alleles(Arrays.asList(
                    Allele.create(refAlleleAtStart, false),
                    convertActualSegCallToAllele(call)
            ))
            .attribute(VCFConstants.END_KEY, segment.getEnd());

    // Grab every field in the annotated interval and make it a string attribute
    segment.getAnnotations().keySet().stream().forEach(k -> variantContextBuilder.attribute(k, segment.getAnnotationValue(k)));

    return variantContextBuilder
            .make();
}
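
For reference, a minimal sketch (with hypothetical coordinates) of a symbolic-allele record like the one built above: htsjdk accepts symbolic alternate alleles such as <DEL>, and the segment end is carried both in the stop position and in the END attribute.

final Allele ref = Allele.create("A", true);
final Allele symbolicAlt = Allele.create("<DEL>", false); // symbolic alternate allele
final VariantContext segmentLike = new VariantContextBuilder()
        .chr("1").start(100).stop(500)
        .alleles(Arrays.asList(ref, symbolicAlt))
        .attribute(VCFConstants.END_KEY, 500)
        .make();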
 
Example 9
/**
 * Convert a short duplication call, i.e. one whose duplicated range is < 50 bp, to an insertion call.
 */
@VisibleForTesting
static VariantContext postProcessConvertShortDupToIns(final VariantContext simple) {
    final String type = simple.getAttributeAsString(SVTYPE, "");
    if ( type.equals(SimpleSVType.SupportedType.DUP.name()) ) {
        final SimpleInterval duplicatedRegion = new SimpleInterval(simple.getAttributeAsString(DUP_REPEAT_UNIT_REF_SPAN, ""));
        if (duplicatedRegion.size() > EVENT_SIZE_THRESHOLD) {
            return simple;
        } else {
            return new VariantContextBuilder(simple)
                    .alleles(Arrays.asList(simple.getReference(), altSymbAlleleIns))
                    .rmAttribute(SVTYPE)
                    .attribute(SVTYPE, SimpleSVType.SupportedType.INS.name())
                    .make();
        }
    } else
        return simple;
}
 
Example 10
public void writeOutRecalibrationTable(final VariantContextWriter recalWriter, final SAMSequenceDictionary seqDictionary) {
    // we need to sort in coordinate order to produce a valid VCF
    Collections.sort( data, VariantDatum.getComparator(seqDictionary) );

    // create dummy alleles to be used
    List<Allele> alleles = Arrays.asList(Allele.create("N", true), Allele.create("<VQSR>", false));

    for( final VariantDatum datum : data ) {
        if (VRAC.useASannotations)
            alleles = Arrays.asList(datum.referenceAllele, datum.alternateAllele); //use the alleles to distinguish between multiallelics in AS mode
        VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getEnd(), alleles);
        builder.attribute(VCFConstants.END_KEY, datum.loc.getEnd());
        builder.attribute(GATKVCFConstants.VQS_LOD_KEY, String.format("%.4f", datum.lod));
        builder.attribute(GATKVCFConstants.CULPRIT_KEY, (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"));

        if ( datum.atTrainingSite ) builder.attribute(GATKVCFConstants.POSITIVE_LABEL_KEY, true);
        if ( datum.atAntiTrainingSite ) builder.attribute(GATKVCFConstants.NEGATIVE_LABEL_KEY, true);

        recalWriter.add(builder.make());
    }
}
 
Example 11
@Override
protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) {
    final VariantContextBuilder builder = new VariantContextBuilder(variant);

    if (removeOldFilters) {
        builder.unfiltered();
    }

    if (variant.hasAttribute(infoKey)) {
        final double score = Double.parseDouble((String) variant.getAttribute(infoKey));
        if (variant.isSNP() && snpCutoffs.size() != 0 && isTrancheFiltered(score, snpCutoffs)) {
            builder.filter(filterStringFromScore(SNPString, score, snpTranches, snpCutoffs));
            filteredSnps++;
        } else if (variant.isIndel() && indelCutoffs.size() != 0 && isTrancheFiltered(score, indelCutoffs)) {
            builder.filter(filterStringFromScore(INDELString, score, indelTranches, indelCutoffs));
            filteredIndels++;
        }
    }

    if (builder.getFilters() == null || builder.getFilters().size() == 0){
        builder.passFilters();
    }
    
    vcfWriter.add(builder.make());
}
 
Example 12
@Override
protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) {
    String sv = scoreScan.nextLine();
    String[] scoredVariant = sv.split("\\t");

    if (variant.getContig().equals(scoredVariant[CONTIG_INDEX])
            && Integer.toString(variant.getStart()).equals(scoredVariant[POS_INDEX])
            && variant.getReference().getBaseString().equals(scoredVariant[REF_INDEX])
            && variant.getAlternateAlleles().toString().equals(scoredVariant[ALT_INDEX])) {

        final VariantContextBuilder builder = new VariantContextBuilder(variant);
        if (scoredVariant.length > KEY_INDEX) {
            builder.attribute(scoreKey, scoredVariant[KEY_INDEX]);
        }
        vcfWriter.add(builder.make());

    } else {
        String errorMsg = "Score file out of sync with original VCF. Score file has:" + sv;
        errorMsg += "\n But VCF has:" + variant.toStringWithoutGenotypes();
        throw new GATKException(errorMsg);
    }
}
 
Example 13
private VariantContext buildVariantContextWithDroppedAnnotationsRemoved(final VariantContext vc) {
    if (infoAnnotationsToDrop.isEmpty() && genotypeAnnotationsToDrop.isEmpty()) {
        return vc;
    }
    final VariantContextBuilder rmAnnotationsBuilder = new VariantContextBuilder(vc);
    for (String infoField : infoAnnotationsToDrop) {
        rmAnnotationsBuilder.rmAttribute(infoField);
    }
    if (!genotypeAnnotationsToDrop.isEmpty()) {
        final ArrayList<Genotype> genotypesToWrite = new ArrayList<>();
        for (Genotype genotype : vc.getGenotypes()) {
            final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).noAttributes();
            final Map<String, Object> attributes = new HashMap<>(genotype.getExtendedAttributes());
            for (String genotypeAnnotation : genotypeAnnotationsToDrop) {
                attributes.remove(genotypeAnnotation);
            }
            genotypeBuilder.attributes(attributes);
            genotypesToWrite.add(genotypeBuilder.make());
        }
        rmAnnotationsBuilder.genotypes(GenotypesContext.create(genotypesToWrite));
    }
    return rmAnnotationsBuilder.make();
}
 
Example 14
/**
 * Adds the new allele filters to the existing allele filters in the vc. Computes whether there are
 * new site filters and updates the filters in the vc accordingly: the filters for each allele are set,
 * and the intersection of the allele filters is applied at the site level, with the site set to PASS
 * if the intersection is empty.
 * @param vc the variant context to add the filters to, both at the allele and site level
 * @param newAlleleFilters filters to be applied to each allele; the intersection of these filters is applied at the site level
 * @param invalidatePreviousFilters whether existing filters should be removed
 * @return the updated variant context
 */
public static VariantContext addAlleleAndSiteFilters(VariantContext vc, List<Set<String>> newAlleleFilters, boolean invalidatePreviousFilters) {
    if (newAlleleFilters.isEmpty()) {
        return vc;
    }
    List<List<String>> currentAlleleFilters = decodeASFilters(vc);
    if (!currentAlleleFilters.isEmpty() && newAlleleFilters.size() != currentAlleleFilters.size()) {
        // log an error
        return vc;
    }

    if (currentAlleleFilters.isEmpty() || invalidatePreviousFilters) {
        currentAlleleFilters = new ArrayList<>(Collections.nCopies(newAlleleFilters.size(), Collections.singletonList(GATKVCFConstants.SITE_LEVEL_FILTERS)));
    }
    ListIterator<List<String>> currentAlleleFiltersIt = currentAlleleFilters.listIterator();
    List<List<String>> updatedAlleleFilters = newAlleleFilters.stream().map(newfilters -> addAlleleFilters(currentAlleleFiltersIt.next(), newfilters.stream().collect(Collectors.toList()))).collect(Collectors.toList());
    String encodedFilters = encodeASFilters(updatedAlleleFilters);
    VariantContextBuilder vcb = new VariantContextBuilder(vc).attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, encodedFilters);

    if (invalidatePreviousFilters) {
        vcb.unfiltered();
    }
    Set<String> siteFiltersToAdd = newAlleleFilters.stream().skip(1)
            .collect(()->new HashSet<>(newAlleleFilters.get(0)), Set::retainAll, Set::retainAll);
    siteFiltersToAdd.stream().forEach(filter -> vcb.filter(filter));
    if ((vcb.getFilters() == null || vcb.getFilters().isEmpty()) && !invalidatePreviousFilters) {
        vcb.passFilters();
    }
    return vcb.make();
}
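
The three-argument collect above computes a set intersection: it seeds a HashSet with the first allele's filters, then retains only the filters shared by every remaining allele. A standalone sketch with hypothetical filter names:

final List<Set<String>> perAlleleFilters = Arrays.asList(
        new HashSet<>(Arrays.asList("weak_evidence", "strand_bias")),
        new HashSet<>(Arrays.asList("strand_bias")));
final Set<String> siteFilters = perAlleleFilters.stream().skip(1)
        .collect(() -> new HashSet<>(perAlleleFilters.get(0)), Set::retainAll, Set::retainAll);
// siteFilters now contains only "strand_bias"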
 
Example 15
private void run() {
    fileWriter.writeHeader(fileReader.getFileHeader());
    for (final VariantContext context : fileReader) {

        final VariantContextBuilder builder = new VariantContextBuilder(context);
        final List<Genotype> genotypes = Lists.newArrayList();

        for (int genotypeIndex = 0; genotypeIndex < context.getGenotypes().size(); genotypeIndex++) {
            Genotype genotype = context.getGenotype(genotypeIndex);

            if (genotype.hasAD() && genotype.hasDP()) {
                int[] ad = genotype.getAD();
                int total = 0;
                boolean updateRecord = false;
                for (int i = 0; i < ad.length; i++) {
                    int adValue = ad[i];
                    if (adValue < 0) {
                        updateRecord = true;
                        ad[i] = Math.abs(adValue);
                    }
                    total += Math.abs(adValue);
                }

                if (updateRecord) {
                    LOGGER.info("Updated entry at {}:{}", context.getContig(), context.getStart());
                    Genotype updated = new GenotypeBuilder(genotype).AD(ad).DP(total).make();
                    genotypes.add(updated);
                }
                else {
                    genotypes.add(genotype);
                }
            } else {
                // keep genotypes that lack AD or DP; otherwise they would be dropped from the output
                genotypes.add(genotype);
            }
        }

        builder.genotypes(genotypes);
        fileWriter.add(builder.make());
    }
}
 
Example 16
/**
 * Depending on how the ref segment is present in the alt arrangement (if at all), the logic is as follows (order is important):
 * <ul>
 *     <li>
 *         if the ref segment appears inverted and is large enough
 *         <ul>
 *             <li> an INV call is warranted </li>
 *             <li> INS call(s) before and after the INV, if the inserted sequence is long enough </li>
 *         </ul>
 *     </li>
 *
 *     <li>
 *         otherwise, if the ref segment is present as-is, i.e. no deletion call can be made,
 *         make insertion calls when possible
 *     </li>
 *     <li>
 *         otherwise
 *         <ul>
 *             <li> if the segment is large enough, make a DEL call, and insertion calls when possible </li>
 *             <li> otherwise a single fat INS call</li>
 *         </ul>
 *     </li>
 * </ul>
 *
 * <p>
 *     Note that the above logic has a bias towards INV calls: when the (large enough) reference
 *     segment appears both as-is and inverted, it will emit at least an INV call, even though the
 *     (inverted) duplication(s) could also be reported as a DUP call.
 * </p>
 */
@Override
List<VariantContext> extract(final VariantContext complexVC, final ReferenceMultiSparkSource reference) {

    final List<String> segments = SVUtils.getAttributeAsStringList(complexVC, CPX_SV_REF_SEGMENTS);
    if (segments.isEmpty()) return whenZeroSegments(complexVC, reference);

    final SimpleInterval refSegment = new SimpleInterval(segments.get(0));
    final List<String> altArrangement = SVUtils.getAttributeAsStringList(complexVC, CPX_EVENT_ALT_ARRANGEMENTS);
    final int altSeqLength = complexVC.getAttributeAsString(SEQ_ALT_HAPLOTYPE, "").length();

    final List<VariantContextBuilder> result = new ArrayList<>();

    final int asIsAppearanceIdx = altArrangement.indexOf("1");
    final int invertedAppearanceIdx = altArrangement.indexOf("-1");
    if (invertedAppearanceIdx != -1 && refSegment.size() > EVENT_SIZE_THRESHOLD) { // inversion call
        whenInversionIsWarranted(refSegment, invertedAppearanceIdx, altArrangement, reference, result);
    } else if (asIsAppearanceIdx != -1) { // no inverted appearance, or the segment appears inverted but is not large enough, while also appearing as-is, so no deletion
        whenNoDeletionIsAllowed(refSegment, asIsAppearanceIdx, altArrangement, altSeqLength, reference, result);
    } else { // no as-is appearance && (an inverted appearance may be present but not large enough)
        whenNoInvAndNoAsIsAppearance(refSegment, altSeqLength, reference, result);
    }

    final String sourceID = complexVC.getID();
    final List<String> evidenceContigs = SVUtils.getAttributeAsStringList(complexVC, CONTIG_NAMES);
    final List<String> mappingQualities = SVUtils.getAttributeAsStringList(complexVC, MAPPING_QUALITIES);
    final int maxAlignLength = complexVC.getAttributeAsInt(MAX_ALIGN_LENGTH, 0);
    return result.stream()
            .map(vc -> vc.attribute(CPX_EVENT_KEY, sourceID).attribute(CONTIG_NAMES, evidenceContigs)
                         .attribute(MAPPING_QUALITIES, mappingQualities)
                         .attribute(MAX_ALIGN_LENGTH, maxAlignLength).make())
            .collect(Collectors.toList());
}
 
Example 17
@NotNull
private VariantContext enrich(@NotNull final VariantContext context) {
    VariantContextBuilder builder =
            new VariantContextBuilder(context).attribute(HOTSPOT_FLAG, false).attribute(NEAR_HOTSPOT_FLAG, false);

    if (hotspotEnrichment.isOnHotspot(context)) {
        return builder.attribute(HOTSPOT_FLAG, true).make();
    } else if (hotspotEnrichment.isNearHotspot(context)) {
        return builder.attribute(NEAR_HOTSPOT_FLAG, true).make();
    }

    return builder.make();

}
 
Example 18
/**
 * Note that {@code delRange} is expected to be pre-processed to be VCF-spec compatible,
 * e.g. if chr1:101-200 is deleted, then {@code delRange} should be chr1:100-200
 * @param delRange the padded deletion range
 */
@VisibleForTesting
static VariantContextBuilder makeDeletion(final SimpleInterval delRange, final Allele refAllele) {

    return new VariantContextBuilder()
            .chr(delRange.getContig()).start(delRange.getStart()).stop(delRange.getEnd())
            .alleles(Arrays.asList(refAllele, altSymbAlleleDel))
            .id(makeID(SimpleSVType.SupportedType.DEL.name(), delRange.getContig(), delRange.getStart(), delRange.getEnd()))
            .attribute(VCFConstants.END_KEY, delRange.getEnd())
            .attribute(SVLEN, - delRange.size() + 1)
            .attribute(SVTYPE, SimpleSVType.SupportedType.DEL.name());
}
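
A hypothetical usage of the builder above, assuming refAllele holds the padding base at chr1:100: the deletion of chr1:101-200 is passed in as the padded range chr1:100-200, so SVLEN works out to -delRange.size() + 1 = -100.

final SimpleInterval delRange = new SimpleInterval("chr1", 100, 200); // padded per the VCF spec
final VariantContext deletion = makeDeletion(delRange, refAllele).make();
// deletion.getStart() == 100, END attribute == 200, SVLEN attribute == -100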
 
Example 19
private static void whenInversionIsWarranted(final SimpleInterval refSegment, final int invertedAppearanceIdx,
                                             final List<String> altArrangement, final ReferenceMultiSparkSource reference,
                                             final List<VariantContextBuilder> result) {

    final Allele anchorBaseRefAllele = getAnchorBaseRefAllele(refSegment.getContig(), refSegment.getStart(), reference);
    result.add( makeInversion(refSegment, anchorBaseRefAllele) );

    // further check if the alt seq length is long enough to trigger an insertion as well,
    // but guard against the case smallIns1 + INV + smallIns2; in theory one could annotate the inversion
    // with micro-insertions in that case, but we try to keep annotations minimal here
    final Allele anchorBaseRefAlleleFront = getAnchorBaseRefAllele(refSegment.getContig(), refSegment.getStart() - 1, reference);
    final Allele anchorBaseRefAlleleRear  = getAnchorBaseRefAllele(refSegment.getContig(), refSegment.getEnd(), reference);
    extractFrontAndRearInsertions(refSegment, invertedAppearanceIdx, altArrangement,
            anchorBaseRefAlleleFront, anchorBaseRefAlleleRear, result);
}
 
Example 20
@NotNull
private static VariantContext addRecoveryDetails(@NotNull final VariantContext context, @NotNull final String recoveryMethod,
        @NotNull final List<String> recoveryFilters) {
    return new VariantContextBuilder(context).unfiltered()
            .attribute(RECOVERED, true)
            .attribute(RECOVERY_METHOD, recoveryMethod)
            .attribute(RECOVERY_FILTER, recoveryFilters)
            .make();
}
 
Example 21
@NotNull
private static VariantContext dummyContext() {
    final Collection<Allele> alleles = Lists.newArrayList(Allele.create("N", true));

    return new VariantContextBuilder("purple", "2", 1, 1, alleles)
            .noGenotypes()
            .make();
}
 
Example 22
private void writeHeaderAndBadVariant(final VariantContextWriter writer) {
    final VariantContextBuilder vcBuilder = new VariantContextBuilder(
            "chr1","1", 1, 1, Arrays.asList(Allele.create("A", true)));
    vcBuilder.attribute("fake", new Object());
    final VariantContext vc = vcBuilder.make();
    final VCFHeader vcfHeader = new VCFHeader();
    writer.writeHeader(vcfHeader);
    writer.add(vc);
}
 
Example 23
private VariantContext transformAttributesToStandardNames(final VariantContext vc) {

        final Map<String,Object> transformedAttributes = vc.getAttributes().entrySet().stream()
                .collect(Collectors.toMap(e-> aliasToKeyMapping.getOrDefault(e.getKey(), e.getKey()), e -> e.getValue()));
        final VariantContextBuilder vcb = new VariantContextBuilder(vc).attributes(transformedAttributes);
        return vcb.make();
    }
 
Example 24
private VariantContext makeVC() {
    final GenotypesContext testGC = GenotypesContext.create(2);
    final Allele refAllele = Allele.create("A", true);
    final Allele altAllele = Allele.create("T");

    return (new VariantContextBuilder())
            .alleles(Arrays.asList(refAllele, altAllele)).chr("1").start(15L).stop(15L).genotypes(testGC).make();
}
 
Example 25
private static void writeHotspots(@NotNull String hotspotVcf, @NotNull Map<ViccEntry, ViccExtractionResult> resultsPerEntry) {
    VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(hotspotVcf)
            .setOutputFileType(VariantContextWriterBuilder.OutputType.VCF)
            .setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER)
            .modifyOption(Options.INDEX_ON_THE_FLY, false)
            .build();

    VCFHeader header = new VCFHeader(Sets.newHashSet(), Lists.newArrayList());
    writer.writeHeader(header);

    for (Map.Entry<VariantHotspot, HotspotAnnotation> entry : convertAndSort(resultsPerEntry).entrySet()) {
        VariantHotspot hotspot = entry.getKey();
        HotspotAnnotation annotation = entry.getValue();
        List<Allele> hotspotAlleles = buildAlleles(hotspot);

        VariantContext variantContext = new VariantContextBuilder().noGenotypes()
                .source("VICC")
                .chr(hotspot.chromosome())
                .start(hotspot.position())
                .alleles(hotspotAlleles)
                .computeEndFromAlleles(hotspotAlleles, (int) hotspot.position())
                .attribute("sources", annotation.sources())
                .attribute("feature",
                        ProteinKeyFormatter.toProteinKey(annotation.gene(), annotation.transcript(), annotation.proteinAnnotation()))
                .make();

        LOGGER.debug("Writing {}", variantContext);
        writer.add(variantContext);

    }
    writer.close();
}
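
The computeEndFromAlleles(...) call above derives the stop position from the reference allele, so no explicit stop(...) is needed. A minimal sketch with concrete alleles: for an AT -> A deletion starting at position 100, the computed end is 101 (start + ref length - 1).

final List<Allele> delAlleles = Arrays.asList(Allele.create("AT", true), Allele.create("A", false));
final VariantContext sketch = new VariantContextBuilder().source("sketch")
        .chr("1").start(100)
        .alleles(delAlleles)
        .computeEndFromAlleles(delAlleles, 100)
        .make();
// sketch.getEnd() == 101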
 
Example 26
@NotNull
static VariantContext simplifyVariant(@NotNull final VariantContext variant, @NotNull final String sampleName) {
    // Force GT to 0/1 even for variants with multiple alts
    final List<Allele> outputVariantAlleles = variant.getAlleles().subList(0, 2);
    final Genotype genotype = new GenotypeBuilder(sampleName, outputVariantAlleles).DP(getDP(variant)).AD(getAD(variant)).make();
    return new VariantContextBuilder(variant).genotypes(genotype).make();
}
 
Example 27
@Test(dataProvider = "gvcfCases")
public void testEnableDisableGVCFWriting(boolean writeGvcf, String extension) throws IOException {
    List<VariantContext> vcs = new ArrayList<>();
    for(int i = 1; i <= 10; i++) {
        final Allele A = Allele.create("A", true);
        final VariantContext vc = new VariantContextBuilder("hand crafted", "1", i, i,
                                                            Arrays.asList(A, Allele.NON_REF_ALLELE))
                .genotypes(new GenotypeBuilder(SAMPLE).alleles(Arrays.asList(A, A)).DP(10).GQ(10).PL(new int[]{0, 60, 10}).make())
                .make();
        vcs.add(vc);
    }

    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final File output = createTempFile(outputFileName, extension);
    VariantsSparkSink.writeVariants(ctx, output.toString(), ctx.parallelize(vcs), getHeader(), writeGvcf, Arrays.asList(100), 2, 1, true, true);

    checkFileExtensionConsistentWithContents(output.toString(), true);

    new CommandLineProgramTest(){
        @Override
        public String getTestedToolName(){
            return IndexFeatureFile.class.getSimpleName();
        }
    }.runCommandLine(new String[]{"-I", output.getAbsolutePath()});

    final List<VariantContext> writtenVcs = readVariants(output.toString());
    //if we are actually writing a gvcf, all the variant blocks will be merged into a single homref block
    Assert.assertEquals(writtenVcs.size(), writeGvcf ? 1 : 10);
    Assert.assertEquals(writtenVcs.stream().mapToInt(VariantContext::getStart).min().getAsInt(), 1);
    Assert.assertEquals(writtenVcs.stream().mapToInt(VariantContext::getEnd).max().getAsInt(), 10);

}
 
Example 28
@Override
public void apply(final AnnotatedInterval segment, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {

    // Convert segment into a VariantContext while honoring the funcotation engine's necessary conversions.
    final VariantContext segmentVariantContext = funcotatorEngine.getDefaultVariantTransformer()
            .andThen(getTransformAttributesToStandardNames())
            .apply(AnnotatedIntervalToSegmentVariantContextConverter.convert(segment, referenceContext));

    // Get the correct reference for B37/HG19 compliance:
    // This is necessary because of the variant transformation that gets applied in VariantWalkerBase::apply.
    final ReferenceContext correctReferenceContext = funcotatorEngine.getCorrectReferenceContext(segmentVariantContext, referenceContext);

    // funcotate
    //  The resulting funcotation map should only have one transcript ID (which is the "no transcript" ID).
    final FuncotationMap funcotationMap = funcotatorEngine.createFuncotationMapForSegment(segmentVariantContext, correctReferenceContext, featureContext);

    // This will propagate input variant context attributes to the output
    for (final String txId : funcotationMap.getTranscriptList()) {
        funcotationMap.add(txId, FuncotatorUtils.createFuncotationsFromMetadata(segmentVariantContext, createMetadata(), "FUNCOTATE_SEGMENTS"));
    }

    // Force the final output to have the same contig convention as the input.
    final VariantContext finalVC = new VariantContextBuilder(segmentVariantContext)
            .chr(segment.getContig())
            .make();

    // write the variant context
    outputRenderer.write(finalVC, funcotationMap);
}
 
Example 29
/**
 * Note that {@code delRange} is expected to be VCF spec compatible,
 * e.g. if chr1:101-200 is deleted, then {@code delRange} should be chr1:100-200
 */
static final VariantContextBuilder makeDeletion(final SimpleInterval delRange, final Allele refAllele, final boolean isFromDupContraction) {

    return new VariantContextBuilder()
            .chr(delRange.getContig()).start(delRange.getStart()).stop(delRange.getEnd())
            .alleles(Arrays.asList(refAllele, DEL_SYMB_ALLELE))
            .id(makeID((isFromDupContraction? DUP_TAN_CONTRACTION_INTERNAL_ID_START_STRING : SimpleSVType.SupportedType.DEL.name()), delRange.getContig(), delRange.getStart(), delRange.getContig(), delRange.getEnd(), ""))
            .attribute(VCFConstants.END_KEY, delRange.getEnd())
            .attribute(SVLEN, - delRange.size() + 1)
            .attribute(SVTYPE, SimpleSVType.SupportedType.DEL.name());
}