<!-- This XML tree contains the parameters to cactus_progressive.py -->
<!-- The distanceToAddToRootAlignment parameter is how much extra divergence distance to allow when aligning children of the root genome -->
<cactusWorkflowConfig distanceToAddToRootAlignment="0.1">
	<constants
		defaultMemory="mediumMemory"
		defaultOverlargeMemory="mediumMemory"
		defaultCpu="1"
		defaultOverlargeCpu="1"
	>
		<!-- Named memory sizes. Other tags in this file refer to them by name (e.g. memory="littleMemory")
		     to control the amount of memory and cpu the different jobs in a batch are using. -->
		<defines
			littleMemory="2000000000"
			mediumMemory="3500000000"
			bigMemory="5000000000"
		/>
		<!-- Divergence levels (one to five) used to control parameters that depend on phylogenetic distance;
		     the lastzArguments and annealingRounds tags below are keyed by the same names.
		     Setting useDefault to 0 will force it to use the default divergence controlled parameter. -->
		<divergences
			useDefault="0"
			one="0.1"
			two="0.15"
			three="0.2"
			four="0.25"
			five="0.35"
		/>
	</constants>
	<!-- The preprocessor tags are used to modify/check the input sequences before alignment -->
	<!-- The cutHeaders preprocess modifies fasta sequence names to help them pass checkUniqueHeaders. -->
	<!-- cutBefore: all characters up to and including the last occurrence of a character in this string are clipped out -->
	<!-- cutBeforeOcc: (if specified and > 0) only consider the first cutBeforeOcc occurrences of a character in cutBefore filter -->
	<!-- cutAfter: all characters after and including the first occurrence of a character in this string are clipped out -->
	<!-- cutAfter holds two delimiter characters: a space and a tab. The tab is written as the character reference &#9;
	     because XML attribute-value normalization replaces a literal tab with a space when the file is parsed,
	     which would leave cutAfter holding spaces only. -->
	<preprocessor memory="littleMemory" preprocessJob="cutHeaders" cutBefore="" cutAfter=" &#9;" active="1"/>
	<!-- The first preprocessor tag checks that the first word of every fasta header is unique, as this is required for HAL. It throws errors if this is not the case -->
	<!-- **NEW** It now enforces sequences are globally unique by adding "id=<event name>|" to each one -->
	<!-- The checkAssemblyHub option (if enabled) ensures that the first word contains only alphanumeric or '_', '-', ':', or '.' characters, and is unique. If you don't intend to make an assembly hub, you can turn off this option here. -->
	<preprocessor
		memory="littleMemory"
		preprocessJob="checkUniqueHeaders"
		checkAssemblyHub="1"
		active="1"
	/>
	<!-- The preprocessor for cactus_lastzRepeatMask masks every seed that is part of more than XX other alignments; this stops a combinatorial explosion in pairwise alignments. gpu sets the number of GPUs (if > 0, segalign is used instead of lastz; can be set to 'all' to use all available GPUs) -->
	<preprocessor
		unmask="0"
		chunkSize="10000000"
		proportionToSample="0.2"
		memory="littleMemory"
		preprocessJob="lastzRepeatMask"
		minPeriod="50"
		lastzOpts="--step=3 --ambiguous=iupac,100,100 --ungapped --queryhsplimit=keep,nowarn:1500"
		gpu="0"
		active="1"
	/>
	<!-- Softmask alpha-satellite using the dna-brnn tool -->
	<!-- This preprocessor is off by default, and will replace the lastzRepeatMask preprocessor via command line toggle (or setting active=1)-->
	<!-- It will attempt to load its included "attcc-alpha.knm" model by default unless a different one is specified below -->
	<!-- unmask: unmask all softmasked bases before running (so only intervals masked by dna-brnn are present at end) -->
	<!-- action: one of [softmask, hardmask, clip] -->
	<!-- minLength: only mask intervals longer than this length -->
	<preprocessor
		unmask="1"
		memory="littleMemory"
		preprocessJob="dna-brnn"
		dna-brnnOpts="-A"
		active="0"
		action="softmask"
		cpu="2"
		minLength="100000"
	/>

	<!-- Params for blast -->
	<!-- chunkSize The size of sequences in a blast job. Increase the chunkSize in the caf tag to reduce the number of blast jobs approximately quadratically -->
	<!-- overlapSize The amount of basepair overlap between chunks -->
	<!-- mapper The local alignment program to use, currently either lastz or minimap2 -->
	<!-- minimap2_params Parameters to use with minimap2 -->
	<!-- compressFiles Compress the local alignments. (I think this may be broken) -->
	<!-- filterByIdentity Filter alignments by % identity -->
	<!-- identityRatio Let L be the sum of the branch lengths in the guide tree on the path connecting the genomes to be aligned.
     Let IJC(L) be the inverse of the Jukes Cantor distance, expressed as the fraction of bases substituted. We filter all alignments
      with identity lower than 1 - IJC(L). -->
	<!-- minimumDistance A minimum for L (see above), so we don't filter out alignments with high identity. -->
	<!-- gpu Toggle on segAlign instead of lastz and set the number of gpus for each segalign job. 0: disable segalign, 'all': use all available GPUs -->
	<!-- lastzMemory The memory to allocate for each blast job -->
	<!-- chainMaxGapLength See paffy chain chainMaxGapLength param -->
	<!-- chainGapOpen See paffy chain chainGapOpen param -->
	<!-- chainGapExtend See paffy chain chainGapExtend param -->
	<!-- chainTrimFraction See paffy chain chainTrimFraction param -->
	<!-- pafTrimIdentity See paffy trim trimIdentity param-->
	<!-- minPrimaryChainScore Keep as primary alignments only those with a primary chain score greater than this threshold -->
	<!-- trimOutgroupFlanking: The amount of flanking sequence to leave on the ends of the trimmed outgroup fragments.
	   NB: this value should be larger than the 'splitIndelsLongerThanThis' value in the realign arguments -->
	<!-- trimFlanking: The length of flanking sequence to attach to the trimmed ingroup sequences -->
	<!-- trimMinSize: The minimum size of uncovered regions (*before* adding flanking sequence) to output from the
         trimming process -->
	<!-- trimIngroups Progressively filter ingroup sequences from alignment to successive outgroups as an alignment
	 is identified-->
	<!-- trimOutgroups Remove outgroup sequences that don't have an alignment to an ingroup sequence-->
	<!-- outputSecondaryAlignments Include secondary alignments in the output. If included CAF will use these -->
	<!-- dechunkBatchSize Parallelize paf_dechunks into batches of at most this size-->
	<!-- pickIngroupPrimaryAlignmentsSeparatelyToOutgroups Separately make ingroups pick their primary alignment to
	     other ingroups without outgroups, then get the outgroups to pick their primary alignment to the ingroups. If 0
	     get every sequence to pick its primary alignment without regard to if the other sequence is an ingroup or outgroup -->
	<blast
		chunkSize="90000000"
		overlapSize="10000"
		mapper="lastz"
		minimap2_params="-x asm20"
		minimumSequenceLengthForBlast="30"
		compressFiles="1"
		filterByIdentity="0"
		identityRatio="3"
		minimumDistance="0.01"
		gpu="0"
		lastzMemory="littleMemory"
		chainMaxGapLength="1000000"
		chainGapOpen="5000"
		chainGapExtend="1"
		chainTrimFraction="1.0"
		pafTrimIdentity="0.2"
		minPrimaryChainScore="10000"
		trimIngroups="1"
		trimOutgroups="1"
		trimMinSize="100"
		trimFlanking="100"
		trimOutgroupFlanking="2000"
		outputSecondaryAlignments="0"
		dechunkBatchSize="200"
		pickIngroupPrimaryAlignmentsSeparatelyToOutgroups="1"
	>

		<!-- lastz options keyed by divergence level (one to five, plus a default).
		     They are parametrised to produce the same results as the default settings, within a margin of
		     0.2% sensitivity, and should be very fast for close genomes. They were tuned using
		     blast/blastParametersScript.py. Faster settings would be possible below 0.05 divergence,
		     but these are kept robust to poor branch length estimates. -->
		<lastzArguments
			one="--step=2 --ambiguous=iupac,100,100 --ydrop=3000 --notransition"
			two="--step=5 --ambiguous=iupac,100,100 --ydrop=3000"
			three="--step=4 --ambiguous=iupac,100,100 --ydrop=3000"
			four="--step=3 --ambiguous=iupac,100,100 --ydrop=3000"
			five="--step=2 --ambiguous=iupac,100,100 --ydrop=3000"
			default="--step=1 --ambiguous=iupac,100,100 --ydrop=3000"
		/>
	</blast>

	<!-- Options for the setup stage. NOTE(review): makeEventHeadersAlphaNumeric is not documented in this file; confirm its meaning against the consuming code. -->
	<setup
		makeEventHeadersAlphaNumeric="0"
	/>

	<!-- The caf tag contains parameters for the caf algorithm. -->
	<!-- annealingRounds A string of increasing positive integers defining minimum chain lengths.
	The last value defines the length of the minimum chain in the graph.
	The earlier values define the minimum chain length as we progressively add alignments to the graph during each round.
	See DOI: 10.1101/gr.123356.111 for algorithm outline. -->
	<!-- deannealingRounds (aka melting rounds) A string of increasing positive integers defining minimum chain lengths. Each value gives the minimum chain length
	 in the graph to remove, so that we can progressively get to chains of at least a given length. See DOI: 10.1101/gr.123356.111 for algorithm outline. -->
	<!-- trim A string of positive integers, one for each annealing round. Gives the size from each match diagonal to trim off when adding to the graph. This
	is useful to remove edge-wander effects that come from transitively connecting locally inconsistent alignments-->
	<!-- blockTrim A positive integer. The amount to trim off each final alignment block in the graph, once the alignment graph is constructed (i.e. after the basic
	caf algorithm is finished).-->
	<!-- minimumBlockDegree The minimum number of sequences in a block for it to be kept as a block in the final alignment.
	Blocks with fewer sequences are pulled apart.-->
	<!-- minimumNumberOfSpecies The minimum number of species with sequences in a block for the block
	to be kept as a block in the final alignment. -->
	<!-- minimumIngroupDegree The minimum number of ingroup species with sequences in a block for the block
	to be kept as a block in the final alignment. -->
	<!-- minimumOutgroupDegree The minimum number of outgroup species with sequences in a block for the block
	to be kept as a block in the final alignment. -->
	<!-- minimumTreeCoverage The fraction of the tree spanned by species with sequences in a block for the block
	to be included as a block in the alignment. -->
	<!-- alignmentFilter TODO -->
	<!-- maxAdjacencyComponentSizeRatio TODO -->
	<!-- minLengthForChromosome TODO -->
	<!-- proportionOfUnalignedBasesForNewChromosome TODO-->
	<!-- maximumMedianSequenceLengthBetweenLinkedEnds TODO-->
	<!-- removeRecoverableChains TODO-->
	<!-- maxRecoverableChainsIterations TODO-->
	<!-- maxRecoverableChainLength TODO-->
	<!-- minimumBlockDegreeToCheckSupport Apply support filter to blocks of more than this degree (if greater than 0) -->
	<!-- minimumBlockHomologySupport TODO-->
	<!-- writeInputAlignmentsTo Debug option to write the alignment chains fed to CAF to the specified path. Off by default.-->
	<caf
		deannealingRounds="2 32 256"
		trim="3"
		blockTrim="5"
		minimumBlockDegree="2"
		minimumNumberOfSpecies="1"
		minimumIngroupDegree="1"
		minimumOutgroupDegree="0"
		minimumTreeCoverage="0.0"
		alignmentFilter="filterSecondariesByMultipleSequences"
		maxAdjacencyComponentSizeRatio="50"
		minLengthForChromosome="100000"
		proportionOfUnalignedBasesForNewChromosome="0.8"
		maximumMedianSequenceLengthBetweenLinkedEnds="100000"
		removeRecoverableChains="unequalNumberOfIngroupCopies"
		maxRecoverableChainsIterations="10"
		maxRecoverableChainLength="500000"
		minimumBlockDegreeToCheckSupport="-1"
		minimumBlockHomologySupport="0.05"
		writeInputAlignmentsTo=""
	>
		<!-- Minimum chain lengths in the graph, keyed by divergence level.
		     Divergence is specified as the maximum path length between any pair of ingroup nodes.
		     Each value is a string of increasing positive integers defining minimum chain lengths.
		     The last value defines the length of the minimum chain in the graph.
		     The earlier values define the minimum chain length as we progressively
		     add alignments to the graph during each round.
		     See DOI: 10.1101/gr.123356.111 for algorithm outline.
		-->
		<annealingRounds
			one="2048"
			two="1024"
			three="1024"
			four="512"
			five="512"
			default="256"
		/>
	</caf>

	<!-- The bar tag contains parameters for the bar algorithm. -->
	<!-- runBar Toggle the bar stage on or off. Turned off you get a sparse alignment just using the CAF phase-->
	<!-- bandingLimit is the maximum sequence size fed into the multiple aligner.  Sequences longer than this are trimmed accordingly -->
    <!-- partialOrderAlignment toggles between cPecan and abpoa for the core multiple alignment algorithm.
    abpoa is much faster but not as reliable for diverged sequences -->
	<!-- minimumBlockDegree The minimum number of sequences to form a block in the ancestor -->
	<!-- minimumIngroupDegree The minimum number of ingroup sequences to form a block in the ancestor -->
	<!-- minimumOutgroupDegree The minimum number of outgroup sequences to form a block in the ancestor -->
	<!-- minimumNumberOfSpecies The minimum number of different species for an alignment block to be kept -->
	<bar
		runBar="1"
		bandingLimit="1000000"
		partialOrderAlignment="1"
		minimumBlockDegree="2"
		minimumIngroupDegree="1"
		minimumOutgroupDegree="0"
		minimumNumberOfSpecies="1"
	>
		<!-- Parameters for using cPecan to generate MSAs. -->
		<!-- spanningTrees The number of spanning trees to construct in choosing which pairwise alignments to include
		 in creating the MSA -->
		<!-- gapGamma The weight to put upon indel probabilities in forming the MSA -->
		<!-- matchGamma The weight to put upon match probabilities in forming the MSA -->
		<!-- useBanding Use banded alignment in constructing the pairwise alignments -->
		<!-- splitMatrixBiggerThanThis Split the dynamic programming (DP) matrix into two smaller matrices around matrices
		with an area larger than splitMatrixBiggerThanThis squared, so reducing the total DP -->
		<!-- anchorMatrixBiggerThanThis Search for anchors within DP matrices with an area of
		anchorMatrixBiggerThanThis squared or larger -->
		<!-- repeatMaskMatrixBiggerThanThis  -->
		<!-- diagonalExpansion Expand the DP around anchors by this many diagonal coordinates. -->
		<!-- constraintDiagonalTrim Trim anchors at their ends by this many bases. -->
		<!-- alignAmbiguityCharacters Align together Ns or other wildcards. -->
		<!-- useProgressiveMerging  Progressively merge together pairwise alignments to form MSA, otherwise use a greedy, poset
		method which may be more accurate but which is generally slower for larger numbers of sequences -->
		<!-- pruneOutStubAlignments  -->
		<!-- useMumAnchors  Use maximal unique matches to create alignment anchors, otherwise call out to Lastz-->
		<!-- recursiveMums  If using MUM anchors, recursively search for anchors in gaps. -->
		<pecan
			spanningTrees="5"
			gapGamma="0.0"
			matchGamma="0.2"
			useBanding="1"
			splitMatrixBiggerThanThis="3000"
			anchorMatrixBiggerThanThis="500"
			repeatMaskMatrixBiggerThanThis="500"
			diagonalExpansion="20"
			constraintDiagonalTrim="14"
			alignAmbiguityCharacters="1"
			useProgressiveMerging="1"
			pruneOutStubAlignments="1"
			useMumAnchors="1"
			recursiveMums="1"
		/>

		<!-- Parameters for using abPOA to generate MSAs. -->
		<!-- partialOrderAlignmentWindow a sliding window approach (with hardcoded 50% overlap) is used to perform abpoa alignments.  memory is quadratic in this.  it is applied after bandingLimit -->
		<!-- partialOrderAlignmentMaskFilter trim input sequences as soon as more than this many soft or hard masked bases are encountered (-1=disabled) -->
		<!-- partialOrderAlignmentBandConstant abpoa adaptive band size is <partialOrderAlignmentBandConstant> + <partialOrderAlignmentBandFraction>*<Length>.  Negative value here disables adaptive banding -->
		<!-- partialOrderAlignmentBandFraction abpoa adaptive band second parameter (see above) -->
		<!-- partialOrderAlignmentSubMatrix (space-separated) list of 25 scores corresponding to the 5x5 ACGTN substitution matrix. -->
		<!-- partialOrderAlignmentGapOpenPenalty1 abpoa gap open penalty (linear gap if 0) -->
		<!-- partialOrderAlignmentGapExtensionPenalty1 abpoa gap extension penalty -->
		<!-- partialOrderAlignmentGapOpenPenalty2 abpoa second gap open penalty (convex mode takes the minimum of both gap models, 0 disables convex) -->
		<!-- partialOrderAlignmentGapExtensionPenalty2 abpoa second gap extension penalty (convex mode takes the minimum of both gap models) -->
		<!-- partialOrderAlignmentDisableSeeding abpoa disable minimizer seeding. toggling this on will slightly increase accuracy at the cost of speed -->
		<!-- partialOrderAlignmentMinimizerK abpoa kmer size for minimizer seeding. -->
		<!-- partialOrderAlignmentMinimizerW abpoa window size for minimizer seeding. -->
		<!-- partialOrderAlignmentMinimizerMinW abpoa minimum window size. -->
		<!-- partialOrderAlignmentProgressiveMode use guide tree from jaccard distance matrix to determine poa order -->
		<poa
			partialOrderAlignmentWindow="10000"
			partialOrderAlignmentMaskFilter="-1"
			partialOrderAlignmentBandConstant="300"
			partialOrderAlignmentBandFraction="0.05"
			partialOrderAlignmentSubMatrix="91 -114 -61 -123 -100 -114 100 -125 -61 -100 -61 -125 100 -114 -100 -123 -61 -114 91 -100 -100 -100 -100 -100 100"
			partialOrderAlignmentGapOpenPenalty1="400"
			partialOrderAlignmentGapExtensionPenalty1="30"
			partialOrderAlignmentGapOpenPenalty2="1200"
			partialOrderAlignmentGapExtensionPenalty2="1"
			partialOrderAlignmentDisableSeeding="0"
			partialOrderAlignmentMinimizerK="19"
			partialOrderAlignmentMinimizerW="10"
			partialOrderAlignmentMinimizerMinW="500"
			partialOrderAlignmentProgressiveMode="1"
		/>
	</bar>

	<!-- The reference tag provides parameters to cactus_reference, a method used to construct a reference genome for a given cactus database. -->
	<!-- numberOfNs is the number of Ns to insert into an ancestral sequence when an adjacency is uncertain; think of it as the Ns in a scaffold gap -->
	<!-- minNumberOfSequencesToSupportAdjacency is the number of sequences needed to bridge an adjacency -->
	<!-- makeScaffolds is a boolean that enables the bridging of uncertain adjacencies in an ancestral sequence providing the larger scale problem (parent flower in cactus), bridges the path. -->
	<!-- phi is the coefficient used to control how much weight to place on an adjacency given its phylogenetic distance from the reference node -->
	<reference
		matchingAlgorithm="blossom5"
		reference="reference"
		useSimulatedAnnealing="1"
		theta="0.0001"
		phi="1.0"
		maxWalkForCalculatingZ="100000"
		permutations="10"
		ignoreUnalignedGaps="1"
		wiggle="0.9999"
		numberOfNs="10"
		minNumberOfSequencesToSupportAdjacency="1"
		makeScaffolds="1"
	/>
	<!-- The check tag for debugging -->
	<check runCheck="0"/>
	<!-- The hal tag controls the creation of hal and fasta files from the pipeline. -->
	<hal buildHal="1" buildFasta="1"/>
	<!-- cactus-graphmap options -->
	<!-- assemblyName: special name of "virtual" minigraph assembly, which is just the list of sequences from the graph. -->
	<!-- universalMZFilter: only use minimizers who appear in this proportion (1.0 == 100%) of sequences that map over them. -->
	<!-- nodeBasedUniversal: universal filter calculated in terms of nodes instead of mapped regions. -->
	<!-- minMZBlockLength: only use minimizers that form contiguous blocks at least this long -->
	<!-- minMAPQ: ignore minigraph alignments with mapping quality less than this -->
	<!-- minGAFBlockLength: ignore minigraph alignments with block length less than this -->
	<!-- minGAFNodeLength: ignore minigraph nodes with length less than this -->
	<!-- minGAFQueryOverlapFilter: if 2 or more query regions in blocks of at least this size, filter them out. -->
	<!--                           if 1 query region in a block of at least this size overlaps query regions in blocks smaller than this, filter out the smaller ones -->
	<!--                           Filter is disabled when set to 0.  Any non-zero value will prevent query overlaps -->
	<!-- maskFilter: any softmasked sequence intervals > than this many bp will be hardmasked before being read by the minigraph mapper [negative value = disable]-->
	<!-- delFilter: any deletions implied by split-read mappings greater than this are removed from the paf (by removing all lines of the smallest block bordering deletion)-->
	<!-- removeMinigraphFromPAF: replace all minigraph contigs with transitive alignments in cactus-align-->
	<!-- cpu: use up to this many cpus for each minigraph command. -->
	<!-- cactus-graphmap options -->
	<!-- assemblyName: special name of "virtual" minigraph assembly, which is just the list of sequences from the graph. -->
	<!-- minigraphMapOptions: flags to pass to minigraph for mapping -->
	<!-- minigraphConstructOptions: flags to pass to minigraph for construction -->
	<!-- minigraphSortInput: Method used to determine input order. Valid values are "mash", "size", "none" / "0" which refer to (decreasing) mash distance to reference, (decreasing) size or no sorting (order in seqfile), respectively -->
	<!-- minMAPQ: ignore minigraph alignments with mapping quality less than this -->
	<!-- minGAFBlockLength: ignore minigraph alignments with block length less than this -->
	<!-- minGAFQueryOverlapFilter: if 2 or more query regions in blocks of at least this size, filter them out. -->
	<!--                           if 1 query region in a block of at least this size overlaps query regions in blocks smaller than this, filter out the smaller ones -->
	<!--                           Filter is disabled when set to 0.  Any non-zero value will prevent query overlaps [NOTE: holdover from pre-cigar days; should probably remove] -->
	<!-- GAFOverlapFilterRatio: filter overlapping (by query) gaf records. keep records if their MAPQ or query length are <ratio> bigger than overlapping interval (see gaffilter -r) [0=disable] -->
	<!-- GAFOverlapFilterMinLengthRatio: only consider overlaps (for above) whose lengths represent at least this much (ratio between 0 and 1) of the block length -->
	<!-- PAFOverlapFilterRatio: Same as GAFOverlapFilterRatio, but applied after conversion to PAF -->
	<!-- PAFOverlapFilterMinLengthRatio: Same as GAFOverlapFilterMinLengthRatio, but applies after conversion to PAF -->
	<!-- maskFilter: any softmasked sequence intervals > than this many bp will be hardmasked before being read by the minigraph mapper [negative value = disable]-->
	<!-- delFilter: any deletions implied by split-read mappings greater than this are removed from the paf (by removing all lines of the smallest block bordering deletion)-->
	<!-- delFilterThreshold: only remove deletion if it costs < delFilterThreshold * deletion-size matches. must be in range (0, 1] -->
	<!-- delFilterQuerySizeThreshold: deletion removed if the supporting query contig has length < delFilterQuerySizeThreshold * deletion-size --> 
	<!-- minIdentity: ignore PAF lines whose identity (col 10 / col 11) is less than this -->
	<!-- removeMinigraphFromPAF: replace all minigraph contigs with transitive alignments in cactus-align-->
	<!-- cpu: use up to this many cpus for each minigraph command. -->
	<graphmap
		assemblyName="_MINIGRAPH_"
		minigraphMapOptions="-c -xasm"
		minigraphConstructOptions="-c -xggs"
		minigraphSortInput="mash"
		minMAPQ="5"
		minGAFBlockLength="250000"
		minGAFQueryOverlapFilter="0"
		GAFOverlapFilterRatio="5"
		GAFOverlapFilterMinLengthRatio="0.25"
		PAFOverlapFilterRatio="5"
		PAFOverlapFilterMinLengthRatio="0"
		maskFilter="-1"
		delFilter="10000000"
		delFilterThreshold="0.01"
		delFilterQuerySizeThreshold="2"
		minIdentity="0.5"
		removeMinigraphFromPAF="0"
		cpu="6"
	/>
	<!-- cactus-graphmap-split options -->
	<!-- minQueryCoverages: (space-separated) At least this fraction of input contig must align to reference contig for it to be assigned. -->
	<!-- minQueryCoverageThresholds: (space-separated, exactly 1 fewer than minQueryCoverages). With above, form coverage bins.-->
	<!--          Example:  coverages = "0.99 0.95 0.90" / thresholds = "10000 100000" would give the following bins: -->
	<!--                    0-9999 : min_coverage = 0.99 -->
	<!--                    10000-99999 : min_coverage = 0.95 -->
	<!--                    100000-infinity : min_coverage = 0.90 -->		  
	<!-- minQueryUniqueness: The ratio of the number of query bases aligned to the chosen ref contig vs the next best ref contig must exceed this threshold to not be considered ambiguous. -->
	<!-- ambiguousName: Contigs deemed ambiguous using the above filters get added to a "contig" with this name, and are preserved in output. -->
	<!-- remapSplitOptions: extra rgfa-split options to apply when splitting after remapping (use -u 3000000 to enable autoclipping between contigs, add -s to allow within contig)-->
	<!-- maxRefContigs: The maximum number of auto-detect refContigs -->
	<!-- refContigDropoff: When detecting refContigs, sort in reverse order by size, and pick refContigs up until contig N is this many times smaller than contig 1 (or maxRefContigs) is reached.  The general idea is to have a refContig for each chromosome then lump all the tiny ones into their own graphs. -->
	<!-- refContigRegex: If specified, automatically promote any matching contig name into its own graph, regardless of how small it is.  Useful for making sure chrM doesn't get mixed with KI270322.1 etc. By default it's anything beginning with chr (any case) followed by 0-3 characters. This takes precedence over refContigDropoff -->
	<!-- otherContigName: When determining refContigs automatically, other contigs get lumped into a graph with this name. This affects the file structure but has zero bearing on whole-genome graphs/indexes -->
  	<graphmap_split
		 minQueryCoverages="0.75 0.5 0.25"
		 minQueryCoverageThresholds="100000 1000000"
		 minQueryUniqueness="2"
		 ambiguousName="_AMBIGUOUS_"
		 remapSplitOptions=""
		 maxRefContigs="128"
		 refContigDropoff="10.0"
		 refContigRegex="[c,C][h,H][r,R].?.?.?"
		 otherContigName="chrOther"
		 />
	<!-- cactus-graphmap-join options -->
	<!-- gfaffix: toggle gfaffix normalization on/off -->
	<!-- clipNonMinigraph: clip out regions if they don't align to minigraph (if disabled, clip if they don't align to anything) -->
	<!-- minimizerOptions: options for vg minimizer -->
	<!-- minFilterFragment: discard fragments that result from the vg clip frequency filter if they are smaller than this -->
	<!-- removeStubs: remove any nodes dangling off reference paths (ie softclips) -->
	<!-- GFANodeIDsInVCF: write all node IDs in the VCF (variant names, AT fields etc.) in the (unchopped) GFA namespace by way of the GBZ translation table -->
	<!-- odgiVizOptions: options to odgi viz, used for 1d viz -->
	<!-- odgiLayoutOptions: options to odgi layout, used for 2d viz -->
	<!-- odgiDrawOptions: options to odgi draw, used for 2d viz -->
	<!-- rindexOptions: options to vg gbwt -r (r-index construction) as used to make haplotype sampling index -->
	<!-- haplOptions: options to vg haplotypes as used to make haplotype sampling index -->
	<graphmap_join
		gfaffix="1"
		clipNonMinigraph="1"
		minimizerOptions="-k 29 -w 11"
		minFilterFragment="1000"
		removeStubs="1"
		GFANodeIDsInVCF="1"
		odgiVizOptions="-x 1500 -y 500 -a 10"
		odgiLayoutOptions="-x 50"
		odgiDrawOptions="-H 1000"
		rindexOptions="-p"
		haplOptions="-v 2"
	/>
	<!-- hal2vg options -->
	<!-- includeMinigraph: include minigraph node sequences as paths in output (note that cactus-graphmap-join will still remove them by default) -->
	<!-- includeAncestor: include ancestral reference sequences as paths in output -->
	<!-- prependGenomeNames: prepend each output path with the genome name -->
	<!-- hal2vgOptions: options to use in every hal2vg command -->
	<hal2vg
		includeMinigraph="1"
		includeAncestor="0"
		prependGenomeNames="1"
		hal2vgOptions="--progress --inMemory"
	/>
  	<multi_cactus>
	        <!-- stategy: outgroup selection algorithm, can be one of greedy, greedyLeaves or greedyPreference -->
		<!-- threshold: depth increases by at most k per outgroup -->
		<!-- ancestor_quality_fraction: min fraction of children of ancestor in candidateSet in order for the ancestor to be an outgroup candidate -->
		<!-- max_num_outgroups: maximum number of outgroups per alignment job-->
		<!-- extra_chrom_outgroups: if max_num_outgroups is not sufficent to satisfy sex chromosome requirements of an ancestor while still using nearest species, allow up to this many extra (when -1 set to the number of unique sex chromosomes for the ancestor).  -->
		<outgroup
			strategy="greedyPreference"
			threshold="0"
			ancestor_quality_fraction="0.75"
			max_num_outgroups="2"
			extra_chrom_outgroups="-1"
			/>

		<!-- default_internal_node_prefix: internal nodes of the tree are labeled with this prefix then a BFS order number -->
		<!-- default_branch_len: use this branch length if information is missing from the input tree -->
		<!-- default_branch_len_pangenome: use this branch length (for implicit star tree) in pangenome mode -->
	 	<decomposition
	 		default_internal_node_prefix="T2TApesAnc"
			default_branch_len="1"
			default_branch_len_pangenome="0.025"
	 	/>
  	</multi_cactus>
	<consolidated>
		<!-- consolidatedMemory: Specify memory requirement (in bytes) for cactus_consolidated.
		     Each attribute corresponds to an interval start, and must be in the form seq_size_TOTAL_INPUT_SEQUENCE_SIZE=MEMORY.
		     So if cactus_consolidated is getting N input bytes, the first interval containing N is found, and the value of
		     that attribute is used as the Toil memory. The idea is that it will be easier to fiddle with these than a single
		     polynomial function while attempting to learn decent defaults by trial and error.
		     By default, the memory requirement is linearly interpolated based on the endpoints of the interval.
		     If consMemory CLI option is used, then it overrides everything here. -->
		<consolidatedMemory
			seq_size_0="1000000"
			seq_size_10000000="16000000000"
			seq_size_50000000="64000000000"
			seq_size_1000000000="128000000000"
			seq_size_4000000000="256000000000"
			seq_size_10000000000="512000000000"
		/>
	</consolidated>
	<consolidated2/>
</cactusWorkflowConfig>
