###################################################################################
#
#  Estimate Expected Motif Frequencies
#  -----------------------------------
#
#  Purpose:
#    Build a control data set of 50 random DNA sequences (all of the same,
#    user-selected length) from a chosen background model, scan those
#    sequences for motifs, and record how frequently each motif occurs.
#
#  Use of the result:
#    The resulting per-motif frequencies can serve as "expected frequencies"
#    when calculating p-values for overrepresentation with the
#    "count motif occurrences" analysis.
#
#  Important:
#    Scan these artificial control sequences with the same scanning method
#    and the same settings that you used on the actual target sequences.
#
###################################################################################


# --- User input: sequence length and background model ----------------------------

length = new Numeric Variable(100000)
prompt for length "Please select a length for the 50 sequences that will be generated"

background = new Background Model(Model:Uniform)
prompt for background "Please select a background model for DNA sequence generation"


# --- Create the 50 control sequences, each sized by 'length' ---------------------

Sequence01 = new Sequence(length)
Sequence02 = new Sequence(length)
Sequence03 = new Sequence(length)
Sequence04 = new Sequence(length)
Sequence05 = new Sequence(length)
Sequence06 = new Sequence(length)
Sequence07 = new Sequence(length)
Sequence08 = new Sequence(length)
Sequence09 = new Sequence(length)
Sequence10 = new Sequence(length)

Sequence11 = new Sequence(length)
Sequence12 = new Sequence(length)
Sequence13 = new Sequence(length)
Sequence14 = new Sequence(length)
Sequence15 = new Sequence(length)
Sequence16 = new Sequence(length)
Sequence17 = new Sequence(length)
Sequence18 = new Sequence(length)
Sequence19 = new Sequence(length)
Sequence20 = new Sequence(length)

Sequence21 = new Sequence(length)
Sequence22 = new Sequence(length)
Sequence23 = new Sequence(length)
Sequence24 = new Sequence(length)
Sequence25 = new Sequence(length)
Sequence26 = new Sequence(length)
Sequence27 = new Sequence(length)
Sequence28 = new Sequence(length)
Sequence29 = new Sequence(length)
Sequence30 = new Sequence(length)

Sequence31 = new Sequence(length)
Sequence32 = new Sequence(length)
Sequence33 = new Sequence(length)
Sequence34 = new Sequence(length)
Sequence35 = new Sequence(length)
Sequence36 = new Sequence(length)
Sequence37 = new Sequence(length)
Sequence38 = new Sequence(length)
Sequence39 = new Sequence(length)
Sequence40 = new Sequence(length)

Sequence41 = new Sequence(length)
Sequence42 = new Sequence(length)
Sequence43 = new Sequence(length)
Sequence44 = new Sequence(length)
Sequence45 = new Sequence(length)
Sequence46 = new Sequence(length)
Sequence47 = new Sequence(length)
Sequence48 = new Sequence(length)
Sequence49 = new Sequence(length)
Sequence50 = new Sequence(length)


# --- DNA track for the control sequences, built from the background model --------

DNA = new DNA Sequence Dataset(background)


# --- User input: motifs to evaluate and the scanning cutoff ----------------------

Motifs = new Motif Collection()
prompt for Motifs "Please select motifs to calculate expected frequencies"

cutoff = new Numeric Variable(95)
prompt for cutoff "Please specify a cutoff threshold [0-100] for motif scanning"


# --- Motif scanning ---------------------------------------------------------------
# Reminder: the scanner and its settings below should match what was used on the
# real target sequences; edit this step if a different method or different
# settings were used there.

BindingSites = motifScanning on DNA with SimpleScanner {Motif Collection=Motifs,Threshold type="Percentage",Threshold=cutoff,Score="Absolute"}


# --- Per-motif frequencies --------------------------------------------------------
# Collect the frequency of every motif in the BindingSites track into a
# Motif Numeric Map and show the result.

ExpectedFrequencies = new Motif Numeric Map(Track:BindingSites,property=Frequency)
$display(ExpectedFrequencies)