Phân tích cú pháp tệp GenBank bằng Python

Định dạng có các

import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
8 lặp lại [được phân tách bằng
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
9], trong đó mỗi
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
8 là một protein. Mỗi
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
8 có một số phần trong đó có phần
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
2 với một số trường cố định, chẳng hạn như
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
0,
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
1 và
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
2, với các giá trị đề cập đến thông tin cụ thể cho
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
8 đó

Tôi quan tâm đến việc sử dụng

import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
5 của
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
4 để phân tích cú pháp tệp này thành một
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
6 liệt kê cho từng ID bản ghi, các giá trị của
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
7,
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
8 và
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
9 từ trường
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
1 của nó, các giá trị
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
01 và
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
8 từ trường
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
0 và giá trị
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
8 của nó . Ngoại trừ trường
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
06, có thể xuất hiện nhiều lần trong phần
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
2 của
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
8, các trường
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
1 và
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
0 chỉ xuất hiện một lần trong phần
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
2 của
import re

from Bio import SeqIO

filename = "example.protein.gpff"

# Print one CSV row per GenBank record:
# record id, CDD id(s), transcript id, gene symbol, GeneID, organism, taxon id.
for record in SeqIO.parse(filename, "genbank"):
    # Start every record from placeholders so a missing feature prints "???"
    # instead of raising NameError or leaking the previous record's values.
    symbol = gene_id = transcript_id = "???"
    species_name = species_id = "???"
    cdd_ids = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        # CDS, source and Region are sibling feature types, so they are tested
        # at the same level rather than nested inside the CDS branch.
        if feature.type == "CDS":
            symbol = qualifiers.get("gene", ["???"])[0]
            gene_id = re.sub("GeneID:", "", qualifiers.get("db_xref", ["???"])[0])
            # Keep only the text before the first ':' of coded_by.
            transcript_id = re.sub(":.*", "", qualifiers.get("coded_by", ["???"])[0])
        elif feature.type == "source":
            species_name = qualifiers.get("organism", ["???"])[0]
            species_id = re.sub("taxon:", "", qualifiers.get("db_xref", ["???"])[0])
        elif feature.type == "Region":
            # Collect every Region so repeated Region features are all reported.
            cdd_ids.append(re.sub("CDD:", "", qualifiers.get("db_xref", ["???"])[0]))
    # Several CDD ids share one CSV cell, separated by ';'.
    cdd_id = ";".join(cdd_ids) or "???"
    print(
        "%s,%s,%s,%s,%s,%s,%s"
        % (record.id, cdd_id, transcript_id, symbol, gene_id, species_name, species_id)
    )
8

Định dạng tệp Genbank là gì?

Định dạng GenBank cho phép lưu trữ thông tin bổ sung ngoài trình tự DNA/protein. Nó chứa nhiều thông tin hơn định dạng FASTA. Các định dạng tương tự GenBank đã được phát triển bởi ENA (định dạng EMBL) và bởi DDBJ (định dạng DDBJ).

SeqIO là gì?

Bio.SeqIO cung cấp một giao diện thống nhất, đơn giản để nhập và xuất các định dạng tệp trình tự khác nhau (bao gồm cả các tệp căn chỉnh đa trình tự), nhưng chỉ xử lý các trình tự dưới dạng đối tượng SeqRecord. Có một giao diện chị em là Bio.AlignIO để làm việc trực tiếp với các tệp căn chỉnh trình tự dưới dạng đối tượng Alignment.

Chủ Đề