<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
  <!-- /blog: children in sitemaps.org 0.9 schema order (loc, lastmod, changefreq, priority). -->
  <url>
    <loc>https://www.tatta.bio/blog</loc>
    <lastmod>2025-10-01</lastmod>
    <changefreq>daily</changefreq>
    <priority>0.75</priority>
  </url>
  <!-- Blog post with one image. Children in sitemaps.org 0.9 schema order
       (loc, lastmod, changefreq, priority), followed by image extension elements. -->
  <url>
    <loc>https://www.tatta.bio/blog/o0z8nb07lufe2p9vzgds0epxxigjc1</loc>
    <lastmod>2025-06-04</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.5</priority>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/ac453e02-4a70-4a44-b0f2-3af7b40b07ce/blogpost2.png</image:loc>
      <image:title>Blog - Today's sequence data infrastructure is set up for failure in the age of AI. - Make it stand out</image:title>
      <!-- NOTE(review): this exact caption also appears under other, unrelated posts in this
           sitemap; it looks like template placeholder text. Confirm and replace at the source. -->
      <image:caption>Whatever it is, the way you tell your story online can make all the difference.</image:caption>
    </image:image>
  </url>
  <!-- Blog post with five images. Children in sitemaps.org 0.9 schema order
       (loc, lastmod, changefreq, priority), followed by image extension elements. -->
  <url>
    <loc>https://www.tatta.bio/blog/gaia-agent</loc>
    <lastmod>2025-09-09</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.5</priority>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/6372c7c2-030a-4e58-adb4-e2ba6a21c731/agent3.gif</image:loc>
      <image:title>Blog - Gaia Agent: Context-Aware Functional Insights at Scale - Make it stand out</image:title>
      <!-- NOTE(review): this exact caption also appears under other, unrelated posts in this
           sitemap; it looks like template placeholder text. Confirm and replace at the source. -->
      <image:caption>Whatever it is, the way you tell your story online can make all the difference.</image:caption>
    </image:image>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/219a6063-ea09-4d27-a50f-e61246c185b7/Screenshot+2024-12-14+at+3.20.19%E2%80%AFPM.png</image:loc>
      <image:title>Blog - Gaia Agent: Context-Aware Functional Insights at Scale - Make it stand out</image:title>
      <image:caption>Genomic context of Rv1841c coding gene (Query in green).</image:caption>
    </image:image>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/bbbb7d53-87a4-4e5e-911b-3b344edb7501/Screenshot+2024-12-14+at+3.56.42%E2%80%AFPM.png</image:loc>
      <image:title>Blog - Gaia Agent: Context-Aware Functional Insights at Scale - Make it stand out</image:title>
      <image:caption>Foldseek-multimer alignment of Rv1841c-Rv1842c complex (yellow) with Methanoculleus sp. magnesium transporter complex (blue).</image:caption>
    </image:image>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/f46b9de6-cf11-44d0-a242-f9eab21cabe2/Screenshot+2024-12-14+at+3.20.28%E2%80%AFPM.png</image:loc>
      <image:title>Blog - Gaia Agent: Context-Aware Functional Insights at Scale - Make it stand out</image:title>
      <image:caption>Genomic context of Rv1376 coding gene (Query in green). The putative lanthipeptide in red is missed in most Mtb gene calls due to its short length and alternative initiation codon.</image:caption>
    </image:image>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/b3954089-a884-45bd-8480-f8e0699db26d/Screenshot+2024-12-14+at+2.22.16%E2%80%AFPM.png</image:loc>
      <image:title>Blog - Gaia Agent: Context-Aware Functional Insights at Scale - Make it stand out</image:title>
      <image:caption>RiPPMiner predicts the product of the newly identified ORF to be of lanthipeptide class B.</image:caption>
    </image:image>
  </url>
  <!-- Blog post with three images. Children in sitemaps.org 0.9 schema order
       (loc, lastmod, changefreq, priority), followed by image extension elements. -->
  <url>
    <loc>https://www.tatta.bio/blog/gaia</loc>
    <lastmod>2025-10-01</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.5</priority>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/6e765167-0cac-4bb4-9320-d12ba9eb9fcf/gaia_preview.gif</image:loc>
      <image:title>Blog - Introducing Gaia: Context-Aware Protein Search Across Genomic Datasets - Make it stand out</image:title>
      <!-- NOTE(review): this exact caption also appears under other, unrelated posts in this
           sitemap; it looks like template placeholder text. Confirm and replace at the source. -->
      <image:caption>Whatever it is, the way you tell your story online can make all the difference.</image:caption>
    </image:image>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/9c9e3aba-10fa-47b4-814b-93287fcab977/gaia_workflow.png</image:loc>
      <image:title>Blog - Introducing Gaia: Context-Aware Protein Search Across Genomic Datasets - Make it stand out</image:title>
      <image:caption>Genomic language modeling for generating Gaia embeddings. Left: gLM2 is a genomic language model trained on multi-gene metagenomic contigs from the  Open MetaGenome (OMG) dataset. gLM2 learns context-aware representations of proteins. Right: To endow structural awareness in gLM representations, we fine-tune gLM2 to align with structural clusters in AlphaFold database (AFDB) clusters.</image:caption>
    </image:image>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/25a451bd-fdc8-4956-8c2e-dc48ac88bf67/gaia_benchmark.png</image:loc>
      <image:title>Blog - Introducing Gaia: Context-Aware Protein Search Across Genomic Datasets - Make it stand out</image:title>
      <image:caption>Gaia retrieval sensitivities (Recall@K) across three axis of information: Sequence (left), Genomic Context (middle), Protein structure (right). Read our manuscript for detailed benchmarking methods and results.</image:caption>
    </image:image>
  </url>
  <!-- Blog post with three images. Children in sitemaps.org 0.9 schema order
       (loc, lastmod, changefreq, priority), followed by image extension elements. -->
  <url>
    <loc>https://www.tatta.bio/blog/glm2</loc>
    <lastmod>2024-08-14</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.5</priority>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/809b4f69-ac57-4185-ace6-d692275c4d35/Figure4.png</image:loc>
      <image:title>Blog - gLM2: The First Mixed-Modality Genomic Language Model - Make it stand out</image:title>
      <image:caption>Basic schematic of the gLM2 architecture. A gene-called metagenomic contig is first preprocessed into a mixed-modal sequence consisting of coding sequence (protein) elements (in blue) and intergenic sequence (DNA) elements (in gray). The mixed-modal sequence then undergoes masking at 30% and gLM2 is trained with a masked token reconstruction objective.</image:caption>
    </image:image>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/f7379227-4099-4fdf-8153-8e3bb4edd43a/Figure5.png</image:loc>
      <image:title>Blog - gLM2: The First Mixed-Modality Genomic Language Model - Make it stand out</image:title>
      <image:caption>DGEB Scores for gLM2 (pruned and unpruned) and ESM2. X-axis is the number of FLOPs needed for training (i.e. the amount of compute required to train the model); Y-axis is the combined score on the DGEB benchmarks.</image:caption>
    </image:image>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/8e4eab06-8e05-4130-b502-4b6341366439/Figure6.png</image:loc>
      <image:title>Blog - gLM2: The First Mixed-Modality Genomic Language Model - Make it stand out</image:title>
      <image:caption>gLM2 Learns ModAC inter-protein contact sites. Protein-protein contact sites between ModA and ModC are learned by gLM2 and the alignment-based model GREMLIN. Evo and ESM2 do not learn these co-evolutionary patterns.</image:caption>
    </image:image>
  </url>
  <!-- Blog post with two images. Children in sitemaps.org 0.9 schema order
       (loc, lastmod, changefreq, priority), followed by image extension elements. -->
  <url>
    <loc>https://www.tatta.bio/blog/omg</loc>
    <lastmod>2024-08-14</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.5</priority>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/c54a50ff-bd2a-40e3-8c08-0a20147da664/Screenshot+2024-08-09+at+5.30.17%E2%80%AFPM.png</image:loc>
      <image:title>Blog - The OMG Dataset: the CommonCrawl of Biological Sequences - Make it stand out</image:title>
      <!-- NOTE(review): this exact caption also appears under other, unrelated posts in this
           sitemap; it looks like template placeholder text. Confirm and replace at the source. -->
      <image:caption>Whatever it is, the way you tell your story online can make all the difference.</image:caption>
    </image:image>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/91d94c53-3b7d-47c2-bc33-9fc9e97e1ef9/figure1.png</image:loc>
      <image:title>Blog - The OMG Dataset: the CommonCrawl of Biological Sequences - Make it stand out</image:title>
      <image:caption>UMAP visualization of the OMG dataset: A) UMAP visualization of the OG dataset examples, colored by taxonomic phylum. B) Semantic deduplication of the OG dataset, with pruned points highlighted in blue. C) Comparison of the OG and OMG datasets using a random 0.1% subset of each. Notably, the metagenomic data (OMG) exhibits higher diversity. See manuscript for full figure legend.</image:caption>
    </image:image>
  </url>
  <!-- Blog post with four images. Children in sitemaps.org 0.9 schema order
       (loc, lastmod, changefreq, priority), followed by image extension elements. -->
  <url>
    <loc>https://www.tatta.bio/blog/dgeb</loc>
    <lastmod>2024-08-14</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.5</priority>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/a6ed6ce4-6ea4-441e-86be-addf5568ad3d/DGEB_Figure_2.png</image:loc>
      <image:title>Blog - Introducing DGEB: the Diverse Genomic Embedding Benchmark - Make it stand out</image:title>
      <image:caption>Overview of tasks and datasets in DGEB. Nucleic acid (NA) and amino acid (AA) modality specific datasets are marked in purple and green respectively, and datasets that support both modalities are marked with both colors.</image:caption>
    </image:image>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/9b11446f-a140-48d3-94bc-0045e5998906/DGEB_Figure_1.png</image:loc>
      <image:title>Blog - Introducing DGEB: the Diverse Genomic Embedding Benchmark - Make it stand out</image:title>
      <image:caption>Phylogenetic tree of all phyla represented in DGEB datasets. One representative 16S/18S sequence for each phylum represented in any DGEB dataset was obtained from SILVA, where available. Phylogeny was estimated using iQ-TREE 2. Widths of tree branches correspond to how well a given phylum is represented across multiple datasets.</image:caption>
    </image:image>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/3d22059c-3175-482d-9f8b-0bbf51bc85e4/DGEB_Figure_4.png</image:loc>
      <image:title>Blog - Introducing DGEB: the Diverse Genomic Embedding Benchmark - Make it stand out</image:title>
      <image:caption>Average performance across all AA and NA tasks for models benchmarked in the DGEB manuscript. Marker size corresponds to embedding dimension and variants of same models (e.g. evo-1-8k-base, and evo-1-131k-base) are distinguished with text labels.</image:caption>
    </image:image>
    <image:image>
      <image:loc>https://images.squarespace-cdn.com/content/v1/65b6ece7b9024f3035250e51/07e9cf70-6165-4ba8-be08-d6b5366e7fb2/dgep_figure_5.png</image:loc>
      <image:title>Blog - Introducing DGEB: the Diverse Genomic Embedding Benchmark - Make it stand out</image:title>
      <image:caption>Comparison of AA and NA model representations on tasks that support both modalities. Marker color corresponds to the model type and point size corresponds to the number of parameters in the model being evaluated.</image:caption>
    </image:image>
  </url>
  <!-- /home: the only entry with priority 1.0. Children in sitemaps.org 0.9 schema order
       (loc, lastmod, changefreq, priority). -->
  <url>
    <loc>https://www.tatta.bio/home</loc>
    <lastmod>2025-10-27</lastmod>
    <changefreq>daily</changefreq>
    <priority>1.0</priority>
  </url>
  <!-- /about: children in sitemaps.org 0.9 schema order (loc, lastmod, changefreq, priority). -->
  <url>
    <loc>https://www.tatta.bio/about</loc>
    <lastmod>2025-10-29</lastmod>
    <changefreq>daily</changefreq>
    <priority>0.75</priority>
  </url>
  <!-- /research: children in sitemaps.org 0.9 schema order (loc, lastmod, changefreq, priority). -->
  <url>
    <loc>https://www.tatta.bio/research</loc>
    <lastmod>2026-03-08</lastmod>
    <changefreq>daily</changefreq>
    <priority>0.75</priority>
  </url>
  <!-- /dgebpaper: children in sitemaps.org 0.9 schema order (loc, lastmod, changefreq, priority). -->
  <url>
    <loc>https://www.tatta.bio/dgebpaper</loc>
    <lastmod>2024-07-17</lastmod>
    <changefreq>daily</changefreq>
    <priority>0.75</priority>
  </url>
  <!-- /omgpaper: children in sitemaps.org 0.9 schema order (loc, lastmod, changefreq, priority). -->
  <url>
    <loc>https://www.tatta.bio/omgpaper</loc>
    <lastmod>2024-08-19</lastmod>
    <changefreq>daily</changefreq>
    <priority>0.75</priority>
  </url>
  <!-- /gaia: children in sitemaps.org 0.9 schema order (loc, lastmod, changefreq, priority). -->
  <url>
    <loc>https://www.tatta.bio/gaia</loc>
    <lastmod>2025-06-24</lastmod>
    <changefreq>daily</changefreq>
    <priority>0.75</priority>
  </url>
  <!-- /privacy-policy: children in sitemaps.org 0.9 schema order (loc, lastmod, changefreq, priority). -->
  <url>
    <loc>https://www.tatta.bio/privacy-policy</loc>
    <lastmod>2024-11-09</lastmod>
    <changefreq>daily</changefreq>
    <priority>0.75</priority>
  </url>
  <!-- /terms-of-use: children in sitemaps.org 0.9 schema order (loc, lastmod, changefreq, priority). -->
  <url>
    <loc>https://www.tatta.bio/terms-of-use</loc>
    <lastmod>2024-11-09</lastmod>
    <changefreq>daily</changefreq>
    <priority>0.75</priority>
  </url>
  <!-- /gaia-faqs: children in sitemaps.org 0.9 schema order (loc, lastmod, changefreq, priority). -->
  <url>
    <loc>https://www.tatta.bio/gaia-faqs</loc>
    <lastmod>2024-11-21</lastmod>
    <changefreq>daily</changefreq>
    <priority>0.75</priority>
  </url>
  <!-- /gaiapaper: children in sitemaps.org 0.9 schema order (loc, lastmod, changefreq, priority). -->
  <url>
    <loc>https://www.tatta.bio/gaiapaper</loc>
    <lastmod>2024-11-21</lastmod>
    <changefreq>daily</changefreq>
    <priority>0.75</priority>
  </url>
</urlset>

