<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"
        xmlns:xhtml="http://www.w3.org/1999/xhtml"
        xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
        xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
  <url><loc>https://scalingthoughts.com/</loc></url>
  <url><loc>https://scalingthoughts.com/about/</loc></url>
  <url><loc>https://scalingthoughts.com/blog/attention-mechanisms-compared/</loc></url>
  <url><loc>https://scalingthoughts.com/blog/chat-templates-hidden-interface/</loc></url>
  <url><loc>https://scalingthoughts.com/blog/continuous-batching-explained/</loc></url>
  <url><loc>https://scalingthoughts.com/blog/flash-attention-in-practice/</loc></url>
  <url><loc>https://scalingthoughts.com/blog/iclr-2026-prompt-translation/</loc></url>
  <url><loc>https://scalingthoughts.com/blog/linear-attention-tradeoffs/</loc></url>
  <url><loc>https://scalingthoughts.com/blog/maximizing-gpu-utilization/</loc></url>
  <url><loc>https://scalingthoughts.com/blog/mixture-of-experts-scaling/</loc></url>
  <url><loc>https://scalingthoughts.com/blog/normalization-placement-matters/</loc></url>
  <url><loc>https://scalingthoughts.com/blog/quantization-for-llm-inference/</loc></url>
  <url><loc>https://scalingthoughts.com/blog/scaling-foundation-model-inference/</loc></url>
  <url><loc>https://scalingthoughts.com/blog/speculative-decoding-explained/</loc></url>
  <url><loc>https://scalingthoughts.com/blog/structural-conventions-across-models/</loc></url>
  <url><loc>https://scalingthoughts.com/blog/tensor-parallelism-fundamentals/</loc></url>
  <url><loc>https://scalingthoughts.com/publications/</loc></url>
  <url><loc>https://scalingthoughts.com/write/</loc></url>
  <url><loc>https://scalingthoughts.com/writing/</loc></url>
</urlset>