A "Best of the Best Practices" (BOBP) guide to developing in Python.
- "Build tools for others that you want to be built for you." - Kenneth Reitz
- "Simplicity is always better than functionality." - Pieter Hintjens
| # train_grpo.py | |
| # | |
| # See https://github.com/willccbb/verifiers for ongoing developments | |
| # | |
| import re | |
| import torch | |
| from datasets import load_dataset, Dataset | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| from peft import LoraConfig | |
| from trl import GRPOConfig, GRPOTrainer |
| # Advent of Code template by @MathisHammel | |
| # TODO | |
| # - Make a snapshot of the file when a submission is correct | |
| # - Display the rank when submission is accepted | |
| # - Utility function to rotate/flip a 2D array | |
| # - Cycle length detector/extrapolator to make loops faster | |
| # - Put examples in cache | |
| # - Warning if DAY is not the current day |
| <!doctype html> | |
| <html> | |
| <head> | |
| <title>Site Maintenance</title> | |
| <meta charset="utf-8"/> | |
| <meta name="robots" content="noindex"/> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <style> | |
| body { text-align: center; padding: 150px; } | |
| h1 { font-size: 50px; } |