Shortlink: goo.gl/wSuuS9
The GitHub repository will soon be available at github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wikisum
| 'system': | |
| [ | |
| { | |
| 'type': 'text', | |
| 'text': "You are Claude Code, Anthropic's official CLI for Claude.", | |
| 'cache_control': {'type': 'ephemeral'} | |
| }, | |
| { | |
| 'type': 'text', | |
| 'text': 'You are an interactive CLI tool that helps users with software engineering tasks. |
| # train_grpo.py | |
| # | |
| # See https://github.com/willccbb/verifiers for ongoing developments | |
| # | |
| import re | |
| import torch | |
| from datasets import load_dataset, Dataset | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| from peft import LoraConfig | |
| from trl import GRPOConfig, GRPOTrainer |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| # helpers | |
| def make_unit_length(x, epsilon=1e-6): | |
| norm = x.norm(p=2, dim=-1, keepdim=True) | |
| return x.div(norm + epsilon) |
Shortlink: goo.gl/wSuuS9
The GitHub repository will soon be available at github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wikisum
| import random | |
| class TicTacToe: | |
| def __init__(self, playerX, playerO): | |
| self.board = [' ']*9 | |
| self.playerX, self.playerO = playerX, playerO | |
| self.playerX_turn = random.choice([True, False]) | |
| def play_game(self): |
| """ Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """ | |
| import numpy as np | |
| import cPickle as pickle | |
| import gym | |
| # hyperparameters | |
| H = 200 # number of hidden layer neurons | |
| batch_size = 10 # every how many episodes to do a param update? | |
| learning_rate = 1e-4 | |
| gamma = 0.99 # discount factor for reward |
| #!/usr/bin/env python | |
| # coding: utf-8 | |
| """Sampling Sequence Data from model""" | |
| import numpy as np | |
| import tensorflow as tf | |
| import json | |
| import cPickle as pickle | |
| import itertools as it | |
| from rnnlib import PTBModel |