Development workflow and aliases

Someone asked me on Slack, so I figured I’d post some tricks that I use here. I would love to hear the tricks of others! What follows is unofficial, opinionated, and maybe not even best practice.

Aliases to add to .zshrc (use oh-my-zsh, it’s dope!):

# Upgrade a package from a git URL with pip, removing the `typing` package
# first and installing it again afterwards.
# NOTE(review): the `git+` URL is cut off in this snippet — fill in the
# repository URL before using. The thread suggests it is the lightning dev
# install; confirm.
install_pl_dev() {
    pip uninstall typing
    pip install -U git+
    pip install typing
}

### pytest
# Run ./tests/ with pytest, using the xdist options `-n auto --dist=loadfile`
# and with the warnings plugin disabled.
# Arguments: any extra pytest arguments, forwarded verbatim.
hft () {
    pytest -p no:warnings -n auto --dist=loadfile ./tests/ "$@"
}

# Change directory to the fork checkout at ~/transformers_fork.
# Returns: the exit status of `cd`.
tfork () {
    cd ~/transformers_fork
}

# Run ./tests/ with RUN_SLOW=1 in pytest's environment, short tracebacks,
# the warnings plugin disabled, and a `-ra` summary.
# Arguments: any extra pytest arguments, forwarded verbatim.
# NOTE(review): the target is the whole ./tests/ directory; a more specific
# test path may have been lost from this snippet — confirm.
tmar () {
    RUN_SLOW=1 pytest --tb=short -p no:warnings ./tests/ -ra "$@"
}

# Run ./tests/ with RUN_SLOW=1 in pytest's environment, short tracebacks,
# the warnings plugin disabled, and a `-ra` summary.
# Arguments: any extra pytest arguments, forwarded verbatim.
# NOTE(review): this command is identical to tmar's; the tokenizer-specific
# test path was presumably lost from this snippet — confirm.
tmar_tok () {
    RUN_SLOW=1 pytest --tb=short -p no:warnings ./tests/ -ra "$@"
}

# Run ./tests/ with pytest: short tracebacks, warnings plugin disabled,
# `-ra` summary. RUN_SLOW is not set here.
# Arguments: any extra pytest arguments, forwarded verbatim.
tbart () {
    # Variant without --tb=short, kept for reference:
    #pytest  -p no:warnings ./tests/ -ra "$@"
    pytest --tb=short -p no:warnings ./tests/ -ra "$@"
}
# Run ./tests/ with pytest, warnings plugin disabled, `-ra` summary.
# Arguments: any extra pytest arguments, forwarded verbatim.
ttf () {
    pytest -p no:warnings ./tests/ -ra "$@"
}
# Two pytest runs in sequence:
#   1. ./tests/ with the warnings plugin disabled and a `-ra` summary,
#      with any extra arguments forwarded verbatim;
#   2. tests/ filtered by `-k mnli` with RUN_SLOW=1, `-sv`. This second run
#      takes no extra arguments.
# Returns: the exit status of the second run.
tbm () {
    pytest -p no:warnings ./tests/ -ra "$@"
    RUN_SLOW=1 pytest -p no:warnings tests/ -sv -k mnli
}
# Two pytest runs in sequence, both receiving the extra arguments verbatim:
#   1. tests/ filtered by `-k cnn` with RUN_SLOW=1, short tracebacks, `-sv`;
#   2. ./tests/ with a `-ra` summary and RUN_SLOW not set.
# Returns: the exit status of the second run.
tcnn () {
    RUN_SLOW=1 pytest --tb=short -p no:warnings tests/ -sv -k cnn "$@"
    pytest -p no:warnings ./tests/ -ra "$@"
}
# Run the tests under tests/ selected by `-k xsum`, with RUN_SLOW=1,
# short tracebacks, the warnings plugin disabled, and `-sv`.
# Arguments: any extra pytest arguments, forwarded verbatim.
txsum () {
    RUN_SLOW=1 pytest --tb=short -p no:warnings tests/ -sv -k xsum "$@"
    # Disabled: unfiltered run of ./tests/
    # pytest  -p no:warnings ./tests/ -ra "$@"
}
# Run the tests under tests/ selected by `-k mbart`, with RUN_SLOW=1,
# short tracebacks, the warnings plugin disabled, and `-sv`.
# Arguments: any extra pytest arguments, forwarded verbatim.
tmbart () {
    RUN_SLOW=1 pytest --tb=short -p no:warnings tests/ -sv -k mbart "$@"
    # Disabled: unfiltered run of ./tests/
    # pytest  -p no:warnings ./tests/ -ra "$@"
}

# Run the tests under tests/ selected by `-k enro`, with RUN_SLOW=1,
# short tracebacks, the warnings plugin disabled, and `-s` (no `-v` here).
# Arguments: any extra pytest arguments, forwarded verbatim.
tenro() {
    RUN_SLOW=1 pytest --tb=short -p no:warnings tests/ -s -k enro "$@"
    # Disabled: unfiltered run of ./tests/
    # pytest  -p no:warnings ./tests/ -ra "$@"
}
# misc
# Check out a git revision in the current repository and print how many
# lines `git grep` finds for a pattern.
# Arguments: $1 - revision (tag/branch) to check out
#            $2 - pattern passed to `git grep`
# Outputs:   the matching-line count on stdout
# Returns:   1 if the checkout fails, so a count for the wrong revision is
#            never reported; otherwise the status of the grep/wc pipeline.
_checkout_grep() {
    # Output is discarded to suppress the "Previous HEAD position" message.
    if ! git checkout "$1" > /dev/null 2>&1; then
        echo "_checkout_grep: could not check out $1" >&2
        return 1
    fi
    git grep "$2" | wc -l
}

# Check the pytorch compatibility of a function: for each of the tags
# v1.0.0 .. v1.4.0 and then master, print a version label followed by the
# number of `git grep` hits for the pattern in ~/pytorch/docs.
# Example usage: check_torch_compat torch.bool
# Arguments: $1 - pattern to search for
# Returns:   1 if ~/pytorch/docs cannot be entered (no checkouts are run in
#            that case); otherwise the status of the final `cd -`.
# Side effect: the pytorch checkout is left on master.
check_torch_compat () {
    local release
    # Bail out rather than run `git checkout` in whatever repo we are in.
    cd ~/pytorch/docs || return 1
    for release in 1.0 1.1 1.2 1.3 1.4; do
        echo "$release"
        _checkout_grep "v${release}.0" "$1"
    done
    echo "master"
    _checkout_grep master "$1"
    # Go back to the caller's directory without echoing it.
    cd - > /dev/null 2>&1
}

# Run the tests under examples/ with pytest: short tracebacks, warnings
# plugin disabled.
# Arguments: any extra pytest arguments, forwarded verbatim
#            (e.g. `texamples -k finetune`).
texamples () {
    pytest --tb=short -p no:warnings examples/ "$@"
}

# Run `make style`, then flake8 over examples, templates, tests, src and
# utils with the P, E501, E203, W503 and E741 checks ignored.
# flake8 runs even if `make style` fails.
# Returns: the exit status of flake8.
sty() {
    make style
    flake8 --ignore=P,E501,E203,W503,E741 examples templates tests src utils
}

# Fetch the `upstream` remote, then merge upstream/master into the current
# branch.
# NOTE(review): `g` is presumably the oh-my-zsh alias for git — confirm it
# is defined before this function is.
gsync (){
    g fetch upstream
    g merge upstream/master
}

# Open "$COVERAGE_URL" with "$1" appended, using the `open` command.
# Globals:   COVERAGE_URL (read)
# Arguments: $1 - suffix appended to COVERAGE_URL
covg() {
    open "$COVERAGE_URL$1"
}

# Point the gcloud CLI at the project named by $CUR_PROJ. This is a
# top-level statement, so it runs whenever this file is sourced.
# NOTE(review): CUR_PROJ is not assigned anywhere in this snippet — confirm
# it is set before this line runs.
gcloud config set project $CUR_PROJ
# Start the Compute Engine instance selected by the current environment.
# Globals: CUR_INSTANCE_NAME, CUR_PROJ, ZONE (read)
start_gpu () {
    gcloud compute instances start "$CUR_INSTANCE_NAME" --project "$CUR_PROJ" --zone "$ZONE"
}
# Stop the Compute Engine instance selected by the current environment.
# Globals: CUR_INSTANCE_NAME, CUR_PROJ, ZONE (read)
stop_gpu () {
    gcloud compute instances stop "$CUR_INSTANCE_NAME" --project "$CUR_PROJ" --zone "$ZONE"
}
# Placeholder: replace the value with your GCP project name.
# NOTE(review): the gcloud helpers in this snippet read $CUR_PROJ, not
# $HF_PROJ — confirm which variable is intended.
export HF_PROJ="FIXME your gcp project name"
# SSH into the current instance via `gcloud beta compute ssh`, passing
# `-L 5555:localhost:8888` through to ssh (local port 5555 forwards to
# port 8888 on the instance).
# Globals: ZONE, CUR_INSTANCE_NAME, CUR_PROJ (read)
hfg_ssh () {
    gcloud beta compute ssh --zone "$ZONE" "$CUR_INSTANCE_NAME" --project "$CUR_PROJ" -- -L 5555:localhost:8888
}
# SSH into the current instance via `gcloud beta compute ssh`, with no
# extra ssh options (no port forwarding).
# Globals: ZONE, CUR_INSTANCE_NAME, CUR_PROJ (read)
tidy_ssh () {
    gcloud beta compute ssh --zone "$ZONE" "$CUR_INSTANCE_NAME" --project "$CUR_PROJ"
}
# Recursively upload "$1" with `s3cmd put`.
# Arguments: $1 - local path to upload; all arguments (including $1 again)
#            are appended after the destination.
# NOTE(review): the destination is a bare `s3://` — the bucket/prefix looks
# truncated in this snippet. Passing "$@" after it repeats $1; confirm both
# before use.
put_my_s3 () {
    s3cmd put --recursive "$1" s3:// "$@"
}

# Workon different machines then run hfg_ssh
# Select the "hfg" machine by exporting ZONE=us-central1-a.
# NOTE(review): unlike workon_pegasus and workon_v8, this does not set
# CUR_INSTANCE_NAME — confirm whether that line is missing.
workon_hfg (){
    export ZONE='us-central1-a'
}
# Select the pegasus VM: export its instance name and zone for the
# gcloud helpers that read CUR_INSTANCE_NAME and ZONE.
workon_pegasus (){
    export CUR_INSTANCE_NAME="notreally-pegasus-vm"
    export ZONE="us-west1-b"
}
# Select the TPU machine by exporting ZONE=us-central1-f.
# NOTE(review): unlike workon_pegasus and workon_v8, this does not set
# CUR_INSTANCE_NAME — confirm whether that line is missing.
workon_tpu (){
    export ZONE="us-central1-f"
}
# Select the v8 VM: export its instance name and zone for the gcloud
# helpers that read CUR_INSTANCE_NAME and ZONE.
workon_v8 (){
    export CUR_INSTANCE_NAME="shleifer-BLAH"
    export ZONE='us-central1-a'
}
start_v8 () {
# Extra options pytest picks up from the environment: use IPython's Pdb
# class as the debugger class.
PYTEST_ADDOPTS='--pdbcls=IPython.terminal.debugger:Pdb'

### AWS/Seq2Seq Stuff
# NOTE(review): the empty and bare "s3://" values below look like
# placeholders or truncated URLs — fill them in before relying on them.
COVERAGE_URL=""   # prefix that covg opens
h="s3://"
b="s3://"
ss="s3://"
sdbart="s3://sshleifer.logs/dbart"

# Local seq2seq example directory and the dataset directories beneath it.
sdir="${HOME}/transformers_fork/examples/seq2seq/"
CNN_DIR="${sdir}/dbart/cnn_dm"
XSUM_DIR="${sdir}/dbart/xsum"
ENRO_DIR="${sdir}/dbart/wmt_en_ro"

XSUM_URL=""
XSUM_RAW_S3="s3://sshleifer.logs/dbart/XSUM-EMNLP18-Summary-Data-Original.tar.gz"

export PYTEST_ADDOPTS COVERAGE_URL h b ss sdbart sdir \
    CNN_DIR XSUM_DIR ENRO_DIR XSUM_URL XSUM_RAW_S3

# Shorthand for `aws s3`.
# Arguments: subcommand and options for `aws s3`, forwarded verbatim.
aw3 () {
    aws s3 "$@"
}
# Shorthand for `aws s3 ls`.
# Arguments: paths/options for `aws s3 ls`, forwarded verbatim.
s3ls () {
    aws s3 ls "$@"
}



  • Fork the repo, name the directory transformers_fork, and clone it to $HOME/ on every machine.
  • Use pip install -e ".[dev]" to keep up to date with dependency changes (mostly isort and tokenizers).
  • Every time I start a new VM, I put my dotfiles up there, either with scp or git. I use git for dotfiles and scp for ~/.ssh/.
  • When I want to update a branch, I usually run:
git checkout master
gsync  # fetch upstream, merge upstream/master
git checkout <branch>
git merge master

If there are merge conflicts, I fix them in my IDE (vscode is nice, or pycharm cmd-k). I don’t trust git very much with this. The more you run this, the simpler it is to resolve merge conflicts.

Test Driven Development

(my version)

  • I also run texamples -k finetune a lot and try to keep it green whenever I am working on examples/seq2seq, mostly on my Mac but also on my VM.
  • I also run sty, my make style, isort, flake8 alias, all the time.
  • When I am updating bart or adding a new model, I write the tests first and then try to get them green one by one. Same with a new feature: test first, then add the feature. This often includes adding a new check to an existing check.
  • I set tons of ipdb breakpoints for debugging hard things.

Thanks @sshleifer

What’s going on with your install of lightning, though? Shouldn’t it just be…

# Upgrade a package from a git URL with pip, removing the `typing` package
# first and installing it again afterwards.
# NOTE(review): the `git+` URL is cut off in this snippet — fill in the
# repository URL before using.
install_pl_dev() {
    pip uninstall typing
    pip install -U git+
    pip install typing
}

Also, texamples -k finetune is :fire:

1 Like