Development workflow and aliases

Someone asked me in Slack, so I figured I'd post some of the tricks I use here. I would love to hear other people's tricks too! What follows is unofficial, opinionated, and maybe not even best practice.

Aliases to add to your .zshrc (use oh-my-zsh, it's dope)!


install_pl_dev() {
   # install pytorch-lightning from source; the typing backport can break the
   # install on some setups, so drop it first and reinstall it afterwards
   pip uninstall -y typing
   pip install -U git+https://github.com/PyTorchLightning/pytorch-lightning.git
   pip install typing
}

### pytest
hft () {
    pytest  -p no:warnings -n auto --dist=loadfile ./tests/ $@
}
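# -n auto --dist=loadfile needs pytest-xdist; extra args go straight to pytest,
# e.g. hft -k marian runs just the marian tests in parallel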

tfork () {
	cd ~/transformers_fork
}

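# RUN_SLOW=1 opts in to the tests that transformers marks @slow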
tmar () {
    RUN_SLOW=1 pytest --tb=short -p no:warnings ./tests/test_modeling_marian.py -ra $@
}

tmar_tok () {
    RUN_SLOW=1 pytest --tb=short -p no:warnings ./tests/test_tokenization_marian.py -ra $@
}

tbart () {
    pytest --tb=short -p no:warnings ./tests/test_modeling_bart.py -ra $@
}
ttf () { 
    pytest  -p no:warnings ./tests/test_modeling_tf_bart.py -ra $@
}
tbm () {
	pytest  -p no:warnings ./tests/test_modeling_bart.py -ra $@
	RUN_SLOW=1 pytest -p no:warnings tests/test_modeling_bart.py -sv -k mnli
}
tcnn () {
	RUN_SLOW=1 pytest --tb=short -p no:warnings tests/test_modeling_bart.py -sv -k cnn $@
	pytest  -p no:warnings ./tests/test_modeling_bart.py -ra $@
}
txsum () {
	RUN_SLOW=1 pytest --tb=short -p no:warnings tests/test_modeling_bart.py -sv -k xsum $@
}
tmbart () {
	RUN_SLOW=1 pytest --tb=short -p no:warnings tests/test_modeling_bart.py -sv -k mbart $@
}

tenro() {
	RUN_SLOW=1 pytest --tb=short -p no:warnings tests/test_modeling_bart.py -s -k enro $@
}
# misc
_checkout_grep() {
	git checkout $1 > /dev/null 2>&1  # suppress "Previous HEAD position" msg
	git grep $2 | wc -l
}

check_torch_compat () {
	# check the PyTorch compatibility of a symbol by grepping the docs at each release tag
	# example usage: check_torch_compat torch.bool
	cd ~/pytorch/docs
	echo "1.0"
	_checkout_grep v1.0.0 $1
	echo "1.1"
	_checkout_grep v1.1.0 $1
	echo "1.2"
	_checkout_grep v1.2.0 $1
	echo "1.3"
	_checkout_grep v1.3.0 $1
	echo "1.4"
	_checkout_grep v1.4.0 $1
	echo "master"
	_checkout_grep master $1
	cd -  > /dev/null 2>&1
}
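# e.g. to see which releases mention torch.bool in their docs:
# check_torch_compat torch.bool
# (prints the number of matches at each tag)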


texamples () {
	pytest --tb=short -p no:warnings examples/ $@
}


sty() {
	make style
	flake8 --ignore=P,E501,E203,W503,E741 examples templates tests src utils
}

gsync (){
	# g = git (an oh-my-zsh alias); sync local master with upstream
	g fetch upstream
	g merge upstream/master
}

covg() {
	open "$COVERAGE_URL$1"
}
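# e.g. covg modeling_bart.py should open the codecov page for src/transformers/modeling_bart.py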

# GCP STUFF
export CUR_PROJ="YOUR GCP PROJECT"
gcloud config set project $CUR_PROJ
start_gpu () {
    gcloud compute instances start $CUR_INSTANCE_NAME --project $CUR_PROJ --zone $ZONE
}
stop_gpu () {
    gcloud compute instances stop $CUR_INSTANCE_NAME --project $CUR_PROJ  --zone $ZONE
}
export HF_PROJ="FIXME your gcp project name"
hfg_ssh () {
	gcloud beta compute ssh --zone $ZONE $CUR_INSTANCE_NAME --project $CUR_PROJ -- -L 5555:localhost:8888
}
tidy_ssh () {
	gcloud beta compute ssh --zone $ZONE $CUR_INSTANCE_NAME --project $CUR_PROJ
}
put_my_s3 () {
	s3cmd put --recursive $1 s3://models.huggingface.co/bert/sshleifer/
}
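# usage: put_my_s3 <local_dir>   (recursive upload into the sshleifer bucket)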

# Pick a machine with one of the workon_* helpers below, then run hfg_ssh
workon_hfg (){	
	export CUR_INSTANCE_NAME="shleifer-MYSICKGPU"
	export ZONE='us-central1-a'
}
workon_pegasus (){
	export CUR_INSTANCE_NAME="notreally-pegasus-vm"
	export ZONE="us-west1-b"
}
workon_tpu (){
	export CUR_INSTANCE_NAME="shleifer-HUGETPUCLUSTERFORMAKEAGI"
	export ZONE="us-central1-f"
}
workon_v8 (){
	export CUR_INSTANCE_NAME="shleifer-BLAH"
	export ZONE='us-central1-a'
}
start_v8 () {
	workon_v8
	start_gpu
}
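# typical session: pick a machine, boot it, and ssh in with the notebook tunnel
# workon_pegasus
# start_gpu
# hfg_ssh   # jupyter on the VM's :8888 is now at localhost:5555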
export TOKENIZERS_PARALLELISM=false
export PYTEST_ADDOPTS='--pdbcls=IPython.terminal.debugger:Pdb'
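# with the --pdbcls override above, `pytest --pdb` drops into IPython's debugger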


### AWS/Seq2Seq Stuff
export COVERAGE_URL="https://codecov.io/gh/huggingface/transformers/src/master/src/transformers/"
export h="s3://models.huggingface.co/bert/Helsinki-NLP"
export b="s3://models.huggingface.co/bert"
export ss="s3://models.huggingface.co/bert/sshleifer"
export sdbart="s3://sshleifer.logs/dbart"
export sdir=$HOME/transformers_fork/examples/seq2seq/
export CNN_DIR=$sdir/dbart/cnn_dm
export XSUM_DIR=$sdir/dbart/xsum
export ENRO_DIR=$sdir/dbart/wmt_en_ro
export XSUM_URL="http://bollin.inf.ed.ac.uk/public/direct/XSUM-EMNLP18-Summary-Data-Original.tar.gz"
export XSUM_RAW_S3="s3://sshleifer.logs/dbart/XSUM-EMNLP18-Summary-Data-Original.tar.gz"

aw3 () {
	aws s3 $@
}
s3ls () {
	aws s3 ls $@
}
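# these pair with the exports below, e.g. s3ls $ss/ lists the sshleifer model bucket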

Misc tips:

  • Fork the repo, call the directory transformers_fork, and clone it to $HOME/ on every machine.
  • Use pip install -e ".[dev]" to keep up with dependency changes (mostly isort and tokenizers).
  • Every time I start a VM, I put my dotfiles up there, either with scp or git. I use git for the dotfiles and scp for ~/.ssh/.
  • When I want to update a branch, I usually run:
git checkout master
gsync  # fetch upstream, merge upstream/master
git checkout <branch>
git merge master

If there are merge conflicts, I fix them in my IDE (VS Code is nice, or PyCharm cmd-k). I don't trust git very much with this. The more often you run this, the simpler the merge conflicts are to resolve.
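That flow is easy to wrap up too. A minimal sketch (the name gup is made up; it assumes the gsync alias from above):

gup () {
	git checkout master
	gsync
	git checkout $1
	git merge master
}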

Test Driven Development

(my version)

  • If I am working on examples/seq2seq, I also run texamples -k finetune a lot and try to keep it green at all times, mostly on my Mac but also on my VM.
  • I also run sty, my make style/isort/flake8 alias, all the time.
  • When I am updating BART or adding a new model, I write the tests first and then try to get them green one by one. Same with a new feature: test first, then add the feature. This often includes adding a new check to an existing test.
  • I set tons of ipdb breakpoints for debugging hard things.

Thanks @sshleifer

What’s going on with your install of lightning, though? Shouldn’t it just be…

install_pl_dev() {
   pip install -U git+https://github.com/PyTorchLightning/pytorch-lightning.git
}

Also, texamples -k finetune is 🔥
