diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 2f8841c62ff507d1b33a3fa68a19b8e63016d605..5c9a3241ce214cb8f72893413982256f904a48b5 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -8,6 +8,8 @@
 		}
 	},
 
+	"hostRequirements": { "cpus": 4, "memory": "8gb", "storage": "100gb" },
+
 	// Configure tool-specific properties.
 	"customizations": {
 		// Configure properties specific to VS Code.
diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt
index 36f009a4fe6c0d7d7d51bba186ed7c69cc7391e9..935d6db666436a2a6950f9a821b87f82ba16008d 100644
--- a/.devcontainer/requirements.txt
+++ b/.devcontainer/requirements.txt
@@ -21,7 +21,8 @@ tokenizers==0.10.3
 torch==1.11.0
 torchaudio
 torchinfo
-torchtext
-torchvision
+torchtext==0.12.0
+torchvision==0.12.0
+torchdata
 tqdm==4.62.3
 transformers==4.3.3
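
These pins keep the text and vision packages in lockstep with torch==1.11.0 (torchtext 0.12.0 and torchvision 0.12.0 are the matching releases), and torchdata is added because torchtext 0.12's built-in datasets are implemented as DataPipes that depend on it; the binder/requirements.txt copy below receives the identical pins. A quick sanity check of the resulting environment, as a sketch assuming the pinned versions install cleanly:

    import torch, torchtext, torchvision

    # expected: 1.11.0 / 0.12.0 / 0.12.0 (possibly with +cpu or +cu113 suffixes)
    print(torch.__version__, torchtext.__version__, torchvision.__version__)

    # torchtext 0.12 datasets are DataPipes and need torchdata at runtime
    train_iter = torchtext.datasets.AG_NEWS(root='./data', split='train')
    label, line = next(iter(train_iter))
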
diff --git a/README.md b/README.md
index e54db339786ba5878e95e4089f05f4c6176b5b89..ced7a3b9a7d613511b21458bc21ec90e9bd496db 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,7 @@
 [![GitHub watchers](https://img.shields.io/github/watchers/microsoft/AI-For-Beginners.svg?style=social&label=Watch)](https://GitHub.com/microsoft/AI-For-Beginners/watchers/)
 [![GitHub forks](https://img.shields.io/github/forks/microsoft/AI-For-Beginners.svg?style=social&label=Fork)](https://GitHub.com/microsoft/AI-For-Beginners/network/)
 [![GitHub stars](https://img.shields.io/github/stars/microsoft/AI-For-Beginners.svg?style=social&label=Star)](https://GitHub.com/microsoft/AI-For-Beginners/stargazers/)
+[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/microsoft/ai-for-beginners/HEAD)
 
 # Artificial Intelligence for Beginners - A Curriculum
 
diff --git a/binder/requirements.txt b/binder/requirements.txt
index 36f009a4fe6c0d7d7d51bba186ed7c69cc7391e9..935d6db666436a2a6950f9a821b87f82ba16008d 100644
--- a/binder/requirements.txt
+++ b/binder/requirements.txt
@@ -21,7 +21,8 @@ tokenizers==0.10.3
 torch==1.11.0
 torchaudio
 torchinfo
-torchtext
-torchvision
+torchtext==0.12.0
+torchvision==0.12.0
+torchdata
 tqdm==4.62.3
 transformers==4.3.3
diff --git a/lessons/5-NLP/14-Embeddings/torchnlp.py b/lessons/5-NLP/14-Embeddings/torchnlp.py
index d6ca5e0c19c08862edc19d7720ae9d66d364b26a..cd709f0d792fe3018a1f219509db3f4481d019e9 100644
--- a/lessons/5-NLP/14-Embeddings/torchnlp.py
+++ b/lessons/5-NLP/14-Embeddings/torchnlp.py
@@ -20,12 +20,12 @@ def load_dataset(ngrams=1,min_freq=1):
     counter = collections.Counter()
     for (label, line) in train_dataset:
         counter.update(torchtext.data.utils.ngrams_iterator(tokenizer(line),ngrams=ngrams))
-    vocab = torchtext.vocab.Vocab(counter, min_freq=min_freq)
+    vocab = torchtext.vocab.vocab(counter, min_freq=min_freq)
     return train_dataset,test_dataset,classes,vocab
 
 def encode(x,voc=None,unk=0,tokenizer=tokenizer):
-    v = vocab if voc is None else voc
-    return [v.stoi.get(s,unk) for s in tokenizer(x)]
+    stoi = (vocab if voc is None else voc).get_stoi() # fetch the mapping once, not per token
+    return [stoi.get(s,unk) for s in tokenizer(x)]
 
 def train_epoch(net,dataloader,lr=0.01,optimizer=None,loss_fn = torch.nn.CrossEntropyLoss(),epoch_size=None, report_freq=200):
     optimizer = optimizer or torch.optim.Adam(net.parameters(),lr=lr)
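
This file (duplicated below for the RNN lesson) is where the substantive API migration happens: torchtext 0.12 drops the legacy Vocab constructor, so the uppercase class becomes the lowercase vocab() factory, and the stoi/itos attributes become get_stoi()/get_itos() method calls. A minimal before/after sketch, assuming torchtext 0.12:

    import collections, torchtext

    counter = collections.Counter(['the', 'cat', 'the'])

    # legacy API (torchtext <= 0.11): class with .stoi / .itos attributes
    #   v = torchtext.vocab.Vocab(counter)
    #   idx = v.stoi['the']

    # new API (torchtext 0.12): factory function returning a Vocab object
    v = torchtext.vocab.vocab(counter, min_freq=1)
    idx = v.get_stoi()['the']   # .stoi attribute -> get_stoi() method
    tok = v.get_itos()[idx]     # .itos attribute -> get_itos() method
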
diff --git a/lessons/5-NLP/16-RNN/torchnlp.py b/lessons/5-NLP/16-RNN/torchnlp.py
index d6ca5e0c19c08862edc19d7720ae9d66d364b26a..cd709f0d792fe3018a1f219509db3f4481d019e9 100644
--- a/lessons/5-NLP/16-RNN/torchnlp.py
+++ b/lessons/5-NLP/16-RNN/torchnlp.py
@@ -20,12 +20,12 @@ def load_dataset(ngrams=1,min_freq=1):
     counter = collections.Counter()
     for (label, line) in train_dataset:
         counter.update(torchtext.data.utils.ngrams_iterator(tokenizer(line),ngrams=ngrams))
-    vocab = torchtext.vocab.Vocab(counter, min_freq=min_freq)
+    vocab = torchtext.vocab.vocab(counter, min_freq=min_freq)
     return train_dataset,test_dataset,classes,vocab
 
 def encode(x,voc=None,unk=0,tokenizer=tokenizer):
-    v = vocab if voc is None else voc
-    return [v.stoi.get(s,unk) for s in tokenizer(x)]
+    stoi = (vocab if voc is None else voc).get_stoi() # fetch the mapping once, not per token
+    return [stoi.get(s,unk) for s in tokenizer(x)]
 
 def train_epoch(net,dataloader,lr=0.01,optimizer=None,loss_fn = torch.nn.CrossEntropyLoss(),epoch_size=None, report_freq=200):
     optimizer = optimizer or torch.optim.Adam(net.parameters(),lr=lr)
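
One behavioral caveat in this otherwise mechanical change: the legacy Vocab reserved index 0 for '<unk>', so encode()'s unk=0 fallback mapped out-of-vocabulary tokens to a dedicated slot, whereas the 0.12 factory adds no specials, so index 0 now belongs to an ordinary token. A hypothetical adjustment inside load_dataset (not part of this patch) that would restore the old meaning of unk=0:

    # reserve an explicit '<unk>' at index 0, shifting real tokens up by one
    vocab = torchtext.vocab.vocab(counter, min_freq=min_freq)
    vocab.insert_token('<unk>', 0)
    vocab.set_default_index(0)   # direct lookups of unseen tokens also return 0
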
diff --git a/lessons/5-NLP/17-GenerativeNetworks/GenerativePyTorch.ipynb b/lessons/5-NLP/17-GenerativeNetworks/GenerativePyTorch.ipynb
index 04a069d8aed315a496d3f31b34ceddbc10e5d6f5..8420b357ae03511610c1c62a49f832c6f67c3f76 100644
--- a/lessons/5-NLP/17-GenerativeNetworks/GenerativePyTorch.ipynb
+++ b/lessons/5-NLP/17-GenerativeNetworks/GenerativePyTorch.ipynb
@@ -53,9 +53,9 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Vocabulary size = 84\n",
-      "Encoding of 'a' is 4\n",
-      "Character with code 13 is h\n"
+      "Vocabulary size = 82\n",
+      "Encoding of 'a' is 1\n",
+      "Character with code 13 is c\n"
      ]
     }
    ],
@@ -66,12 +66,12 @@
     "counter = collections.Counter()\n",
     "for (label, line) in train_dataset:\n",
     "    counter.update(char_tokenizer(line))\n",
-    "vocab = torchtext.vocab.Vocab(counter)\n",
+    "vocab = torchtext.vocab.vocab(counter)\n",
     "\n",
     "vocab_size = len(vocab)\n",
     "print(f\"Vocabulary size = {vocab_size}\")\n",
-    "print(f\"Encoding of 'a' is {vocab.stoi['a']}\")\n",
-    "print(f\"Character with code 13 is {vocab.itos[13]}\")"
+    "print(f\"Encoding of 'a' is {vocab.get_stoi()['a']}\")\n",
+    "print(f\"Character with code 13 is {vocab.get_itos()[13]}\")"
    ]
   },
   {
@@ -83,23 +83,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "tensor([43,  4, 11, 11,  2, 26,  5, 23,  2, 38,  3,  4, 10,  9,  2, 31, 11,  4,\n",
-       "        21,  2, 38,  4, 14, 25,  2, 34,  8,  5,  6,  2,  5, 13,  3,  2, 38, 11,\n",
-       "         4, 14, 25,  2, 55, 37,  3, 15,  5,  3, 10,  9, 56,  2, 37,  3, 15,  5,\n",
-       "         3, 10,  9,  2, 29,  2, 26, 13,  6, 10,  5, 29,  9,  3, 11, 11,  3, 10,\n",
-       "         9, 27,  2, 43,  4, 11, 11,  2, 26,  5, 10,  3,  3,  5, 58,  9,  2, 12,\n",
-       "        21,  7,  8, 12, 11,  7,  8, 18, 61, 22,  4,  8, 12,  2,  6, 19,  2, 15,\n",
-       "        11,  5, 10,  4, 29, 14, 20,  8,  7, 14,  9, 27,  2,  4, 10,  3,  2,  9,\n",
-       "         3,  3,  7,  8, 18,  2, 18, 10,  3,  3,  8,  2,  4, 18,  4,  7,  8, 23])"
+       "tensor([ 0,  1,  2,  2,  3,  4,  5,  6,  3,  7,  8,  1,  9, 10,  3, 11,  2,  1,\n",
+       "        12,  3,  7,  1, 13, 14,  3, 15, 16,  5, 17,  3,  5, 18,  8,  3,  7,  2,\n",
+       "         1, 13, 14,  3, 19, 20,  8, 21,  5,  8,  9, 10, 22,  3, 20,  8, 21,  5,\n",
+       "         8,  9, 10,  3, 23,  3,  4, 18, 17,  9,  5, 23, 10,  8,  2,  2,  8,  9,\n",
+       "        10, 24,  3,  0,  1,  2,  2,  3,  4,  5,  9,  8,  8,  5, 25, 10,  3, 26,\n",
+       "        12, 27, 16, 26,  2, 27, 16, 28, 29, 30,  1, 16, 26,  3, 17, 31,  3, 21,\n",
+       "         2,  5,  9,  1, 23, 13, 32, 16, 27, 13, 10, 24,  3,  1,  9,  8,  3, 10,\n",
+       "         8,  8, 27, 16, 28,  3, 28,  9,  8,  8, 16,  3,  1, 28,  1, 27, 16,  6])"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -128,29 +128,29 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(tensor([[43,  4, 11,  ..., 18, 61, 22],\n",
-       "         [ 4, 11, 11,  ..., 61, 22,  4],\n",
-       "         [11, 11,  2,  ..., 22,  4,  8],\n",
+       "(tensor([[ 0,  1,  2,  ..., 28, 29, 30],\n",
+       "         [ 1,  2,  2,  ..., 29, 30,  1],\n",
+       "         [ 2,  2,  3,  ..., 30,  1, 16],\n",
        "         ...,\n",
-       "         [37,  3, 15,  ...,  4, 18,  4],\n",
-       "         [ 3, 15,  5,  ..., 18,  4,  7],\n",
-       "         [15,  5,  3,  ...,  4,  7,  8]], device='cuda:0'),\n",
-       " tensor([[ 4, 11, 11,  ..., 61, 22,  4],\n",
-       "         [11, 11,  2,  ..., 22,  4,  8],\n",
-       "         [11,  2, 26,  ...,  4,  8, 12],\n",
+       "         [20,  8, 21,  ...,  1, 28,  1],\n",
+       "         [ 8, 21,  5,  ..., 28,  1, 27],\n",
+       "         [21,  5,  8,  ...,  1, 27, 16]]),\n",
+       " tensor([[ 1,  2,  2,  ..., 29, 30,  1],\n",
+       "         [ 2,  2,  3,  ..., 30,  1, 16],\n",
+       "         [ 2,  3,  4,  ...,  1, 16, 26],\n",
        "         ...,\n",
-       "         [ 3, 15,  5,  ..., 18,  4,  7],\n",
-       "         [15,  5,  3,  ...,  4,  7,  8],\n",
-       "         [ 5,  3, 10,  ...,  7,  8, 23]], device='cuda:0'))"
+       "         [ 8, 21,  5,  ..., 28,  1, 27],\n",
+       "         [21,  5,  8,  ...,  1, 27, 16],\n",
+       "         [ 5,  8,  9,  ..., 27, 16,  6]]))"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -180,7 +180,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -207,7 +207,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -216,7 +216,7 @@
     "        out, s = net(enc(chars).view(1,-1).to(device))\n",
     "        for i in range(size):\n",
     "            nc = torch.argmax(out[0][-1])\n",
-    "            chars.append(vocab.itos[nc])\n",
+    "            chars.append(vocab.get_itos()[nc])\n",
     "            out, s = net(nc.view(1,-1),s)\n",
     "        return ''.join(chars)"
    ]
@@ -234,35 +234,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Current loss = 4.442246913909912\n",
-      "today ggrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrg\n",
-      "Current loss = 2.1178359985351562\n",
-      "today and a could a the to the to the to the to the to the to the to the to the to the to the to the to th\n",
-      "Current loss = 1.6465336084365845\n",
-      "today on Tuesday the company to the United States and a policing to the United States and a policing to th\n",
-      "Current loss = 2.3716814517974854\n",
-      "today to the United States and a new men to the United States and a new men to the United States and a new\n",
-      "Current loss = 1.6844098567962646\n",
-      "today of the first the first the first the first the first the first the first the first the first the fir\n",
-      "Current loss = 1.702707052230835\n",
-      "today of the United States a said the United States a said the United States a said the United States a sa\n",
-      "Current loss = 1.9633255004882812\n",
-      "today of the first the first the first the first the first the first the first the first the first the fir\n",
-      "Current loss = 1.8642014265060425\n",
-      "today of the second a second a second a second a second a second a second a second a second a second a sec\n",
-      "Current loss = 1.7720613479614258\n",
-      "today and and and the company of the company of the company of the company of the company of the company o\n",
-      "Current loss = 1.52818763256073\n",
-      "today and the company of the company of the company of the company of the company of the company of the co\n",
-      "Current loss = 1.5444810390472412\n",
-      "today and the counters to the first the counters to the first the counters to the first the counters to th\n"
+      "Current loss = 4.398899078369141\n",
+      "today sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr sr s\n"
      ]
     }
    ],
@@ -355,7 +335,7 @@
     "            #nc = torch.argmax(out[0][-1])\n",
     "            out_dist = out[0][-1].div(temperature).exp()\n",
     "            nc = torch.multinomial(out_dist,1)[0]\n",
-    "            chars.append(vocab.itos[nc])\n",
+    "            chars.append(vocab.get_itos()[nc])\n",
     "            out, s = net(nc.view(1,-1),s)\n",
     "        return ''.join(chars)\n",
     "    \n",
@@ -372,10 +352,13 @@
   }
  ],
  "metadata": {
+  "interpreter": {
+   "hash": "16af2a8bbb083ea23e5e41c7f5787656b2ce26968575d8763f2c4b17f9cd711f"
+  },
   "kernelspec": {
-   "display_name": "py37_pytorch",
+   "display_name": "Python 3.8.12 ('py38')",
    "language": "python",
-   "name": "conda-env-py37_pytorch-py"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -387,7 +370,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.7"
+   "version": "3.8.12"
   }
  },
  "nbformat": 4,
diff --git a/lessons/5-NLP/17-GenerativeNetworks/GenerativeTF.ipynb b/lessons/5-NLP/17-GenerativeNetworks/GenerativeTF.ipynb
index 2b2e95babc0152042762a447328d42e7cf2483d8..67ebf03278d89c627b7ad923e394a0c9cad20ac5 100644
--- a/lessons/5-NLP/17-GenerativeNetworks/GenerativeTF.ipynb
+++ b/lessons/5-NLP/17-GenerativeNetworks/GenerativeTF.ipynb
@@ -455,10 +455,13 @@
   }
  ],
  "metadata": {
+  "interpreter": {
+   "hash": "16af2a8bbb083ea23e5e41c7f5787656b2ce26968575d8763f2c4b17f9cd711f"
+  },
   "kernelspec": {
-   "display_name": "py38_tensorflow",
+   "display_name": "Python 3.8.12 ('py38')",
    "language": "python",
-   "name": "conda-env-py38_tensorflow-py"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -470,7 +473,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.10"
+   "version": "3.8.12"
   }
  },
  "nbformat": 4,
diff --git a/lessons/5-NLP/17-GenerativeNetworks/torchnlp.py b/lessons/5-NLP/17-GenerativeNetworks/torchnlp.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd709f0d792fe3018a1f219509db3f4481d019e9
--- /dev/null
+++ b/lessons/5-NLP/17-GenerativeNetworks/torchnlp.py
@@ -0,0 +1,104 @@
+import builtins
+import torch
+import torchtext
+import collections
+import os
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+vocab = None
+tokenizer = torchtext.data.utils.get_tokenizer('basic_english')
+
+def load_dataset(ngrams=1,min_freq=1):
+    global vocab, tokenizer
+    print("Loading dataset...")
+    train_dataset, test_dataset = torchtext.datasets.AG_NEWS(root='./data')
+    train_dataset = list(train_dataset)
+    test_dataset = list(test_dataset)
+    classes = ['World', 'Sports', 'Business', 'Sci/Tech']
+    print('Building vocab...')
+    counter = collections.Counter()
+    for (label, line) in train_dataset:
+        counter.update(torchtext.data.utils.ngrams_iterator(tokenizer(line),ngrams=ngrams))
+    vocab = torchtext.vocab.vocab(counter, min_freq=min_freq)
+    return train_dataset,test_dataset,classes,vocab
+
+def encode(x,voc=None,unk=0,tokenizer=tokenizer):
+    stoi = (vocab if voc is None else voc).get_stoi() # fetch the mapping once, not per token
+    return [stoi.get(s,unk) for s in tokenizer(x)]
+
+def train_epoch(net,dataloader,lr=0.01,optimizer=None,loss_fn = torch.nn.CrossEntropyLoss(),epoch_size=None, report_freq=200):
+    optimizer = optimizer or torch.optim.Adam(net.parameters(),lr=lr)
+    loss_fn = loss_fn.to(device)
+    net.train()
+    total_loss,acc,count,i = 0,0,0,0
+    for labels,features in dataloader:
+        optimizer.zero_grad()
+        features, labels = features.to(device), labels.to(device)
+        out = net(features)
+        loss = loss_fn(out,labels) # cross-entropy loss by default
+        loss.backward()
+        optimizer.step()
+        total_loss+=loss
+        _,predicted = torch.max(out,1)
+        acc+=(predicted==labels).sum()
+        count+=len(labels)
+        i+=1
+        if i%report_freq==0:
+            print(f"{count}: acc={acc.item()/count}")
+        if epoch_size and count>epoch_size:
+            break
+    return total_loss.item()/count, acc.item()/count
+
+def padify(b,voc=None,tokenizer=tokenizer):
+    # b is a list of length batch_size containing (label, text) tuples:
+    #   - first element of each tuple = label
+    #   - second = feature (the text sequence)
+    # build the vectorized sequences
+    v = [encode(x[1],voc=voc,tokenizer=tokenizer) for x in b]
+    # compute max length of a sequence in this minibatch
+    l = max(map(len,v))
+    return ( # tuple of two tensors - labels and features
+        torch.LongTensor([t[0]-1 for t in b]),
+        torch.stack([torch.nn.functional.pad(torch.tensor(t),(0,l-len(t)),mode='constant',value=0) for t in v])
+    )
+
+def offsetify(b,voc=None):
+    # first, compute data tensor from all sequences
+    x = [torch.tensor(encode(t[1],voc=voc)) for t in b]
+    # now, compute the offsets by accumulating the tensor of sequence lengths
+    o = [0] + [len(t) for t in x]
+    o = torch.tensor(o[:-1]).cumsum(dim=0)
+    return ( 
+        torch.LongTensor([t[0]-1 for t in b]), # labels
+        torch.cat(x), # text 
+        o
+    )
+
+def train_epoch_emb(net,dataloader,lr=0.01,optimizer=None,loss_fn = torch.nn.CrossEntropyLoss(),epoch_size=None, report_freq=200,use_pack_sequence=False):
+    optimizer = optimizer or torch.optim.Adam(net.parameters(),lr=lr)
+    loss_fn = loss_fn.to(device)
+    net.train()
+    total_loss,acc,count,i = 0,0,0,0
+    for labels,text,off in dataloader:
+        optimizer.zero_grad()
+        labels,text = labels.to(device), text.to(device)
+        if use_pack_sequence:
+            off = off.to('cpu')
+        else:
+            off = off.to(device)
+        out = net(text, off)
+        loss = loss_fn(out,labels) # cross-entropy loss by default
+        loss.backward()
+        optimizer.step()
+        total_loss+=loss
+        _,predicted = torch.max(out,1)
+        acc+=(predicted==labels).sum()
+        count+=len(labels)
+        i+=1
+        if i%report_freq==0:
+            print(f"{count}: acc={acc.item()/count}")
+        if epoch_size and count>epoch_size:
+            break
+    return total_loss.item()/count, acc.item()/count
+
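
For reference, a minimal usage sketch of the helpers defined above; the BoWClassifier model and the hyperparameters are illustrative, not part of the lesson code:

    import torch
    import torchnlp

    train_ds, test_ds, classes, vocab = torchnlp.load_dataset()
    loader = torch.utils.data.DataLoader(
        train_ds, batch_size=16, shuffle=True, collate_fn=torchnlp.padify)

    class BoWClassifier(torch.nn.Module):
        def __init__(self, vocab_size, num_classes, hidden=32):
            super().__init__()
            self.emb = torch.nn.Embedding(vocab_size, hidden)
            self.fc = torch.nn.Linear(hidden, num_classes)
        def forward(self, x):
            # average the token embeddings, then classify
            return self.fc(self.emb(x).mean(dim=1))

    net = BoWClassifier(len(vocab), len(classes)).to(torchnlp.device)
    loss, acc = torchnlp.train_epoch(net, loader, epoch_size=5000)
    print(f"loss={loss:.3f}, acc={acc:.3f}")
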