
Merge branch 'master' of ssh://gitlab.justprojects.de:4724/subDesTagesMitExtraKaese/BackgroundMattingV2

subDesTagesMitExtraKaese · 4 years ago
commit 4075c19e0a
2 files changed, 10 insertions, 9 deletions
  1. README.md (+4, −3)
  2. model/refiner.py (+6, −6)

README.md (+4, −3)

@@ -7,6 +7,8 @@ Official repository for the paper [Real-Time High-Resolution Background Matting]
 * [Visit project site](https://grail.cs.washington.edu/projects/background-matting-v2/)
 * [Watch project video](https://www.youtube.com/watch?v=oMfPTeYDF9g)
 
+**Disclaimer**: The video conversion script in this repo is not meant to be real-time. Our research's main contribution is the neural architecture for high-resolution refinement and the new matting datasets. The `inference_speed_test.py` script measures the tensor throughput of our model, which should reach real-time speed. The `inference_video.py` script lets you test your video on our model, but its video encoding and decoding are done without hardware acceleration or parallelization. For production use, you are expected to do additional engineering for hardware encoding/decoding and for loading frames to the GPU in parallel. For more architectural detail, please refer to our paper.
+
  
 
 ## Overview
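
For illustration only (not part of this commit): the "additional engineering" the disclaimer alludes to usually means decoding frames in parallel CPU workers and overlapping host-to-device copies with model compute. A minimal sketch under those assumptions, using OpenCV for decoding; `VideoFrameDataset` and every other name here is hypothetical, not repo code:

```python
# Hypothetical sketch: decode frames in DataLoader worker processes and
# overlap the host-to-device copy with compute. Illustrative only.
import cv2
import torch
from torch.utils.data import Dataset, DataLoader

class VideoFrameDataset(Dataset):
    def __init__(self, path: str):
        cap = cv2.VideoCapture(path)
        self.path = path
        self.length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        cap.release()

    def __len__(self):
        return self.length

    def __getitem__(self, i: int) -> torch.Tensor:
        # Reopening the capture per frame keeps workers independent; a
        # production decoder would keep per-worker handles or use NVDEC.
        cap = cv2.VideoCapture(self.path)
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ok, frame = cap.read()
        cap.release()
        if not ok:
            raise IndexError(i)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        return torch.from_numpy(frame).permute(2, 0, 1).float().div_(255)

loader = DataLoader(VideoFrameDataset("input.mp4"), batch_size=4,
                    num_workers=4, pin_memory=True)
for batch in loader:
    batch = batch.cuda(non_blocking=True)  # async copy from pinned memory
    # ... run the matting model on `batch` here ...
```
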
@@ -28,6 +30,7 @@ Official repository for the paper [Real-Time High-Resolution Background Matting]
 
 ## Updates
 
+* [Apr 21 2021] VideoMatte240K dataset is now published.
 * [Mar 06 2021] Training script is published.
 * [Feb 28 2021] Paper is accepted to CVPR 2021.
 * [Jan 09 2021] PhotoMatte85 dataset is now published.
@@ -49,9 +52,7 @@ Official repository for the paper [Real-Time High-Resolution Background Matting]
 
 ### Datasets
 
-* [PhotoMatte85](https://drive.google.com/file/d/1KpHKYW986Dax9-ZIM7I-HyBoWVcLPuaQ/view?usp=sharing)
-* VideoMatte240K (We are still dealing with licensing. In the meantime, you can visit [storyblocks.com](https://www.storyblocks.com/video/search/green+screen+human?max_duration=10000&sort=most_relevant&video_quality=HD) to download raw green screen videos and recreate the dataset yourself.)
-
+* [Download datasets](https://grail.cs.washington.edu/projects/background-matting-v2/#/datasets)
 
  
 

model/refiner.py (+6, −6)

@@ -219,8 +219,8 @@ class Refiner(nn.Module):
             return torchvision.ops.roi_align(x, boxes, size + 2 * padding, sampling_ratio=1)
         else:
             # Use gather. Crops out patches pixel by pixel.
-            idx = self.compute_pixel_indices(x, idx, size, padding)
-            pat = torch.gather(x.view(-1), 0, idx.view(-1))
+            idx_pix = self.compute_pixel_indices(x, idx, size, padding)
+            pat = torch.gather(x.view(-1), 0, idx_pix.view(-1))
             pat = pat.view(-1, x.size(1), size + 2 * padding, size + 2 * padding)
             return pat
     
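
An aside, not part of the commit: the gather path flattens the image to 1-D and pulls each patch out element by element through precomputed flat indices. A toy sketch of that idea on a 4×4 single-channel image:

```python
# Toy version of the gather-based crop (illustrative only, not repo code):
# pull the 2x2 patch with top-left corner (row=1, col=1) out of a 4x4 image.
import torch

x = torch.arange(16.).view(1, 1, 4, 4)      # values 0..15, shape (B, C, H, W)
rows = torch.tensor([1, 2])
cols = torch.tensor([1, 2])
idx = (rows.view(2, 1) * 4 + cols.view(1, 2)).reshape(-1)  # flat indices [5, 6, 9, 10]
patch = torch.gather(x.view(-1), 0, idx).view(1, 1, 2, 2)
print(patch)  # tensor([[[[ 5.,  6.], [ 9., 10.]]]])
```
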
@@ -249,8 +249,8 @@ class Refiner(nn.Module):
             return x
         else:
             # Use scatter_element. Best compatibility for ONNX. Replacing pixel by pixel.
-            idx = self.compute_pixel_indices(x, idx, size=4, padding=0)
-            return x.view(-1).scatter_(0, idx.view(-1), y.view(-1)).view(x.shape)
+            idx_pix = self.compute_pixel_indices(x, idx, size=4, padding=0)
+            return x.view(-1).scatter_(0, idx_pix.view(-1), y.view(-1)).view(x.shape)
 
     def compute_pixel_indices(self,
                               x: torch.Tensor,
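
Correspondingly (again illustrative, not repo code), the scatter path writes a refined patch back into the flattened image at the same flat indices:

```python
# Toy version of the scatter_-based replace: write a "refined" 2x2 patch
# back into the flattened image at the same flat indices as above.
import torch

x = torch.zeros(1, 1, 4, 4)
y = torch.full((1, 1, 2, 2), 7.)            # stand-in for the refined patch
idx = torch.tensor([5, 6, 9, 10])
x = x.view(-1).scatter_(0, idx, y.view(-1)).view(1, 1, 4, 4)
print(x[0, 0])  # rows 1-2, cols 1-2 now hold 7., everything else is 0.
```
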
@@ -278,5 +278,5 @@ class Refiner(nn.Module):
         o = torch.arange(O)
         idx_pat = (c * H * W).view(C, 1, 1).expand([C, O, O]) + (o * W).view(1, O, 1).expand([C, O, O]) + o.view(1, 1, O).expand([C, O, O])
         idx_loc = b * W * H + y * W * S + x * S
-        idx = idx_loc.view(-1, 1, 1, 1).expand([n, C, O, O]) + idx_pat.view(1, C, O, O).expand([n, C, O, O])
-        return idx
+        idx_pix = idx_loc.view(-1, 1, 1, 1).expand([n, C, O, O]) + idx_pat.view(1, C, O, O).expand([n, C, O, O])
+        return idx_pix
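
As a sanity check (not from the repo), the index arithmetic above can be exercised on a tiny tensor; this toy run assumes a single image (B=1) for simplicity:

```python
# Toy check of the patch-index formula: C=2, H=W=4, patch size S=2,
# padding P=0, so the output patch side is O = S + 2*P = 2.
import torch

C, H, W, S, O = 2, 4, 4, 2, 2
c = torch.arange(C)
o = torch.arange(O)
# Per-patch offsets: channel stride H*W, row stride W, column stride 1.
idx_pat = (c * H * W).view(C, 1, 1).expand(C, O, O) \
        + (o * W).view(1, O, 1).expand(C, O, O) \
        + o.view(1, 1, O).expand(C, O, O)
# One selected patch at grid cell (y=1, x=1) -> top-left pixel (2, 2).
b, y, x = torch.tensor([0]), torch.tensor([1]), torch.tensor([1])
idx_loc = b * W * H + y * W * S + x * S     # 0 + 8 + 2 = 10
idx_pix = idx_loc.view(-1, 1, 1, 1) + idx_pat.view(1, C, O, O)
print(idx_pix[0, 0])  # tensor([[10, 11], [14, 15]]) -> pixels (2,2)..(3,3), channel 0
print(idx_pix[0, 1])  # tensor([[26, 27], [30, 31]]) -> same pixels, channel 1
```
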