8

I'm trying to write front-end code that asks the user to provide a PDF and then, entirely in the user's browser, produces an array of PNGs (as data URLs, via canvas.toDataURL), where each entry in the array corresponds to one page of the PDF:

dat[0] = png of page 1
dat[1] = png of page 2
...

When I test the code below, the pages are somehow rendered on top of each other and rotated.

<script src="http://cdnjs.cloudflare.com/ajax/libs/processing.js/1.4.1/processing-api.min.js"></script><html>
<!--
  Created using jsbin.com
  Source can be edited via http://jsbin.com/pdfjs-helloworld-v2/8598/edit
-->
<body>
  <canvas id="the-canvas" style="border:1px solid black"></canvas>
  <input id='pdf' type='file'/>

  <!-- Use latest PDF.js build from Github -->

  <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js"></script>
  <script src="pdf.js"></script>
  <script src="pdf.worker.js"></script>
  <script type="text/javascript">
    //
    // Reads the PDF selected in the #pdf file input as an ArrayBuffer, renders
    // each of its pages with PDF.js into #the-canvas, and appends the PNG data
    // URL of the canvas to `dat` after each render.
    //
    // `dat` is assigned without var/let/const, so it is an implicit global;
    // the same applies to `file`, `fileReader`, `number_of_pages` and `i` below.
    //
    dat = [];
    

    var pdf = document.getElementById('pdf');
    // Runs each time the file input's selection changes.
    pdf.onchange = function(ev) {
      // Assignment inside the condition: proceed only if a file was selected.
      if (file = document.getElementById('pdf').files[0]) {
        fileReader = new FileReader();
        // Fires once readAsArrayBuffer (below) has finished reading the file.
        fileReader.onload = function(ev) {
          //console.log(ev);
          // Note: this `pdf` parameter (the loaded document) shadows the outer
          // `pdf` variable (the <input> element).
          PDFJS.getDocument(fileReader.result).then(function getPdfHelloWorld(pdf) {
            //
            // Request every page of the document (page numbers start at 1)
            //
            number_of_pages = pdf.numPages;

            for(i = 1; i < number_of_pages+1; ++i) {
              // getPage returns a thenable, so the loop finishes issuing all
              // requests before any of these callbacks run.
              pdf.getPage(i).then(function getPageHelloWorld(page) {

              var scale = 1;
              var viewport = page.getViewport(scale);

              //
              // Prepare canvas using PDF page dimensions
              //
              // NOTE(review): every page looks up the same #the-canvas element,
              // so all pages resize and draw into one shared canvas. Since the
              // renders are asynchronous they can presumably overlap, which
              // looks like the cause of the stacked output described in the
              // question - confirm.
              var canvas = document.getElementById('the-canvas');
              var context = canvas.getContext('2d');
              canvas.height = viewport.height;
              canvas.width = viewport.width;

              //
              // Render PDF page into canvas context
              //
              var renderContext = {
                canvasContext: context,
                viewport: viewport};
              page.render(renderContext).then(function() {
                // Snapshot whatever the shared canvas currently shows as a PNG
                // data URL. Entries are appended in the order the renders
                // finish - presumably not guaranteed to be page order; verify.
                dat.push(canvas.toDataURL('image/png'));
              });
              });
            }
            //console.log(pdf.numPages);
            //console.log(pdf)

          }, function(error){
            // Loading/parsing the document failed; only logged.
            console.log(error);
          });
        };
        // Start reading the selected file; onload above receives the result.
        fileReader.readAsArrayBuffer(file);
      }
    }

  </script>


<style id="jsbin-css">

</style>
<script>

</script>
</body>
</html>

I'm only interested in the array dat. When I render the images in the array, I see that dat[0] = PNG of page 1 (correct)
dat[1] = PNG of page 1 and PNG of page 2, rotated 180°, on top of each other
...

How do I ensure a correct rendering of single pages in each entry of the array?

2 Answers 2

19
+25

Try rendering each page on its own canvas instead of reusing a single one. You can create a canvas and append it to the container using

// Look up the container element, then create a fresh <canvas> and attach it.
// appendChild returns the node it appended, so `canvas` is the new element.
var canvasdiv = document.getElementById('canvas');
var canvas = canvasdiv.appendChild(document.createElement('canvas'));

// Loads a sample PDF with PDF.js, renders every page onto its own <canvas>
// inside the #canvas container, and collects one PNG data URL per page in
// `data`, where data[0] is page 1, data[1] is page 2, and so on.
var url = 'https://raw.githubusercontent.com/mozilla/pdf.js/ba2edeae/web/compressed.tracemonkey-pldi-09.pdf';

var PDFJS = window['pdfjs-dist/build/pdf'];

PDFJS.GlobalWorkerOptions.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';

var loadingTask = PDFJS.getDocument(url);

loadingTask.promise.then(function(pdf) {

  var canvasdiv = document.getElementById('canvas');
  var totalPages = pdf.numPages;
  // Pre-sized so each page can be stored at its own index (see below).
  var data = new Array(totalPages);
  // Number of pages finished so far; data.length no longer reflects progress
  // once the array is pre-sized.
  var loaded = 0;

  // `let` gives every iteration its own pageNumber binding, so the async
  // callbacks below see the page number they were started with.
  for (let pageNumber = 1; pageNumber <= totalPages; pageNumber++) {
    pdf.getPage(pageNumber).then(function(page) {

      var scale = 1.5;
      var viewport = page.getViewport({ scale: scale });

      // A dedicated canvas per page: concurrent renders never share a surface.
      var canvas = document.createElement('canvas');
      canvasdiv.appendChild(canvas);

      // Prepare canvas using PDF page dimensions
      var context = canvas.getContext('2d');
      canvas.height = viewport.height;
      canvas.width = viewport.width;

      // Render PDF page into canvas context
      var renderContext = { canvasContext: context, viewport: viewport };

      var renderTask = page.render(renderContext);
      return renderTask.promise.then(function() {
        // Renders finish in arbitrary order, so store by page index rather
        // than push(); otherwise data[k] would not reliably be page k + 1.
        data[pageNumber - 1] = canvas.toDataURL('image/png');
        loaded++;
        console.log(loaded + ' page(s) loaded in data');
      });
    }).catch(function(reason) {
      // Fetching or rendering this page failed; its slot in `data` stays empty.
      console.error(reason);
    });
  }

}, function(reason) {
  // PDF loading error
  console.error(reason);
});
/* Styles every rendered page canvas: a black border, a small gap between
   canvases, and a displayed width of 25% of the containing block. */
canvas {
  border: 1px solid black;
  margin: 5px;
  width: 25%;
}
<!-- Loads the PDF.js library build; presumably this is what exposes
     window['pdfjs-dist/build/pdf'] to the script - confirm against the build. -->
<script src="//mozilla.github.io/pdf.js/build/pdf.js"></script>

<!-- Container element looked up by id 'canvas'; the script appends the page
     canvases to it. -->
<div id="canvas"></div>

Sign up to request clarification or add additional context in comments.

Comments

5

For those who came here from Google looking for an Angular solution, here is an implementation that renders each page on a different canvas.

pdf-viewer.component.html

<!-- For each entry of `pages`: a hidden canvas used as the render target and
     an <img> bound to that entry (the page's PNG data URL once it is set). -->
<div *ngFor="let page of pages">
 <canvas #canvas hidden ></canvas>
 <img [src]="page">
</div>

pdf-viewer.component.ts

import * as pdfjsLib from 'pdfjs-dist';
pdfjsLib.GlobalWorkerOptions.workerSrc = 'pdf.worker.js';

/**
 * Renders every page of a PDF onto its own canvas and exposes the result as an
 * array of PNG data URLs (`pages`), one entry per page in page order.
 */
@Component({
    selector: 'app-pdf-viewer',
    templateUrl: './pdf-viewer.component.html',
    styleUrls: ['./pdf-viewer.component.scss'],
})
export class PdfViewerComponent implements OnInit {
    constructor() { }

    /**
     * The `#canvas` elements queried from the template; the element at
     * index `i` is used as the render target for page `i + 1`.
     * Assigned by Angular, hence the definite-assignment assertion.
     */
    @ViewChildren('canvas') canvas!: QueryList<ElementRef<HTMLCanvasElement>>;

    /** Passed to PDF.js as the document `url`. Assigned by Angular via the input binding. */
    @Input() pdfBase64!: string;

    /** PNG data URL per page; `pages[i]` belongs to page `i + 1`. */
    pages: string[] = [];

    ngOnInit(): void {
        // setPages() is asynchronous; handle its rejection explicitly so a
        // load or render failure is reported instead of becoming an unhandled
        // promise rejection.
        this.setPages().catch((error: unknown) => {
            console.error('Failed to render PDF pages', error);
        });
    }

    /**
     * Loads the document, sizes `pages` to the page count, then renders all
     * pages concurrently and stores each page's PNG data URL at its index.
     *
     * @returns A promise that resolves once every page has been rendered, and
     *          rejects if loading the document or rendering any page fails.
     */
    async setPages(): Promise<void> {
        const pdfDoc = await pdfjsLib.getDocument({ url: this.pdfBase64 }).promise;
        const totalPages = pdfDoc.numPages;
        // Sizing `pages` first lets the template's *ngFor create one canvas per page.
        // NOTE(review): this relies on the view having been updated before the
        // getPage() callbacks below look up their canvas - confirm.
        this.pages = new Array(totalPages);

        const renderPage = async (pageIndex: number): Promise<void> => {
            // PDF.js page numbers are 1-based.
            const page = await pdfDoc.getPage(pageIndex + 1);
            const canvasRef = this.canvas.toArray()[pageIndex];
            if (canvasRef === undefined) {
                throw new Error(`No canvas available for page ${pageIndex + 1}`);
            }
            const canvas = canvasRef.nativeElement;

            await this.renderPdfPageToCanvas(page, canvas);
            // Store by index (not push) so the order matches the page order
            // regardless of which render finishes first.
            this.pages[pageIndex] = canvas.toDataURL('image/png');
        };

        await Promise.all(
            Array.from({ length: totalPages }, (_, pageIndex) => renderPage(pageIndex)),
        );
    }

    /**
     * Sizes `canvas` to the page's viewport at scale 1.0 and renders the page into it.
     *
     * @param page - The PDF.js page to render.
     * @param canvas - The canvas to draw on; its width and height are overwritten.
     * @returns The render task's promise, settled when rendering completes.
     * @throws Error if the canvas cannot provide a 2D rendering context.
     */
    renderPdfPageToCanvas(page: pdfjsLib.PDFPageProxy, canvas: HTMLCanvasElement): pdfjsLib.PDFPromise<pdfjsLib.PDFPageProxy> {
        const viewport = page.getViewport({ scale: 1.0 });
        canvas.height = viewport.height;
        canvas.width = viewport.width;

        // getContext() returns null when a 2D context cannot be obtained.
        const canvasContext = canvas.getContext('2d');
        if (canvasContext === null) {
            throw new Error('2D canvas rendering context is not available');
        }

        return page.render({ canvasContext, viewport }).promise;
    }
}

package.json

{
    ...
    "dependencies": {
        ...
        "@angular/core": "^9.1.11",
        "pdfjs-dist": "2.3.200"
    },
    "devDependencies": {
        ...
        "@types/pdfjs-dist": "2.1.3"
    }
}

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.