国标文件+行标文件获取指南
国家标准全文公开系统
https://openstd.samr.gov.cn/bzgk/gb/std_list?r=0.747153859542881&p.p1=0&p.p2=%E7%BD%91%E7%BB%9C%E5%AE%89%E5%85%A8%E6%8A%80%E6%9C%AF&p.p6=35&p.p90=circulation_date&p.p91=desc
// ==UserScript==
// @name         国标下载
// @namespace    http://tampermonkey.net/
// @version      0.1
// @description  try to take over the world!
// @author       wcd
// @match        http://c.gb688.cn/*
// @icon         https://www.google.com/s2/favicons?sz=64&domain=gb688.cn
// @grant        none
// @require      https://code.jquery.com/jquery-3.6.0.min.js
// ==/UserScript==

// Adds two buttons to the online reader on c.gb688.cn:
//   "获取页面" - reassembles every page of the document onto its own canvas
//   "下载pdf"  - exports those canvases as a single PDF via jsPDF
// The helper functions are injected as an inline <script> element so that the
// buttons' onclick attributes can reach them as globals.

(function() {
    'use strict';

    $(function(){
        // jsPDF is loaded into the page; downloadPDF() reads it from window.jspdf.
        $("head").append('<script src="https://cdnjs.cloudflare.com/ajax/libs/jspdf/2.5.1/jspdf.umd.min.js"></script>');

        let my_script=`<script>
// Convert a CSS pixel length such as "119px" to a number.
function px2Num(px) {
    return Number(px.split("px")[0].toString());
}

// Build one canvas per ".page" element and paint the page's tiles onto it.
function getPages(){
    // Drop the output of a previous run. jQuery has no .delete() method, so
    // the original call threw on the second click. The old <img> elements
    // must go too, otherwise ids are duplicated and the load listener below
    // would be attached to an image that has already finished loading.
    $("canvas[id^=canvas_]").remove();
    $("img[id^=img_]").remove();

    var baseurl = "http://c.gb688.cn/bzgk/gb/";
    var pagecount = $("div.page").length;
    var pagebg = new Array(pagecount);
    var pheight = $("#0").css("height");
    var pwidth = $("#0").css("width");

    // Find the background image of every page: either the "bg" attribute of
    // the page element, or the file name at the end of the first span's
    // background-image URL.
    $(".page").each(function(i, elem) {
        if (elem.hasAttribute("bg")) {
            pagebg[i] = elem.getAttribute("bg");
        } else {
            pagebg[i] = $(elem).children("span").first().css("background-image").split('"')[1].split(/\\//).slice(-1)[0];
        }
    });

    // Stitch the tiles of each page together.
    $(".page").each(function(i, elem) {
        var canvasclone = $("canvas#canvas").clone();
        canvasclone.attr("id","canvas_"+i).css("background-color","#FFFFFFFF");
        $("#newimg").append(canvasclone);

        var canvas = document.getElementById('canvas_'+i);
        var ctx = canvas.getContext('2d');
        // White background so that areas without a tile are not transparent.
        ctx.fillStyle="white";
        ctx.fillRect(0,0,px2Num(pwidth), px2Num(pheight));

        $("#imgContainer").append("<img id=img_" + i + " src='" + baseurl+pagebg[i] + "' />")
        var image = document.getElementById('img_'+i);
        image.addEventListener('load', e => {
            // Every span displays one 119x168 tile of the background image.
            // The source offset comes from its background-position; the
            // destination cell comes from parts [1] and [2] of its class
            // name split on "-" (presumably column and row - verify against
            // the page markup). The template canvas is 1190x1680, i.e. a
            // 10x10 grid of such tiles.
            $(elem).children("span").each(function(j,s){
                ctx.drawImage(image,
                    -px2Num($(s).css("background-position-x")),
                    -px2Num($(s).css("background-position-y")),
                    119, 168,
                    $(s).attr("class").split('-')[1]*119,
                    $(s).attr("class").split('-')[2]*168,
                    119, 168);
            });
        });
    });
}

// Return true when every image in imgs has finished loading
// (true for an empty collection).
function isimgComplete(imgs){
    for (var i = 0; i < imgs.length; i++) {
        if (!imgs[i].complete) {
            return false;
        }
    }
    return true;
}

// Export the canvases produced by getPages() as "<title>.pdf".
function downloadPDF(){
    if( $("canvas[id^=canvas_]").length == 0 ) {
        alert("请先点击获取页面!");
        return;
    }
    var images = $("img[id^=img_]");
    if(!isimgComplete(images)){
        alert("页面尚未提取完,稍后再试");
        return;
    }
    var pheight = $("#0").css("height");
    var pwidth = $("#0").css("width");
    const { jsPDF } = window.jspdf;
    const pdf = new jsPDF('p','px',[px2Num(pwidth), px2Num(pheight)]);

    // The file name is the part of the page title after the first "|".
    // Fall back to the whole title when there is no "|" instead of throwing.
    var titleParts = $("title").text().split("|");
    var title = (titleParts[1] || titleParts[0] || "document").toString().trim();

    $("canvas[id^=canvas_]").each(function(i,e){
        // The constructor already created the first page, so only add a new
        // page before the second and later images.
        if (i > 0) {
            pdf.addPage();
        }
        // 'MEDIUM' is the correct spelling of the compression level; the
        // original passed the typo 'MEDDIUM'.
        pdf.addImage(document.getElementById('canvas_'+i).toDataURL('image/jpeg'), 'jpeg', 0, 0, px2Num(pwidth), px2Num(pheight), '', 'MEDIUM');
    });
    pdf.save(title + ".pdf");
}

// Wait until there is one canvas per page. Not called by the buttons.
// The original spun in a while loop, which blocks the thread so the timers it
// scheduled could never fire; poll once per second instead.
function downloadPDF0(){
    if ($("canvas[id^=canvas_]").length < $(".page").length) {
        setTimeout(downloadPDF0, 1000);
    }
}
</script>`;

        // Buttons, a hidden container for the source images, the output
        // container, and the hidden template canvas that getPages() clones.
        let source_img = `
<div id="canvas_container">
<input type="button" value="获取页面" onclick="getPages()"/>
<input type="button" value="下载pdf" onclick="downloadPDF()"/>
</div>
<div id="imgContainer" style="display:none;"><img id="source" src=""></div>
<div id="newimg" width="1190px"></div>
<canvas id="canvas" width="1190px" height="1680px" style="display:none;"></canvas>`;

        // Pin the button container to the top-left corner of the viewport.
        let style = `
<style>
#canvas_container {
    position: fixed;
    height: 30px;
    width: 150px;
    top: 50px;
    left: 10px;
    border: 1px;
    /*background-color: #00ff0099;*/
    border-radius: 3px;
}
</style>
`;
        $("head").append(style);
        $("body").append(source_img);
        $("body").append(my_script);
    });
})();
行业标准信息服务平台
https://hbba.sacinfo.org.cn/stdList
import io
import os
import time

import requests
from PIL import Image

# Downloader for PDFs from the industry-standard information service platform
# https://hbba.sacinfo.org.cn/
# The online reader serves a document one page image at a time, so pages are
# fetched one by one, starting at page 0, until the server has no more pages.
# Example reader URL:
# https://hbba.sacinfo.org.cn/attachment/onlineRead/e99f8d17284a5e920923b11911b2f0b1df9ca7e1b6d177b9a7e71ba5390bf573

# `name` is the last path segment of the onlineRead URL.
name = 'e99f8d17284a5e920923b11911b2f0b1df9ca7e1b6d177b9a7e71ba5390bf573'
# Page images are stored in a folder named after `name` to avoid collisions.
folder = r"./" + name + "/"
# The resulting PDF is written to <name>/<name>.pdf.
pdfFile = r"./" + name + "/" + name + ".pdf"
# Index of the first page to download.
star_num = 0
# Give up on a page after this many consecutive failed attempts, so that a
# persistent network error cannot make the download loop spin forever.
MAX_RETRIES = 3
# Seconds to wait for the server before a request is treated as failed.
REQUEST_TIMEOUT = 30


def _is_image(content):
    """Return True when *content* is image data that Pillow can identify."""
    try:
        with Image.open(io.BytesIO(content)):
            return True
    except OSError:
        # Pillow raises UnidentifiedImageError (an OSError) for non-images.
        return False


def get_data(name, page):
    """Download one page image of a document and save it to disk.

    Args:
        name (str): document id, the last path segment of the onlineRead URL.
        page (int): zero-based page index.

    Returns:
        bool: True when the page was saved, False when the server has no such
        page (HTTP 404, or a response body that is not an image).

    Raises:
        requests.RequestException: on network failures, timeouts, or HTTP
            error statuses other than 404.
    """
    cookies = {
        'Hm_lvt_bc6f61eace617162b31b982f796830e6': '1718068419',
        'Hm_lpvt_bc6f61eace617162b31b982f796830e6': '1718069627',
    }

    headers = {
        'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        # Built from `name` so the Referer always matches the document being
        # downloaded (the original hard-coded one specific document id).
        'Referer': 'https://hbba.sacinfo.org.cn/attachment/onlineRead/' + name,
        'Sec-Fetch-Dest': 'image',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/125.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Google Chrome";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    response = requests.get(
        'https://hbba.sacinfo.org.cn/hbba_onlineRead_page/' + name + '/' + str(page) + '.png',
        cookies=cookies,
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )

    # The original looked for the text '404' inside the decoded body, which
    # can match by accident inside binary image data. Use the status code
    # and, for error pages served with status 200, the body itself.
    if response.status_code != 404:
        response.raise_for_status()
    if response.status_code == 404 or not _is_image(response.content):
        print("已经到最后一页\n下载结束,等待转换成pdf")
        return False

    save_png(response.content, page)
    return True


def save_png(content, num):
    """Write the raw image bytes of page *num* into the download folder.

    Args:
        content (bytes): image data as returned by the server.
        num (int): page index, used as the file name.
    """
    # Pause between pages so the server is not hammered with requests.
    time.sleep(1)
    # The ".jpg" file name is kept from the original script; Pillow
    # identifies the real format from the file content when it is opened.
    with open(os.path.join(folder, str(num) + '.jpg'), 'wb') as file:
        file.write(content)
    print("第" + str(num) + "页保存成功")


def _page_sort_key(file_name):
    """Sort key that orders page files by page number instead of as text."""
    stem = os.path.splitext(file_name)[0]
    if stem.isdigit():
        return (0, int(stem), file_name)
    # Files without a purely numeric name go last, in alphabetical order.
    return (1, 0, file_name)


def combine_imgs_pdf(folder_path, pdf_file_path):
    """Merge all images in a folder into a single PDF, one image per page.

    Args:
        folder_path (str): source folder containing the page images.
        pdf_file_path (str): path of the PDF file to write.

    Raises:
        ValueError: if the folder contains no .png or .jpg files.
    """
    image_names = [
        entry for entry in os.listdir(folder_path)
        if entry.lower().endswith(('.png', '.jpg'))
    ]
    if not image_names:
        raise ValueError("no page images found in " + folder_path)

    # A plain string sort would put "10.jpg" before "2.jpg".
    image_names.sort(key=_page_sort_key)

    pages = []
    for image_name in image_names:
        with Image.open(os.path.join(folder_path, image_name)) as page_image:
            # convert() returns an independent in-memory copy, so the file
            # can be closed right away. Every page, including the first, is
            # normalised to RGB; the original's mode check was inverted and
            # never converted anything.
            pages.append(page_image.convert("RGB"))

    first_page, *other_pages = pages
    first_page.save(pdf_file_path, "pdf", save_all=True, append_images=other_pages)


def main():
    """Download every page of the document, then merge the pages into a PDF."""
    # exist_ok lets the script be re-run for the same document.
    os.makedirs(folder, exist_ok=True)

    page = star_num
    failures = 0
    while True:
        try:
            has_page = get_data(name, page)
        except (requests.RequestException, OSError) as e:
            print(e)
            failures += 1
            if failures >= MAX_RETRIES:
                print("第" + str(page) + "页连续" + str(MAX_RETRIES) + "次下载失败,停止下载")
                break
            time.sleep(1)
            continue
        if has_page is False:
            break
        failures = 0
        page += 1

    try:
        combine_imgs_pdf(folder, pdfFile)
    except ValueError as e:
        print(e)
        return
    print("转换成功,在文件夹里面找名字为" + name + ".pdf的文件")


if __name__ == "__main__":
    main()
使用前请将脚本中 name 的值替换为目标标准 onlineRead 链接末尾的那串字符。
密码行业标准化技术委员会
http://www.gmbz.org.cn/main/bzlb.html
原文始发于微信公众号(利刃信安):【标准文件】国标文件+行标文件获取指南
- 左青龙
- 微信扫一扫
-
- 右白虎
- 微信扫一扫
-
评论