浏览代码

Python3 With Network Library ( html2text scrapy beautifulsoup4 ipip-ipdb )

hongwenjun 3 年之前
父节点
当前提交
42a9ff920d
共有 3 个文件被更改,包括 180 次插入0 次删除
  1. 56 0
      python3/Dockerfile
  2. 18 0
      python3/Dockerfile-Chrome-Selenium
  3. 106 0
      python3/README.md

+ 56 - 0
python3/Dockerfile

@@ -0,0 +1,56 @@
+# Python 3 toolbox image: tmux, two helper scripts and scraping/network libraries.
+# NOTE(review): stable-slim is a moving tag; consider pinning a Debian release.
+FROM debian:stable-slim
+# One layer: OS packages, helper scripts (saved to /, the default workdir), pip packages.
+RUN  apt-get update \
+  && apt-get install --no-install-recommends --no-install-suggests -y python3 python3-pip tmux wget \
+  && apt-get autoremove -y \
+  && apt-get clean \
+  && rm -rf /var/lib/apt/lists/* \
+  && wget https://git.io/me.py \
+  && wget https://raw.githubusercontent.com/hongwenjun/srgb/master/python/html2md.py \
+  && python3 -m pip install --no-cache-dir --upgrade pip \
+  && pip3 install --no-cache-dir --upgrade requests \
+  && pip3 install --no-cache-dir setuptools html2text scrapy beautifulsoup4 html5lib ipip-ipdb \
+       dnspython bson feedparser qbittorrent-api pymongo func_timeout honeybadger
+
+# Documents the port used by the `python3 -m http.server 8000` usage example.
+EXPOSE 8000/tcp
+
+# Mount point for user data, e.g. `-v /app:/app`.
+VOLUME  /app
+
+CMD ["python3"]
+
+
+################################################################################
+
+#  docker build -t python3 .
+
+#  docker run --rm -it python3
+
+#  docker run --rm -it python3  python3  -i me.py
+
+#  docker run --rm -it python3  python3  html2md.py  https://262235.xyz
+
+#  docker build -t hongwenjun/python3 .
+
+#  docker push hongwenjun/python3
+
+################################################################################
+
+#   docker run -d -p 8000:8000 --restart=always   \
+#       -v /app:/app  --name python3 \
+#       hongwenjun/python3  \
+#       python3 -m http.server 8000
+#
+#   docker exec -it python3   bash
+#
+#   tmux -u
+#
+#   docker exec -it  python3   bash
+#
+#   tmux -u a
+
+################################################################################
+

+ 18 - 0
python3/Dockerfile-Chrome-Selenium

@@ -0,0 +1,18 @@
+# Debian image with Google Chrome, chromedriver and the Selenium Python bindings.
+FROM debian:stable-slim
+# NOTE(review): chromedriver is pinned to 92.0.4515.107 while Chrome is "stable current"; verify they match.
+RUN  apt-get update \
+  && apt-get install --no-install-recommends --no-install-suggests -y wget unzip python3 python3-pip \
+  && wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
+  && apt-get install --no-install-recommends --no-install-suggests -y ./google-chrome-stable_current_amd64.deb \
+  && rm google-chrome-stable_current_amd64.deb \
+  && wget https://chromedriver.storage.googleapis.com/92.0.4515.107/chromedriver_linux64.zip \
+  && unzip chromedriver_linux64.zip \
+  && rm chromedriver_linux64.zip \
+  && mv chromedriver /usr/bin/chromedriver \
+  && apt-get autoremove -y \
+  && apt-get clean \
+  && rm -rf /var/lib/apt/lists/* \
+  && pip3 install --no-cache-dir selenium
+
+#  docker run -it debian:stable-slim bash

+ 106 - 0
python3/README.md

@@ -0,0 +1,106 @@
+![](https://262235.xyz/usr/uploads/2021/08/2281641762.png)
+
+# Python 3 with Network Libraries (html2text, scrapy, beautifulsoup4, ipip-ipdb)
+
+## Usage: Run the Container in the Background
+```
+docker run -d  --restart=always   \
+    -v /app:/app  --name python3 \
+    hongwenjun/python3  \
+    python3 -m http.server 8000
+
+```
+
+##  Run a command in a running container
+```
+docker exec -it  python3   bash
+
+tmux -u
+
+tmux -u a
+```
+
+## os, sys, and html2text libraries
+
+```
+python3  -i me.py
+
+python3  html2md.py  https://www.262235.xyz/index.php/archives/242/
+
+
+```
+## Scrapy Shell 
+
+```
+scrapy shell 'https://262235.xyz'
+
+fetch("https://www.262235.xyz/index.php/archives/242/")
+
+response.xpath('/html/body/section/div/div').get()
+
+import html2text
+
+html = response.xpath('/html/body/section/div/div').get()
+text = html2text.html2text(html)
+print(text)
+```
+
+## Dockerfile
+```
+# Python 3 toolbox image: tmux, two helper scripts and scraping/network libraries.
+# NOTE(review): stable-slim is a moving tag; consider pinning a Debian release.
+FROM debian:stable-slim
+# One layer: OS packages, helper scripts (saved to /, the default workdir), pip packages.
+RUN  apt-get update \
+  && apt-get install --no-install-recommends --no-install-suggests -y python3 python3-pip tmux wget \
+  && apt-get autoremove -y \
+  && apt-get clean \
+  && rm -rf /var/lib/apt/lists/* \
+  && wget https://git.io/me.py \
+  && wget https://raw.githubusercontent.com/hongwenjun/srgb/master/python/html2md.py \
+  && python3 -m pip install --no-cache-dir --upgrade pip \
+  && pip3 install --no-cache-dir --upgrade requests \
+  && pip3 install --no-cache-dir setuptools html2text scrapy beautifulsoup4 html5lib ipip-ipdb \
+       dnspython bson feedparser qbittorrent-api pymongo func_timeout honeybadger
+
+# Documents the port used by the `python3 -m http.server 8000` usage example.
+EXPOSE 8000/tcp
+
+# Mount point for user data, e.g. `-v /app:/app`.
+VOLUME  /app
+
+CMD ["python3"]
+
+
+################################################################################
+
+#  docker build -t python3 .
+
+#  docker run --rm -it python3
+
+#  docker run --rm -it python3  python3  -i me.py
+
+#  docker run --rm -it python3  python3  html2md.py  https://262235.xyz
+
+#  docker build -t hongwenjun/python3 .
+
+#  docker push hongwenjun/python3
+
+################################################################################
+
+#   docker run -d -p 8000:8000 --restart=always   \
+#       -v /app:/app  --name python3 \
+#       hongwenjun/python3  \
+#       python3 -m http.server 8000
+#
+#   docker exec -it python3   bash
+#
+#   tmux -u
+#
+#   docker exec -it  python3   bash
+#
+#   tmux -u a
+
+################################################################################
+
+```