Testing automated posting. Sorry for any bugs :D

  • DaGeek247@fedia.io
    link
    fedilink
    arrow-up
    18
    ·
    5 days ago

    I grabbed scanned copies of the books from the seven seas, which are higher quality than the ones available on gocomics. Since these are being pirated anyways, I figured using that would be a better option. I also ran them through an ImageMagick script to try and automatically rearrange the comics so they’re more mobile friendly. I’ve attached an example. Feel free to poke me for the sources and the script if that’s something that interests you.

  • MentalEdge@sopuli.xyz
    link
    fedilink
    English
    arrow-up
    13
    ·
    edit-2
    5 days ago

    @m_f@lemm.ee noticed you’re the new mod. Just got a minimum viable bot to continue the daily posting done.

    • m_f@lemm.ee
      link
      fedilink
      English
      arrow-up
      11
      ·
      5 days ago

      Neat! You should definitely post then, it’s better if I’m not the single-point-of-failure for comics posting. I’ll post my still-somewhat-wip script for posting the Peanuts comic, which uses GoComic and probably has a lot of overlap with C&H.

      I run python post_stuff.py, which imports other scripts for each comic and runs them. get_lemmy.py handles caching auth credentials for up to several days (I’m sure it could go longer but I haven’t checked the docs yet). Caching the creds and running it all from a wrapper entry point keeps the number of logins down, which I was getting rate limited on. It doesn’t currently handle accounts with MFA enabled, but it should be pretty resilient to stuff like file upload failing.

      You can just ignore the image munging bits and probably some other bits, that was for turning the 1x4 Peanuts strip into a 2x2 post here. You might find interesting stuff by poking around on the site, Peanuts has a transcript for each comic in the HTML, though I don’t see that for C&H.

      I’ve also been meaning to publish the git repo somewhere, hopefully this will motivate me to get around to doing that.

      pyproject.toml dependencies section
      dependencies = [
          "backoff~=2.2",
          "beautifulsoup4~=4.12",
          "pythorhead~=0.26",
          "requests~=2.32",
          "urllib3>=2.2.3",
      ]
      
      get_lemmy.py
      import json
      import logging
      import os
      from base64 import b64decode
      from datetime import datetime
      
      from pythorhead import Lemmy
      
      
      def load_token(lemmy: "Lemmy"):
          """Attach a cached auth token to `lemmy`, or log in and cache a new one.

          Reads ./tmp/lemmy-token.txt; if the cached JWT is older than three
          days (judged by its `iat` claim) it is discarded and a fresh login is
          performed using LEMMY_USERNAME / LEMMY_PASSWORD from the environment.

          Raises:
              Exception: if a fresh login attempt fails.
          """
          token_path = "./tmp/lemmy-token.txt"

          try:
              with open(token_path) as f:
                  token = json.load(f)

                  print("Cached token found")
                  # Decode the JWT payload (second dot-separated segment) to
                  # recover the issued-at timestamp.
                  iat = json.loads(b64decode(token["token"].split(".")[1]))["iat"]
                  iat = datetime.fromtimestamp(iat)
                  now = datetime.now()

                  # BUG FIX: timedelta.seconds only holds the sub-day remainder
                  # (0-86399), so the three-day expiry check could never fire.
                  # total_seconds() gives the full elapsed time.
                  if (now - iat).total_seconds() > 3 * 24 * 3600:
                      print("Token has expired, ignoring")
                      raise FileNotFoundError  # jump to the login path below

                  print("Token hasn't expired, using")
                  lemmy._requestor._auth.token = token["token"]
                  lemmy._requestor.logged_in_username = token["logged_in_username"]
          except FileNotFoundError:
              print("Cached token not found, logging in")
              username = os.environ["LEMMY_USERNAME"]
              password = os.environ["LEMMY_PASSWORD"]
              if not lemmy.log_in(username, password):
                  raise Exception("Couldn't log in!")

              # Make sure the cache directory exists before writing the token.
              os.makedirs(os.path.dirname(token_path), exist_ok=True)
              with open(token_path, "w") as f:
                  json.dump({"token": lemmy._requestor._auth.token, "logged_in_username": username}, f)
      
      
      def get_authed_lemmy() -> "Lemmy":
          """Load credentials from `.env`, build a Lemmy client, and auth it.

          Returns:
              A Lemmy instance for LEMMY_DOMAIN with a valid (cached or
              freshly obtained) auth token attached.
          """
          with open(".env") as f:
              # BUG FIX: split on the FIRST '=' only, so values that contain
              # '=' (e.g. base64-ish passwords) survive; also skip blank lines
              # and '#' comments instead of crashing on them.
              os.environ.update(
                  dict(
                      line.strip().split("=", 1)
                      for line in f
                      if line.strip() and not line.strip().startswith("#")
                  )
              )

          # Surface backoff's retry messages on stderr.
          logging.getLogger("backoff").addHandler(logging.StreamHandler())

          lemmy = Lemmy(os.environ["LEMMY_DOMAIN"])

          load_token(lemmy)

          return lemmy
      
      post_peanuts.py
      import json
      import subprocess
      from dataclasses import dataclass
      from datetime import date, datetime
      from html import unescape
      from pathlib import Path
      from tempfile import TemporaryFile
      
      import backoff
      import requests
      from bs4 import BeautifulSoup
      from pythorhead import Lemmy
      from pythorhead.types import LanguageType, SortType
      
      from get_lemmy import get_authed_lemmy
      
      
      @dataclass
      class Strip:
          # Filesystem path of the (already munged) local comic image.
          local_path: str
          # Markdown blockquote transcript ('> ' prefixed lines) for the post body.
          transcript: str
          # URL on the Lemmy instance once uploaded; None until then.
          image_url: str | None = None
      
      
      def munge_image(input_image_file) -> bytes:
          """Rearrange a 1x4 comic strip into a 2x2 grid and return PNG bytes.

          Runs a three-stage ImageMagick pipeline: crop the strip into four
          25%-wide panels, montage them onto a 2x2 grid, then upscale,
          quantize, and sharpen the result into the final PNG.

          Args:
              input_image_file: a real file object (needs an OS-level file
                  descriptor) positioned at the start of the image data.

          Returns:
              The finished PNG image as bytes.
          """
          # Stage 1: split the strip into four equal-width panels.
          crop_command = ["magick", "-", "-crop", "25%x100%", "+repage", "+adjoin", "miff:-"]
          # Stage 2: lay the panels out 2x2 with no gutter between them.
          montage_command = ["montage", "-", "-tile", "2x2", "-geometry", "+0+0", "miff:-"]
          # Stage 3: 2x upscale with a box filter, reduce to 256 colors at
          # 8-bit depth, sharpen, and emit PNG on stdout.
          final_command = [
              "magick",
              "-",
              "-resize",
              "200%",
              "-colors",
              "256",
              "-depth",
              "8",
              "-filter",
              "Box",
              "-sharpen",
              "0x2.0",
              "png:-",
          ]

          # Chain the stages stdout -> stdin like a shell pipeline; only the
          # final stage is checked (check=True) for a non-zero exit status.
          with subprocess.Popen(
              crop_command,
              stdin=input_image_file,
              stdout=subprocess.PIPE,
          ) as crop_result:
              with subprocess.Popen(
                  montage_command,
                  stdin=crop_result.stdout,
                  stdout=subprocess.PIPE,
              ) as montage_result:
                  final_bytes = subprocess.run(
                      final_command,
                      stdin=montage_result.stdout,
                      stdout=subprocess.PIPE,
                      check=True,
                  )
                  return final_bytes.stdout
      
      
      def ensure_downloaded(date: date) -> Strip | None:
          """Download, munge, and cache the Peanuts strip for `date`.

          Uses ./tmp/peanuts-<iso>.png plus a sidecar .json (transcript) as a
          cache; returns immediately from cache when present.

          Returns:
              A Strip with its transcript, or None when GoComics has no comic
              and the real-world day is Tuesday (the expected no-comic day).

          Raises:
              Exception: if no comic is found on any other weekday, or the
                  page is missing its image or description markup.
          """
          isodate = date.isoformat()
          formatted_date = date.strftime("%Y/%m/%d")

          local_path = Path("./tmp/") / f"peanuts-{isodate}.png"
          if local_path.exists():
              strip = json.loads(local_path.with_suffix(".json").read_text())
              transcript = strip["transcript"]
              print(f"Found existing file, using {local_path}")
              return Strip(local_path=str(local_path), transcript=transcript)

          base_url = "https://www.gocomics.com/peanuts/"
          url = f"{base_url}{formatted_date}"

          # Timeout added so a hung GoComics connection can't stall the whole
          # posting run indefinitely.
          response = requests.get(url, timeout=30)
          response.raise_for_status()
          soup = BeautifulSoup(response.text, "html.parser")

          picture_element = soup.find("picture", class_="item-comic-image")
          if picture_element is None:
              # `date` is the comic's (year-shifted) date; date.today() calls
              # the classmethod through the instance, i.e. the REAL today.
              # weekday() == 1 is Tuesday, the expected no-comic day.
              if date.today().weekday() == 1:
                  print("No comic, expected since it's Tuesday")
                  return None
              else:
                  raise Exception("It's not Tuesday and found no comic!")
          # Guard against markup changes instead of an opaque AttributeError.
          img_element = picture_element.find("img")
          image_url = img_element.get("src") if img_element else None
          if image_url is None:
              raise Exception("Found comic markup but no image URL!")
          print(f"Image URL found: {image_url}")

          with TemporaryFile("wb") as f:
              response = requests.get(image_url, timeout=30)
              response.raise_for_status()
              f.write(response.content)
              # tempfile fds are opened O_RDWR, so the subprocess pipeline can
              # read the data back after rewinding.
              f.seek(0)
              munged_bytes = munge_image(f)
              with open(local_path, "wb") as rf:
                  rf.write(munged_bytes)

          print(f"Comic saved as: {local_path}")

          description = soup.find("meta", property="og:description")
          if description is None:
              raise Exception("No og:description meta tag for the transcript!")
          # Build a Markdown blockquote: one '> ' line per transcript line.
          transcript = "\n>\n".join(
              f"> {line.strip()}"
              for line in unescape(description.get("content")).replace("<BR>", "\n").splitlines()
              if line
          )

          local_path.with_suffix(".json").write_text(json.dumps({"transcript": transcript}))

          return Strip(local_path=str(local_path), transcript=transcript)
      
      
      @backoff.on_exception(
          backoff.expo,
          (requests.exceptions.RequestException, ValueError),
          max_tries=16,
          base=6,
          max_time=120,
      )
      def ensure_uploaded(lemmy, path):
          """Upload `path` to the Lemmy instance at most once; return its URL.

          The sidecar .json marker written by ensure_downloaded doubles as an
          upload cache: once an image_url is recorded there, later runs reuse
          it instead of re-uploading. Network errors and a None upload result
          are retried with exponential backoff (see decorator).

          Raises:
              Exception: if the marker file is missing entirely.
              ValueError: if the upload returned None (triggers a retry).
          """
          print(f"Ensuring {path=} is uploaded...")

          marker_path = Path(path).with_suffix(".json")

          if marker_path.exists():
              print(f"Reading uploaded path from existing {marker_path=}")
              metadata = json.loads(marker_path.read_text())
              if "image_url" in metadata:
                  return metadata["image_url"]
          else:
              raise Exception(f"Does not exist! {marker_path=}")

          # BUG FIX: the old message claimed the marker was missing, but we
          # only reach this point when it exists without an image_url yet.
          print(f"No image_url in {marker_path=} yet, uploading")
          uploaded = lemmy.image.upload(path)
          if uploaded is None:
              raise ValueError("Got `None` for `uploaded`")
          metadata["image_url"] = uploaded[0]["image_url"]
          marker_path.write_text(json.dumps(metadata))
          return metadata["image_url"]
      
      
      def ensure_posted(lemmy: Lemmy, community_id: int, date: date, strip: Strip):
          """Post `strip` for `date` unless it's already up or a rest day.

          Skips Sundays in comic time (real-world Tuesdays, which have no
          strip), and skips when the newest dated post already matches `date`.

          Raises:
              Exception: if the post call returns a falsy result.
          """
          # weekday() == 6 is Sunday on the (year-shifted) comic calendar,
          # which corresponds to real-world Tuesday.
          if date.weekday() == 6:
              print("It's Tuesday (i.e. Sunday in comic time), skipping post.")
              return

          posts = lemmy.post.list(community_id, sort=SortType.New)
          non_tribute_posts = []
          for post in posts:
              try:
                  comic_date = datetime.strptime(post["post"]["name"], "%d %B %Y").date()
              except ValueError:
                  # Tribute Tuesday post, probably. Ignore
                  continue
              non_tribute_posts.append((comic_date, post))

          # BUG FIX: guard the empty case -- sorted(...)[-1] raised IndexError
          # when the community had no dated posts yet (e.g. a fresh community).
          if non_tribute_posts:
              newest_post_date, newest_post = max(non_tribute_posts, key=lambda pair: pair[0])
              if newest_post_date == date:
                  print("All caught up!")
                  return

          print("Need to post latest comic")

          body = f"Transcript:\n\n{strip.transcript}"

          # NOTE(review): "%-d" (unpadded day) is a glibc/BSD strftime
          # extension -- this assumes a Linux/macOS host.
          post = lemmy.post(
              community_id,
              date.strftime("%-d %B %Y"),
              url=strip.image_url,
              body=body,
              language_id=LanguageType.EN,
          )

          if post:
              print(post)
          else:
              raise Exception("Failed to post!")
      
      
      def main(lemmy: Lemmy):
          """Post today's comic-time Peanuts strip to peanuts@midwest.social.

          The bot replays strips year-for-year: real 2024 maps to comic 1950,
          2025 to 1951, and so on.
          """
          year_offset = date.today().year - 2024
          comic_year = 1950 + year_offset
          # BUG FIX: replace(year=...) raises ValueError on Feb 29 when the
          # mapped comic year is not a leap year; fall back to Feb 28.
          try:
              today = date.today().replace(year=comic_year)
          except ValueError:
              today = date.today().replace(year=comic_year, day=28)

          strip = ensure_downloaded(today)

          if not strip:
              print(f"Got {strip=}, it's probably Tuesday, so ignoring...")
              return
          else:
              print(strip)

          community_id = lemmy.discover_community("peanuts@midwest.social")
          strip.image_url = ensure_uploaded(lemmy, strip.local_path)
          ensure_posted(lemmy, community_id, today, strip)
      
      
      if __name__ == "__main__":
          # Allow posting just this comic by running the script directly.
          main(get_authed_lemmy())
      
      post_stuff.py
      import post_oglaf
      import post_peanuts
      import post_smbc
      from get_lemmy import get_authed_lemmy
      
      
      def main():
          """Log in once, then run every comic poster with the shared session."""
          lemmy = get_authed_lemmy()

          # One shared authenticated client keeps the login count (and thus
          # rate limiting) down. Order is preserved: peanuts, smbc, oglaf.
          for poster in (post_peanuts, post_smbc, post_oglaf):
              poster.main(lemmy=lemmy)
      
      
      if __name__ == "__main__":
          # Single entry point: one login shared across all comic posters.
          main()
      

      cc @db0@lemmy.dbzer0.com on ☝️ btw, pythorhead has been super useful! You might be interested in the use case of handling creds like I do there; maybe that’s something the library could do?

      • nocturne@sopuli.xyz
        link
        fedilink
        English
        arrow-up
        3
        ·
        edit-2
        3 days ago

        This all is over my head. I will continue to post bloomcounty manually for now. Plus I personally dislike when I see a bot posting.

        • MentalEdge@sopuli.xyz
          link
          fedilink
          English
          arrow-up
          2
          ·
          5 hours ago

          If you get tired of it, get in touch. I won’t mind adding Bloom County as one of the communities @dailycomic@sh.itjust.works posts to.

          People don’t seem to mind with CnH. Often bots are introduced to do something without anyone asking, but with stuff like this it’s pretty clear that it’s content people like, and which they want to continue seeing. Just the first post from the bot account got a comment about it, and it was just someone saying “oh, this makes sense”.

      • MentalEdge@sopuli.xyz
        link
        fedilink
        English
        arrow-up
        9
        ·
        edit-2
        5 days ago

        Mine’s just 54 lines right now. Executed daily as a cron job. Wrote in some simple checks that enable it to account for leap days, or being behind. It can be made to catch up simply by having it run more than once a day.

        I should make it cache the login, too.

        It can be made to post any comic on gocomics, daily, in sync, simply by editing a config toml.

        comicposter.py
        import datetime
        import os
        import requests
        import pickle
        import toml
        from pythorhead import Lemmy
        from PIL import Image
        from io import BytesIO
        
        # Config
        config = toml.load(open(os.path.curdir + '/config.toml', 'r'))

        # Restore persisted bot state, or (re)initialise it on first run or an
        # unreadable state file.
        try:
            botstate = pickle.load(open(os.path.join(os.path.curdir, 'botstate.data'), 'rb'))
        except Exception:
            botstate = {}
            botstate['startdate'] = datetime.datetime.strptime(config['startdate'] + ' 12:00:00', '%d/%m/%Y %H:%M:%S')
            botstate['lastrun'] = datetime.datetime.now() - datetime.timedelta(days=1)
            botstate['lastpostdate'] = botstate['startdate'] - datetime.timedelta(days=1)
            with open(os.path.join(os.path.curdir, 'botstate.data'), 'wb') as handle:
                pickle.dump(botstate, handle)

        today = datetime.datetime.today()

        if datetime.datetime.now() - botstate['lastrun'] < datetime.timedelta(hours=10):
            print('less than a day has passed since the last post, exiting')
            exit(0)
        # BUG FIX: the old month/day comparison broke across month and year
        # boundaries (Jan 31 -> Feb 1 and Dec 31 -> Jan 1 would never post);
        # compare the calendar dates directly instead.
        elif botstate['lastpostdate'].date() < today.date():
            postdate = botstate['lastpostdate'] + datetime.timedelta(days=1)
            title = postdate.strftime('%d %B %Y')

            url = 'https://www.gocomics.com/' + config['comic'] + '/' + postdate.strftime('%Y/%m/%d')
            r = requests.get(url, allow_redirects=True)
            loc = r.text.find('https://assets.amuniversal.com/')
            # Guard against find() returning -1 (page layout changed / no comic)
            # rather than silently slicing garbage from the end of the page.
            if loc == -1:
                print('could not find a comic image url on ' + url)
                exit(1)
            # Asset URLs have a fixed length: 31-char base + 32-char hex id.
            imgurl = r.text[loc:loc + 63]
            img_response = requests.get(imgurl, allow_redirects=True)
            with Image.open(BytesIO(img_response.content)) as image:
                image.save(os.path.join(os.path.curdir, 'comic.webp'), 'WEBP')

            lemmy = Lemmy('https://' + config['Lemmy']['instance'], request_timeout=30)
            lemmy.log_in(config['Lemmy']['username'], config['Lemmy']['password'])
            try:
                image = lemmy.image.upload(os.path.join(os.path.curdir, 'comic.webp'))
            except IOError as e:
                print(e)
                exit(1)
            community_id = lemmy.discover_community(config['community'])
            post = lemmy.post.create(community_id, title, url=image[0]["image_url"])
            print('posted: ' + post['post_view']['post']['ap_id'])

            # BUG FIX: only advance persisted state after actually posting.
            # Previously `postdate` was referenced even when the bot was
            # already caught up (the elif not taken), raising NameError.
            botstate['lastpostdate'] = postdate
            botstate['lastrun'] = datetime.datetime.now()
            with open(os.path.join(os.path.curdir, 'botstate.data'), 'wb') as handle:
                pickle.dump(botstate, handle)