#Caching Not Working

1 messages · Page 1 of 1 (latest)

crisp drift
#

Hi, I am having a problem with caching dependencies after changing an arbitrary line in an unrelated file. When I run my pipeline in the terminal, it runs and gives me the output. If I run it again, everything (including all of the dependencies, which take up the majority of the time to download) is cached and it takes significantly less time. However, if I go into a random file that is unrelated to the dependencies and change a line (add a blank line), and run the pipeline again, it redownloads all of the dependencies and doesn't use the cache. Is this intended? Is there a way to fix this? Thank you in advance for your help!

I read through the https://docs.dagger.io/635927/quickstart-caching/ documentation.

cunning ridge
#

👋 can you share your pipeline?

#

or an example about how you're doing it?

crisp drift
#

For sure! Here is my pipeline:

import sys
import anyio
import dagger

async def test():
    """Run the project's pytest suite in a python:3.8-slim-buster container via Dagger.

    The local directory is copied to /src, Poetry is installed with pip,
    the project's dependencies (including the "test" group) are installed
    with Poetry, and pytest is run with JUnit and coverage reporting.
    """
    async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client:
        # get reference to the local project
        src = client.host().directory(".")
        # create a cache volume
        node_cache = client.cache_volume("node")

        python = (
            # start from the slim Python 3.8 image
            client.container().from_("python:3.8-slim-buster")
            # place the local project at /src in the image
            .with_directory("/src", src)
            # load in dependencies cache
            .with_mounted_cache("/src/.cache", node_cache)
            # set current working directory for next commands
            .with_workdir("/src")
            # install test dependencies
            .with_exec(["pip", "install", "poetry"])
            .with_exec(["poetry", "install", "--with", "test"])
            # run tests
            .with_exec(["poetry", "run", "pytest", "--junitxml=\"test-result.xml\"", "--cov-report", "xml",
                        "--cov-report", "term", "--cov=."])
        )

        # execute
        await python.exit_code()

    print("Tests succeeded!")


if __name__ == "__main__":
    anyio.run(test)

light moth
#

The src declaration creates a content addressed (hashed) directory/file system, so any changes in that directory break the cache after that line. If you want to cache dependencies, grab just what you need to install those dependencies (requirements.txt at least), pull that into the install directory, install your dependencies, then pull the rest of your code in. We might have some examples of this. I can take a look in a few.

crisp drift
cunning ridge
#

Guilly the reason why dependencies get installed each time is because you don't seem to be caching the pip dependencies directory. If you change the with_mounted_cache to /root/.pip/cache that should help with the dependencies

#

you should also add ~/.cache/pypoetry for poetry deps. I'll add a documentation issue for this since it should be in our docs

crisp drift
cunning ridge
cunning ridge
#

@crisp drift small correction, pip cache should be /root/.cache/pip

#

I mixed pip and cache in the original answer

#

@light moth it shouldn't be necessary to split withDirectories if the cache is mounted correctly. I generally prefer it this way since it doesn't require splitting the workflow

light moth
cunning ridge
#

cache still needs to be configured anyways as any change in requirements.txt will trigger a complete dependency reinstall

light moth
#

Which using a cache mount precludes.

light moth
#

Obviously not a problem with Go

cunning ridge
#

same as node_modules for node apps

crisp drift
#

@light moth @cunning ridge So would something like this be better? I initialized two caches at the beginning (pip and poetry). I moved the src declaration after the directory changes

async def test():
    """Run the project's pytest suite in a python:3.8-slim-buster container via Dagger.

    Two cache volumes ("pip" and "poetry") are created and mounted before
    Poetry and the project's dependencies are installed and pytest is run.
    """
    async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client:
        # create the cache volumes
        pip_cache = client.cache_volume("pip")
        poetry_cache = client.cache_volume("poetry")

        python = (
            # start from the slim Python 3.8 image
            client.container().from_("python:3.8-slim-buster")
            # place the local project at /src in the image
            .with_directory("/src", client.host().directory("."), "/.cache")
            # load in dependencies cache
            .with_mounted_cache("/src/.cache/pip", pip_cache)
            .with_mounted_cache("~/.cache/pypoetry", poetry_cache)
            # set current working directory for next commands
            .with_workdir("/src")
            # install test dependencies
            .with_exec(["pip", "install", "poetry"])
            .with_exec(["poetry", "install", "--with", "test"])
            # run tests
            .with_exec(["poetry", "run", "pytest", "--junitxml=\"test-result.xml\"", "--cov-report", "xml",
                        "--cov-report", "term", "--cov=."])
        )

        # execute
        await python.exit_code()

    print("Tests succeeded!")
cunning ridge
#

@crisp drift not quite. Let me help you out here.

#
"""Run tests for a single Python version."""

import sys

import anyio

import dagger


async def test():
    """Install the project's dependencies with Poetry in a Python 3.10 container.

    Cache volumes are mounted over pip's and Poetry's cache directories
    before the project source is added, so downloaded packages are kept in
    the volumes rather than in the image layers that the source invalidates.
    """
    async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client:
        # get reference to the local project
        src = client.host().directory(".")
        pip_cache = client.cache_volume("pip")
        poetry_cache = client.cache_volume("poetry")

        python = (
            client.container().from_("python:3.10-slim-buster")
            # persist pip's download cache between runs
            .with_mounted_cache("/root/.cache/pip", pip_cache)
            # persist Poetry's cache between runs; Poetry's default cache
            # directory on Linux is ~/.cache/pypoetry (not ~/.cache/poetry)
            .with_mounted_cache("/root/.cache/pypoetry", poetry_cache)
            # place the local project at /src in the image
            .with_directory("/src", src)
            # set current working directory for next commands
            .with_workdir("/src")
            # install Poetry itself
            .with_exec(["pip", "install", "poetry"])
            # install the project's dependencies
            .with_exec(["poetry", "install"])
        )

        # execute
        await python.exit_code()

    print("Tests succeeded!")


if __name__ == "__main__":
    anyio.run(test)

that should be more like it

#

^ I'm missing some of the commands you're trying to run since I don't have your exact project layout.. but you'll get it from here. Let us know if you still have issues

crisp drift
#

@light moth Thank you for your help, as well!

light moth
#

My pleasure!

cunning ridge
fast root
#

Sorry to bring up an old thread but I can't get caching working either with the above code:

import sys
import anyio
import dagger


async def test():
    """Install requirements.txt and run pytest in a Python 3.10 container via Dagger."""
    config = dagger.Config(log_output=sys.stderr)
    async with dagger.Connection(config) as client:
        project_dir = client.host().directory(".")
        pip_downloads = client.cache_volume("pip")

        # Base image with a cache volume mounted over pip's cache directory.
        base = (
            client.container()
            .from_("python:3.10-slim-buster")
            .with_mounted_cache("/root/.cache/pip", pip_downloads)
        )

        # The whole project is added before the install step runs.
        workspace = base.with_directory("/src", project_dir).with_workdir("/src")
        installed = workspace.with_exec(["pip", "install", "-r", "requirements.txt"])
        tested = installed.with_exec(["pytest", "tests"])

        # execute the pipeline
        await tested.sync()

    print("Tests succeeded!")

anyio.run(test)

Here anytime I change my test cases it triggers a full install of my dependencies. Is there a way to keep dependencies cached even if /src changes?

> The src declaration creates a content addressed (hashed) directory/file system, so any changes in that directory break the cache after that line. If you want to cache dependencies, grab just what you need to install those dependencies (requirements.txt at least), pull that into the install directory, install your dependencies, then pull the rest of your code in. We might have some examples of this. I can take a look in a few.

Is this what I'm looking for?

cunning ridge
#

hey @fast root! I just tested your example and it seems to be working ok for me. I do see the output of pip install when changing my test cases, but if you check out the message, it'll state that all the dependencies are currently cached.
This is what I currently get:

[1.67s] exec pip install -r requirements.txt                                                 
┃ Collecting requests                                                                                                                                                                          
┃   Using cached requests-2.31.0-py3-none-any.whl (62 kB)                                      
┃ Collecting pytest                                                                            
┃   Using cached pytest-7.4.3-py3-none-any.whl (325 kB)                                                                                                                                        
┃ Collecting charset-normalizer<4,>=2                                                                                                                                                          
┃   Using cached charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (142 kB)                                                                                  
┃ Collecting idna<4,>=2.5                                                                      
┃   Using cached idna-3.4-py3-none-any.whl (61 kB)                                             
┃ Collecting certifi>=2017.4.17                                                                                                                                                                
┃   Using cached certifi-2023.7.22-py3-none-any.whl (158 kB)                                                                                                                                   
┃ Collecting urllib3<3,>=1.21.1                           
rare pendant
#

To avoid getting cache invalidation you can pull in requirements without all the rest of the source code first, yes. Here's an example:

import sys
import anyio
import dagger


async def test():
    """Install requirements before adding the full source, then run pytest.

    Only requirements.txt is present when ``pip install`` runs, so edits to
    other project files do not invalidate the install step; the rest of the
    source is added afterwards.
    """
    config = dagger.Config(log_output=sys.stderr)
    async with dagger.Connection(config) as client:
        host = client.host()
        requirements_only = host.directory(".", include=["requirements.txt"])
        full_source = host.directory(".")
        pip_downloads = client.cache_volume("pip")

        # Stage 1: image + pip cache + requirements.txt only, then install.
        with_deps = (
            client.container()
            .from_("python:3.10-slim-buster")
            .with_mounted_cache("/root/.cache/pip", pip_downloads)
            .with_directory("/src", requirements_only)
            .with_workdir("/src")
            .with_exec(["pip", "install", "-r", "requirements.txt"])
        )

        # Stage 2: bring in the rest of the project and run the tests.
        tested = with_deps.with_directory("/src", full_source).with_exec(["pytest", "tests"])

        # execute the pipeline
        await tested.sync()

    print("Tests succeeded!")

anyio.run(test)
fast root